[AMDGPU] Handle lowering addrspace casts from LDS to FLAT address in amdgpu-sw-lower-lds. (#121214)

"infer-address-spaces" pass replaces all refinable generic pointers with
equivalent specific pointers.

At -O0 optimisation level, infer-address-spaces pass doesn't run in the
pipeline.

"amdgpu-sw-lower-lds" pass instruments memory operations on addrspace(3)
ptrs. Since, extra addrspacecasts are present from lds to flat
addrspaces at -O0 and the actual store/load memory instructions are now
on flat addrspace, these addrspacecast need to be handled in the
amdgpu-sw-lower-lds pass itself. This patch lowers the lds ptr first to
the corresponding ptr in the global memory from the asan_malloc. Then
replaces the original cast with addrspacecast from global ptr to flat
ptr.
This commit is contained in:
Chaitanya 2025-02-19 08:50:23 +05:30 committed by GitHub
parent 6662fe393c
commit aed9f11965
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 539 additions and 300 deletions

View File

@ -192,8 +192,7 @@ public:
void getLDSMemoryInstructions(Function *Func,
SetVector<Instruction *> &LDSInstructions);
void replaceKernelLDSAccesses(Function *Func);
Value *getTranslatedGlobalMemoryGEPOfLDSPointer(Value *LoadMallocPtr,
Value *LDSPtr);
Value *getTranslatedGlobalMemoryPtrOfLDS(Value *LoadMallocPtr, Value *LDSPtr);
void translateLDSMemoryOperationsToGlobalMemory(
Function *Func, Value *LoadMallocPtr,
SetVector<Instruction *> &LDSInstructions);
@ -655,20 +654,30 @@ void AMDGPUSwLowerLDS::getLDSMemoryInstructions(
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(&Inst)) {
if (XCHG->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
LDSInstructions.insert(&Inst);
} else if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&Inst)) {
if (ASC->getSrcAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS)
LDSInstructions.insert(&Inst);
} else
continue;
}
}
}
Value *
AMDGPUSwLowerLDS::getTranslatedGlobalMemoryGEPOfLDSPointer(Value *LoadMallocPtr,
Value *AMDGPUSwLowerLDS::getTranslatedGlobalMemoryPtrOfLDS(Value *LoadMallocPtr,
Value *LDSPtr) {
assert(LDSPtr && "Invalid LDS pointer operand");
Value *PtrToInt = IRB.CreatePtrToInt(LDSPtr, IRB.getInt32Ty());
Value *GEP =
IRB.CreateInBoundsGEP(IRB.getInt8Ty(), LoadMallocPtr, {PtrToInt});
return GEP;
Type *LDSPtrType = LDSPtr->getType();
LLVMContext &Ctx = M.getContext();
const DataLayout &DL = M.getDataLayout();
Type *IntTy = DL.getIntPtrType(Ctx, AMDGPUAS::LOCAL_ADDRESS);
if (auto *VecPtrTy = dyn_cast<VectorType>(LDSPtrType)) {
// Handle vector of pointers
ElementCount NumElements = VecPtrTy->getElementCount();
IntTy = VectorType::get(IntTy, NumElements);
}
Value *GepIndex = IRB.CreatePtrToInt(LDSPtr, IntTy);
return IRB.CreateInBoundsGEP(IRB.getInt8Ty(), LoadMallocPtr, {GepIndex});
}
void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
@ -681,7 +690,7 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
Value *LIOperand = LI->getPointerOperand();
Value *Replacement =
getTranslatedGlobalMemoryGEPOfLDSPointer(LoadMallocPtr, LIOperand);
getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, LIOperand);
LoadInst *NewLI = IRB.CreateAlignedLoad(LI->getType(), Replacement,
LI->getAlign(), LI->isVolatile());
NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
@ -691,7 +700,7 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
Value *SIOperand = SI->getPointerOperand();
Value *Replacement =
getTranslatedGlobalMemoryGEPOfLDSPointer(LoadMallocPtr, SIOperand);
getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, SIOperand);
StoreInst *NewSI = IRB.CreateAlignedStore(
SI->getValueOperand(), Replacement, SI->getAlign(), SI->isVolatile());
NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
@ -701,8 +710,8 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
Value *RMWPtrOperand = RMW->getPointerOperand();
Value *RMWValOperand = RMW->getValOperand();
Value *Replacement = getTranslatedGlobalMemoryGEPOfLDSPointer(
LoadMallocPtr, RMWPtrOperand);
Value *Replacement =
getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, RMWPtrOperand);
AtomicRMWInst *NewRMW = IRB.CreateAtomicRMW(
RMW->getOperation(), Replacement, RMWValOperand, RMW->getAlign(),
RMW->getOrdering(), RMW->getSyncScopeID());
@ -712,8 +721,8 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
RMW->eraseFromParent();
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(Inst)) {
Value *XCHGPtrOperand = XCHG->getPointerOperand();
Value *Replacement = getTranslatedGlobalMemoryGEPOfLDSPointer(
LoadMallocPtr, XCHGPtrOperand);
Value *Replacement =
getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, XCHGPtrOperand);
AtomicCmpXchgInst *NewXCHG = IRB.CreateAtomicCmpXchg(
Replacement, XCHG->getCompareOperand(), XCHG->getNewValOperand(),
XCHG->getAlign(), XCHG->getSuccessOrdering(),
@ -722,6 +731,16 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
AsanInfo.Instructions.insert(NewXCHG);
XCHG->replaceAllUsesWith(NewXCHG);
XCHG->eraseFromParent();
} else if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(Inst)) {
Value *AIOperand = ASC->getPointerOperand();
Value *Replacement =
getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, AIOperand);
Value *NewAI = IRB.CreateAddrSpaceCast(Replacement, ASC->getType());
// Note: No need to add the instruction to AsanInfo instructions to be
// instrumented list. FLAT_ADDRESS ptr would have been already
// instrumented by asan pass prior to this pass.
ASC->replaceAllUsesWith(NewAI);
ASC->eraseFromParent();
} else
report_fatal_error("Unimplemented LDS lowering instruction");
}

View File

@ -20,8 +20,12 @@ define void @non_kernel_function() sanitize_address {
; CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP5]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[TMP7]]
; CHECK-NEXT: [[Y:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
; CHECK-NEXT: [[TMP9:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP14]]
; CHECK-NEXT: [[TMP11:%.*]] = addrspacecast ptr addrspace(1) [[TMP10]] to ptr
; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP12]]
; CHECK-NEXT: [[TMP9:%.*]] = addrspacecast ptr addrspace(1) [[TMP13]] to ptr
; CHECK-NEXT: store i8 5, ptr [[TMP9]], align 8
; CHECK-NEXT: ret void
;

View File

@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
; Test to check if static LDS is lowered correctly when a non-kernel with LDS accesses is called from kernel.
@ -28,8 +28,12 @@ define void @use_variables() sanitize_address {
; CHECK-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP11]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) [[TMP12]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP10]]
; CHECK-NEXT: [[X:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr
; CHECK-NEXT: [[TMP16:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr
; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP13]]
; CHECK-NEXT: [[TMP34:%.*]] = addrspacecast ptr addrspace(1) [[TMP33]] to ptr
; CHECK-NEXT: [[TMP35:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32
; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP35]]
; CHECK-NEXT: [[TMP16:%.*]] = addrspacecast ptr addrspace(1) [[TMP36]] to ptr
; CHECK-NEXT: store i8 3, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr addrspace(3) [[TMP15]] to i32
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP14]]
@ -45,16 +49,16 @@ define void @use_variables() sanitize_address {
; CHECK-NEXT: [[TMP25:%.*]] = and i1 [[TMP21]], [[TMP24]]
; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP25]])
; CHECK-NEXT: [[TMP27:%.*]] = icmp ne i64 [[TMP26]], 0
; CHECK-NEXT: br i1 [[TMP27]], label [[ASAN_REPORT:%.*]], label [[TMP30:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK: asan.report:
; CHECK-NEXT: br i1 [[TMP25]], label [[TMP28:%.*]], label [[TMP29:%.*]]
; CHECK: 28:
; CHECK-NEXT: br i1 [[TMP27]], label %[[ASAN_REPORT:.*]], label %[[BB35:.*]], !prof [[PROF2:![0-9]+]]
; CHECK: [[ASAN_REPORT]]:
; CHECK-NEXT: br i1 [[TMP25]], label %[[BB33:.*]], label %[[BB34:.*]]
; CHECK: [[BB33]]:
; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP32]]) #[[ATTR7:[0-9]+]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
; CHECK-NEXT: br label [[TMP29]]
; CHECK: 29:
; CHECK-NEXT: br label [[TMP30]]
; CHECK: 30:
; CHECK-NEXT: br label %[[BB34]]
; CHECK: [[BB34]]:
; CHECK-NEXT: br label %[[BB35]]
; CHECK: [[BB35]]:
; CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP31]], align 8
; CHECK-NEXT: ret void
;
@ -67,15 +71,15 @@ define void @use_variables() sanitize_address {
define amdgpu_kernel void @k0() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @k0(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
; CHECK-NEXT: WId:
; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
; CHECK: Malloc:
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB24:.*]]
; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP13]], [[TMP14]]
@ -100,9 +104,9 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 132
; CHECK-NEXT: [[TMP68:%.*]] = ptrtoint ptr addrspace(1) [[TMP67]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP68]], i64 28)
; CHECK-NEXT: br label [[TMP7]]
; CHECK: 24:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
; CHECK-NEXT: br label %[[BB24]]
; CHECK: [[BB24]]:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@ -124,16 +128,16 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK-NEXT: [[TMP41:%.*]] = and i1 [[TMP37]], [[TMP40]]
; CHECK-NEXT: [[TMP42:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP41]])
; CHECK-NEXT: [[TMP43:%.*]] = icmp ne i64 [[TMP42]], 0
; CHECK-NEXT: br i1 [[TMP43]], label [[ASAN_REPORT:%.*]], label [[TMP46:%.*]], !prof [[PROF2]]
; CHECK: asan.report:
; CHECK-NEXT: br i1 [[TMP41]], label [[TMP44:%.*]], label [[CONDFREE:%.*]]
; CHECK: 44:
; CHECK-NEXT: br i1 [[TMP43]], label %[[ASAN_REPORT:.*]], label %[[BB46:.*]], !prof [[PROF2]]
; CHECK: [[ASAN_REPORT]]:
; CHECK-NEXT: br i1 [[TMP41]], label %[[BB44:.*]], label %[[BB45:.*]]
; CHECK: [[BB44]]:
; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP32]]) #[[ATTR7]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
; CHECK-NEXT: br label [[CONDFREE]]
; CHECK: 45:
; CHECK-NEXT: br label [[TMP46]]
; CHECK: 46:
; CHECK-NEXT: br label %[[BB45]]
; CHECK: [[BB45]]:
; CHECK-NEXT: br label %[[BB46]]
; CHECK: [[BB46]]:
; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP31]], align 1
; CHECK-NEXT: [[TMP47:%.*]] = ptrtoint ptr addrspace(3) [[TMP18]] to i32
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP47]]
@ -152,16 +156,16 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK-NEXT: [[TMP59:%.*]] = and i1 [[TMP54]], [[TMP58]]
; CHECK-NEXT: [[TMP60:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP59]])
; CHECK-NEXT: [[TMP61:%.*]] = icmp ne i64 [[TMP60]], 0
; CHECK-NEXT: br i1 [[TMP61]], label [[ASAN_REPORT1:%.*]], label [[TMP64:%.*]], !prof [[PROF2]]
; CHECK: asan.report1:
; CHECK-NEXT: br i1 [[TMP59]], label [[TMP62:%.*]], label [[TMP63:%.*]]
; CHECK: 64:
; CHECK-NEXT: br i1 [[TMP61]], label %[[ASAN_REPORT1:.*]], label %[[BB66:.*]], !prof [[PROF2]]
; CHECK: [[ASAN_REPORT1]]:
; CHECK-NEXT: br i1 [[TMP59]], label %[[BB64:.*]], label %[[BB65:.*]]
; CHECK: [[BB64]]:
; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP83]]) #[[ATTR7]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
; CHECK-NEXT: br label [[TMP63]]
; CHECK: 65:
; CHECK-NEXT: br label [[TMP64]]
; CHECK: 66:
; CHECK-NEXT: br label %[[BB65]]
; CHECK: [[BB65]]:
; CHECK-NEXT: br label %[[BB66]]
; CHECK: [[BB66]]:
; CHECK-NEXT: [[TMP84:%.*]] = ptrtoint ptr addrspace(1) [[TMP82]] to i64
; CHECK-NEXT: [[TMP85:%.*]] = lshr i64 [[TMP84]], 3
; CHECK-NEXT: [[TMP69:%.*]] = add i64 [[TMP85]], 2147450880
@ -174,28 +178,28 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK-NEXT: [[TMP76:%.*]] = and i1 [[TMP72]], [[TMP75]]
; CHECK-NEXT: [[TMP77:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP76]])
; CHECK-NEXT: [[TMP78:%.*]] = icmp ne i64 [[TMP77]], 0
; CHECK-NEXT: br i1 [[TMP78]], label [[ASAN_REPORT2:%.*]], label [[TMP81:%.*]], !prof [[PROF2]]
; CHECK: asan.report2:
; CHECK-NEXT: br i1 [[TMP76]], label [[TMP79:%.*]], label [[TMP80:%.*]]
; CHECK: 79:
; CHECK-NEXT: br i1 [[TMP78]], label %[[ASAN_REPORT2:.*]], label %[[BB81:.*]], !prof [[PROF2]]
; CHECK: [[ASAN_REPORT2]]:
; CHECK-NEXT: br i1 [[TMP76]], label %[[BB79:.*]], label %[[BB80:.*]]
; CHECK: [[BB79]]:
; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP84]]) #[[ATTR7]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
; CHECK-NEXT: br label [[TMP80]]
; CHECK: 80:
; CHECK-NEXT: br label [[TMP81]]
; CHECK: 81:
; CHECK-NEXT: br label %[[BB80]]
; CHECK: [[BB80]]:
; CHECK-NEXT: br label %[[BB81]]
; CHECK: [[BB81]]:
; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP48]], align 2
; CHECK-NEXT: br label [[CONDFREE1:%.*]]
; CHECK: CondFree:
; CHECK-NEXT: br label %[[CONDFREE:.*]]
; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
; CHECK: Free:
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]])
; CHECK-NEXT: br label [[END]]
; CHECK: End:
; CHECK-NEXT: br label %[[END]]
; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
call void @use_variables()

View File

@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
; Test to check if LDS accesses are lowered correctly when a call is made to nested non-kernel.
@ -6,50 +6,64 @@
@A = external addrspace(3) global [8 x ptr]
@B = external addrspace(3) global [0 x i32]
;.
; @llvm.amdgcn.sw.lds.kernel_0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
; @llvm.amdgcn.sw.lds.kernel_0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_0.md.type { %llvm.amdgcn.sw.lds.kernel_0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_0.md.item { i32 32, i32 64, i32 96 } }, no_sanitize_address
; @llvm.amdgcn.sw.lds.kernel_2 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0]]
; @llvm.amdgcn.sw.lds.kernel_2.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_2.md.type { %llvm.amdgcn.sw.lds.kernel_2.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_2.md.item { i32 32, i32 64, i32 96 } }, no_sanitize_address
; @llvm.amdgcn.sw.lds.kernel_1 = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0]]
; @llvm.amdgcn.kernel_1.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 4, !absolute_symbol [[META1:![0-9]+]]
; @llvm.amdgcn.sw.lds.kernel_1.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_1.md.type { %llvm.amdgcn.sw.lds.kernel_1.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_1.md.item { i32 32, i32 0, i32 32 } }, no_sanitize_address
; @llvm.amdgcn.sw.lds.kernel_3 = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0]]
; @llvm.amdgcn.kernel_3.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 4, !absolute_symbol [[META1]]
; @llvm.amdgcn.sw.lds.kernel_3.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_3.md.type { %llvm.amdgcn.sw.lds.kernel_3.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_3.md.item { i32 32, i32 0, i32 32 } }, no_sanitize_address
; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [4 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3], no_sanitize_address
; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [4 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 0), ptr addrspace(1) poison], [2 x ptr addrspace(1)] [ptr addrspace(1) poison, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_1.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 0)], [2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_2.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 0), ptr addrspace(1) poison], [2 x ptr addrspace(1)] [ptr addrspace(1) poison, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_3.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 0)]], no_sanitize_address
;.
define amdgpu_kernel void @kernel_0() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @kernel_0(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
; CHECK-NEXT: WId:
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
; CHECK: Malloc:
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 2), align 4
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP9]], [[TMP10]]
; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP12]] to i64
; CHECK-NEXT: [[TMP13:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP11]], i64 [[TMP14]])
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1)
; CHECK-NEXT: store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, align 8
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8
; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP21]], i64 24)
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 96
; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 32)
; CHECK-NEXT: br label [[TMP7]]
; CHECK: 18:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 2), align 4
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, align 8
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 96
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 32)
; CHECK-NEXT: br label %[[BB18]]
; CHECK: [[BB18]]:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP15:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, align 8
; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, align 8
; CHECK-NEXT: call void @call_store_A()
; CHECK-NEXT: br label [[CONDFREE:%.*]]
; CHECK: CondFree:
; CHECK-NEXT: br label %[[CONDFREE:.*]]
; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
; CHECK: Free:
; CHECK-NEXT: [[TMP16:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64
; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr addrspace(1) [[TMP15]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP18]], i64 [[TMP17]])
; CHECK-NEXT: br label [[END]]
; CHECK: End:
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]])
; CHECK-NEXT: br label %[[END]]
; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
call void @call_store_A()
@ -58,56 +72,56 @@ define amdgpu_kernel void @kernel_0() sanitize_address {
define amdgpu_kernel void @kernel_1() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @kernel_1(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
; CHECK-NEXT: WId:
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META4:![0-9]+]] {
; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP14:%.*]]
; CHECK: Malloc:
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, align 4
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 0, i32 2), align 4
; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TMP12]], [[TMP20]]
; CHECK-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP6]], i64 15
; CHECK-NEXT: store i32 [[TMP21]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[TMP7]], align 4
; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 1), align 4
; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 3
; CHECK-NEXT: [[TMP10:%.*]] = udiv i32 [[TMP9]], 4
; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], 4
; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 2), align 4
; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP21]], [[TMP11]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB23:.*]]
; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, align 4
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 0, i32 2), align 4
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP9]], i64 15
; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4
; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 1), align 4
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 3
; CHECK-NEXT: [[TMP13:%.*]] = udiv i32 [[TMP12]], 4
; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 4
; CHECK-NEXT: store i32 [[TMP14]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 2), align 4
; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP8]], [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64
; CHECK-NEXT: [[TMP17:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP16]], i64 [[TMP18]])
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1)
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, align 8
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP26]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP27]], i64 24)
; CHECK-NEXT: br label [[TMP14]]
; CHECK: 23:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
; CHECK-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1)
; CHECK-NEXT: store ptr addrspace(1) [[TMP20]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, align 8
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 8
; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP22]], i64 24)
; CHECK-NEXT: br label %[[BB23]]
; CHECK: [[BB23]]:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, align 8
; CHECK-NEXT: [[TMP24:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, align 8
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_1.dynlds) ]
; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr()
; CHECK-NEXT: br label [[CONDFREE:%.*]]
; CHECK: CondFree:
; CHECK-NEXT: br label %[[CONDFREE:.*]]
; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
; CHECK: Free:
; CHECK-NEXT: [[TMP23:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[TMP23]] to i64
; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP25]], i64 [[TMP24]])
; CHECK-NEXT: br label [[END]]
; CHECK: End:
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP25:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr [[TMP25]] to i64
; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP24]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP27]], i64 [[TMP26]])
; CHECK-NEXT: br label %[[END]]
; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
%ptr = call ptr @get_B_ptr()
@ -116,48 +130,48 @@ define amdgpu_kernel void @kernel_1() sanitize_address {
define amdgpu_kernel void @kernel_2() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @kernel_2(
; CHECK-SAME: ) #[[ATTR0]] !llvm.amdgcn.lds.kernel.id [[META4:![0-9]+]] {
; CHECK-NEXT: WId:
; CHECK-SAME: ) #[[ATTR0]] !llvm.amdgcn.lds.kernel.id [[META5:![0-9]+]] {
; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
; CHECK: Malloc:
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_2_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_2_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 2), align 4
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP9]], [[TMP10]]
; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP12]] to i64
; CHECK-NEXT: [[TMP13:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP11]], i64 [[TMP14]])
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1)
; CHECK-NEXT: store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, align 8
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8
; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP21]], i64 24)
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 96
; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 32)
; CHECK-NEXT: br label [[TMP7]]
; CHECK: 18:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_2_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_2_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 2), align 4
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, align 8
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 96
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 32)
; CHECK-NEXT: br label %[[BB18]]
; CHECK: [[BB18]]:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP15:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, align 8
; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, align 8
; CHECK-NEXT: call void @store_A()
; CHECK-NEXT: br label [[CONDFREE:%.*]]
; CHECK: CondFree:
; CHECK-NEXT: br label %[[CONDFREE:.*]]
; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
; CHECK: Free:
; CHECK-NEXT: [[TMP16:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64
; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr addrspace(1) [[TMP15]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP18]], i64 [[TMP17]])
; CHECK-NEXT: br label [[END]]
; CHECK: End:
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]])
; CHECK-NEXT: br label %[[END]]
; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
call void @store_A()
@ -166,56 +180,56 @@ define amdgpu_kernel void @kernel_2() sanitize_address {
define amdgpu_kernel void @kernel_3() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @kernel_3(
; CHECK-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id [[META5:![0-9]+]] {
; CHECK-NEXT: WId:
; CHECK-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id [[META6:![0-9]+]] {
; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP14:%.*]]
; CHECK: Malloc:
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, align 4
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 0, i32 2), align 4
; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TMP12]], [[TMP20]]
; CHECK-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP6]], i64 15
; CHECK-NEXT: store i32 [[TMP21]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[TMP7]], align 4
; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 1), align 4
; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 3
; CHECK-NEXT: [[TMP10:%.*]] = udiv i32 [[TMP9]], 4
; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], 4
; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 2), align 4
; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP21]], [[TMP11]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB23:.*]]
; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, align 4
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 0, i32 2), align 4
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP9]], i64 15
; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4
; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 1), align 4
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 3
; CHECK-NEXT: [[TMP13:%.*]] = udiv i32 [[TMP12]], 4
; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 4
; CHECK-NEXT: store i32 [[TMP14]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 2), align 4
; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP8]], [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64
; CHECK-NEXT: [[TMP17:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP16]], i64 [[TMP18]])
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1)
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3, align 8
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP26]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP27]], i64 24)
; CHECK-NEXT: br label [[TMP14]]
; CHECK: 23:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
; CHECK-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1)
; CHECK-NEXT: store ptr addrspace(1) [[TMP20]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3, align 8
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 8
; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP22]], i64 24)
; CHECK-NEXT: br label %[[BB23]]
; CHECK: [[BB23]]:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3, align 8
; CHECK-NEXT: [[TMP24:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3, align 8
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_3.dynlds) ]
; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr()
; CHECK-NEXT: br label [[CONDFREE:%.*]]
; CHECK: CondFree:
; CHECK-NEXT: br label %[[CONDFREE:.*]]
; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
; CHECK: Free:
; CHECK-NEXT: [[TMP23:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[TMP23]] to i64
; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP25]], i64 [[TMP24]])
; CHECK-NEXT: br label [[END]]
; CHECK: End:
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP25:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr [[TMP25]] to i64
; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP24]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP27]], i64 [[TMP26]])
; CHECK-NEXT: br label %[[END]]
; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
%ptr = call ptr @get_B_ptr()
@ -237,14 +251,16 @@ define private void @store_A() sanitize_address {
; CHECK-SAME: ) #[[ATTR2]] {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP4]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) [[TMP5]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr
; CHECK-NEXT: store ptr [[TMP10]], ptr null, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP3]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP5]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = addrspacecast ptr addrspace(1) [[TMP10]] to ptr
; CHECK-NEXT: store ptr [[TMP11]], ptr null, align 8
; CHECK-NEXT: ret void
;
store ptr addrspacecast (ptr addrspace(3) @A to ptr), ptr null
@ -256,14 +272,16 @@ define private ptr @get_B_ptr() sanitize_address {
; CHECK-SAME: ) #[[ATTR2]] {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP4]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) [[TMP5]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr
; CHECK-NEXT: ret ptr [[TMP10]]
; CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP3]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP5]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = addrspacecast ptr addrspace(1) [[TMP10]] to ptr
; CHECK-NEXT: ret ptr [[TMP11]]
;
ret ptr addrspacecast (ptr addrspace(3) @B to ptr)
}
@ -272,8 +290,6 @@ define private ptr @get_B_ptr() sanitize_address {
!0 = !{i32 4, !"nosanitize_address", i32 1}
;.
; CHECK: [[META2]] = !{i32 0}
; CHECK: [[META3]] = !{i32 1}
; CHECK: [[META4]] = !{i32 2}
; CHECK: [[META5]] = !{i32 3}
; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8" }
; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8,8" }
;.

View File

@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
; Test to check if LDS accesses are lowered correctly when a call is made to nested non-kernel.
@ -6,18 +6,32 @@
@A = external addrspace(3) global [8 x ptr]
@B = external addrspace(3) global [0 x i32]
;.
; @llvm.amdgcn.sw.lds.kernel_2 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
; @llvm.amdgcn.sw.lds.kernel_2.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_2.md.type { %llvm.amdgcn.sw.lds.kernel_2.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_2.md.item { i32 32, i32 64, i32 96 } }, no_sanitize_address
; @llvm.amdgcn.sw.lds.kernel_1 = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0]]
; @llvm.amdgcn.kernel_1.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 4, !absolute_symbol [[META1:![0-9]+]]
; @llvm.amdgcn.sw.lds.kernel_1.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_1.md.type { %llvm.amdgcn.sw.lds.kernel_1.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_1.md.item { i32 32, i32 0, i32 32 } }, no_sanitize_address
; @llvm.amdgcn.sw.lds.kernel_3 = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0]]
; @llvm.amdgcn.kernel_3.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 4, !absolute_symbol [[META1]]
; @llvm.amdgcn.sw.lds.kernel_3.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_3.md.type { %llvm.amdgcn.sw.lds.kernel_3.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_3.md.item { i32 32, i32 0, i32 32 } }, no_sanitize_address
; @llvm.amdgcn.sw.lds.kernel_0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0]]
; @llvm.amdgcn.sw.lds.kernel_0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_0.md.type { %llvm.amdgcn.sw.lds.kernel_0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_0.md.item { i32 32, i32 64, i32 96 } }, no_sanitize_address
; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [4 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3], no_sanitize_address
; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [4 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 0), ptr addrspace(1) poison], [2 x ptr addrspace(1)] [ptr addrspace(1) poison, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_1.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 0)], [2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_2.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 0), ptr addrspace(1) poison], [2 x ptr addrspace(1)] [ptr addrspace(1) poison, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_3.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 0)]], no_sanitize_address
;.
define amdgpu_kernel void @kernel_0() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @kernel_0(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
; CHECK-NEXT: WId:
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
; CHECK: Malloc:
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 2), align 4
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP9]], [[TMP10]]
@ -33,23 +47,23 @@ define amdgpu_kernel void @kernel_0() sanitize_address {
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 96
; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 32)
; CHECK-NEXT: br label [[TMP7]]
; CHECK: 18:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
; CHECK-NEXT: br label %[[BB18]]
; CHECK: [[BB18]]:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP15:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, align 8
; CHECK-NEXT: call void @call_store_A()
; CHECK-NEXT: br label [[CONDFREE:%.*]]
; CHECK: CondFree:
; CHECK-NEXT: br label %[[CONDFREE:.*]]
; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
; CHECK: Free:
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP16:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64
; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr addrspace(1) [[TMP15]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP18]], i64 [[TMP17]])
; CHECK-NEXT: br label [[END]]
; CHECK: End:
; CHECK-NEXT: br label %[[END]]
; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
call void @call_store_A()
@ -58,16 +72,16 @@ define amdgpu_kernel void @kernel_0() sanitize_address {
define amdgpu_kernel void @kernel_1() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @kernel_1(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
; CHECK-NEXT: WId:
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META4:![0-9]+]] {
; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP14:%.*]]
; CHECK: Malloc:
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB23:.*]]
; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, align 4
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 0, i32 2), align 4
; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TMP12]], [[TMP20]]
@ -90,24 +104,24 @@ define amdgpu_kernel void @kernel_1() sanitize_address {
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP26]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP27]], i64 24)
; CHECK-NEXT: br label [[TMP14]]
; CHECK: 23:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
; CHECK-NEXT: br label %[[BB23]]
; CHECK: [[BB23]]:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, align 8
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_1.dynlds) ]
; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr()
; CHECK-NEXT: br label [[CONDFREE:%.*]]
; CHECK: CondFree:
; CHECK-NEXT: br label %[[CONDFREE:.*]]
; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
; CHECK: Free:
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP23:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[TMP23]] to i64
; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP25]], i64 [[TMP24]])
; CHECK-NEXT: br label [[END]]
; CHECK: End:
; CHECK-NEXT: br label %[[END]]
; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
%ptr = call ptr @get_B_ptr()
@ -116,16 +130,16 @@ define amdgpu_kernel void @kernel_1() sanitize_address {
define amdgpu_kernel void @kernel_2() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @kernel_2(
; CHECK-SAME: ) #[[ATTR0]] !llvm.amdgcn.lds.kernel.id [[META4:![0-9]+]] {
; CHECK-NEXT: WId:
; CHECK-SAME: ) #[[ATTR0]] !llvm.amdgcn.lds.kernel.id [[META5:![0-9]+]] {
; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
; CHECK: Malloc:
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_2_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_2_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 2), align 4
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP9]], [[TMP10]]
@ -141,23 +155,23 @@ define amdgpu_kernel void @kernel_2() sanitize_address {
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 96
; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 32)
; CHECK-NEXT: br label [[TMP7]]
; CHECK: 18:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
; CHECK-NEXT: br label %[[BB18]]
; CHECK: [[BB18]]:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP15:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, align 8
; CHECK-NEXT: call void @store_A()
; CHECK-NEXT: br label [[CONDFREE:%.*]]
; CHECK: CondFree:
; CHECK-NEXT: br label %[[CONDFREE:.*]]
; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
; CHECK: Free:
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP16:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64
; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr addrspace(1) [[TMP15]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP18]], i64 [[TMP17]])
; CHECK-NEXT: br label [[END]]
; CHECK: End:
; CHECK-NEXT: br label %[[END]]
; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
call void @store_A()
@ -166,16 +180,16 @@ define amdgpu_kernel void @kernel_2() sanitize_address {
define amdgpu_kernel void @kernel_3() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @kernel_3(
; CHECK-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id [[META5:![0-9]+]] {
; CHECK-NEXT: WId:
; CHECK-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id [[META6:![0-9]+]] {
; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP14:%.*]]
; CHECK: Malloc:
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB23:.*]]
; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, align 4
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 0, i32 2), align 4
; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TMP12]], [[TMP20]]
@ -198,24 +212,24 @@ define amdgpu_kernel void @kernel_3() sanitize_address {
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP26]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP27]], i64 24)
; CHECK-NEXT: br label [[TMP14]]
; CHECK: 23:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
; CHECK-NEXT: br label %[[BB23]]
; CHECK: [[BB23]]:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3, align 8
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_3.dynlds) ]
; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr()
; CHECK-NEXT: br label [[CONDFREE:%.*]]
; CHECK: CondFree:
; CHECK-NEXT: br label %[[CONDFREE:.*]]
; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
; CHECK: Free:
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP23:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[TMP23]] to i64
; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP25]], i64 [[TMP24]])
; CHECK-NEXT: br label [[END]]
; CHECK: End:
; CHECK-NEXT: br label %[[END]]
; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
%ptr = call ptr @get_B_ptr()
@ -243,7 +257,9 @@ define private void @store_A() sanitize_address {
; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) [[TMP5]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP11]]
; CHECK-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP12]] to ptr
; CHECK-NEXT: store ptr [[TMP10]], ptr null, align 8
; CHECK-NEXT: ret void
;
@ -262,7 +278,9 @@ define private ptr @get_B_ptr() sanitize_address {
; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) [[TMP5]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP11]]
; CHECK-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP12]] to ptr
; CHECK-NEXT: ret ptr [[TMP10]]
;
ret ptr addrspacecast (ptr addrspace(3) @B to ptr)
@ -272,8 +290,6 @@ define private ptr @get_B_ptr() sanitize_address {
!0 = !{i32 4, !"nosanitize_address", i32 1}
;.
; CHECK: [[META2]] = !{i32 0}
; CHECK: [[META3]] = !{i32 1}
; CHECK: [[META4]] = !{i32 2}
; CHECK: [[META5]] = !{i32 3}
; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8" }
; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8,8" }
;.

View File

@ -29,8 +29,12 @@ define void @use_variables() sanitize_address {
; CHECK-NEXT: [[TMP10:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP9]], align 8
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(1) [[TMP10]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[TMP11]]
; CHECK-NEXT: [[X:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
; CHECK-NEXT: [[TMP13:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP18]]
; CHECK-NEXT: [[TMP20:%.*]] = addrspacecast ptr addrspace(1) [[TMP19]] to ptr
; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP16]]
; CHECK-NEXT: [[TMP13:%.*]] = addrspacecast ptr addrspace(1) [[TMP17]] to ptr
; CHECK-NEXT: store i8 3, ptr [[TMP13]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr addrspace(3) [[TMP12]] to i32
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP14]]

View File

@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
; Test to check if static LDS is lowered correctly when a non-kernel with LDS accesses is called from kernel.
@ -28,8 +28,12 @@ define void @use_variables() sanitize_address {
; CHECK-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP11]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) [[TMP12]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP10]]
; CHECK-NEXT: [[X:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr
; CHECK-NEXT: [[TMP16:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr
; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP13]]
; CHECK-NEXT: [[TMP19:%.*]] = addrspacecast ptr addrspace(1) [[TMP18]] to ptr
; CHECK-NEXT: [[TMP20:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP20]]
; CHECK-NEXT: [[TMP16:%.*]] = addrspacecast ptr addrspace(1) [[TMP17]] to ptr
; CHECK-NEXT: store i8 3, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr addrspace(3) [[TMP15]] to i32
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP14]]
@ -44,16 +48,16 @@ define void @use_variables() sanitize_address {
define amdgpu_kernel void @k0() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @k0(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META1:![0-9]+]] {
; CHECK-NEXT: WId:
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
; CHECK: Malloc:
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB24:.*]]
; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP13]], [[TMP14]]
@ -78,9 +82,9 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 132
; CHECK-NEXT: [[TMP68:%.*]] = ptrtoint ptr addrspace(1) [[TMP67]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP68]], i64 28)
; CHECK-NEXT: br label [[TMP7]]
; CHECK: 24:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
; CHECK-NEXT: br label %[[BB24]]
; CHECK: [[BB24]]:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@ -94,17 +98,17 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK-NEXT: [[TMP47:%.*]] = ptrtoint ptr addrspace(3) [[TMP18]] to i32
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP47]]
; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP48]], align 2
; CHECK-NEXT: br label [[CONDFREE1:%.*]]
; CHECK: CondFree:
; CHECK-NEXT: br label %[[CONDFREE:.*]]
; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
; CHECK: Free:
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]])
; CHECK-NEXT: br label [[END]]
; CHECK: End:
; CHECK-NEXT: br label %[[END]]
; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
call void @use_variables()
@ -124,5 +128,6 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
;.
; CHECK: [[META0]] = !{i32 0, i32 1}
; CHECK: [[META1]] = !{i32 0}
; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
; CHECK: [[META2]] = !{i32 0}
;.

View File

@ -0,0 +1,76 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
@lds = internal addrspace(3) global [5 x i32] poison, align 16
;.
; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 16, !absolute_symbol [[META0:![0-9]+]]
; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 20, i32 64 } }, no_sanitize_address
;.
define amdgpu_kernel void @k0() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @k0(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 52
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 44)
; CHECK-NEXT: br label %[[BB18]]
; CHECK: [[BB18]]:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP20]]
; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(3) [[TMP21]] to i32
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP22]]
; CHECK-NEXT: [[TMP24:%.*]] = addrspacecast ptr addrspace(1) [[TMP23]] to ptr
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [5 x i32], ptr [[TMP24]], i64 0, i64 0
; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4
; CHECK-NEXT: br label %[[CONDFREE:.*]]
; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP25:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr [[TMP25]] to i64
; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP27]], i64 [[TMP26]])
; CHECK-NEXT: br label %[[END]]
; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
%gep = getelementptr inbounds [5 x i32], ptr addrspacecast (ptr addrspace(3) @lds to ptr), i64 0, i64 0
store i32 1, ptr %gep, align 4
ret void
}
!llvm.module.flags = !{!0}
!0 = !{i32 4, !"nosanitize_address", i32 1}
;.
; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="16" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
;.
; CHECK: [[META0]] = !{i32 0, i32 1}
; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
;.

View File

@ -0,0 +1,95 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
; Test to check if vector of static LDS ptrs accesses in kernel are lowered correctly.
@lds_var1 = internal addrspace(3) global i32 poison
@lds_var2 = internal addrspace(3) global i32 poison
;.
; CHECK: @llvm.amdgcn.sw.lds.example = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0:![0-9]+]]
; CHECK: @llvm.amdgcn.sw.lds.example.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.example.md.type { %llvm.amdgcn.sw.lds.example.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.example.md.item { i32 32, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.example.md.item { i32 64, i32 4, i32 32 } }, no_sanitize_address
;.
define amdgpu_kernel void @example() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @example(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[ENTRY:.*]]
; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_EXAMPLE_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.example.md, i32 0, i32 2, i32 0), align 4
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_EXAMPLE_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.example.md, i32 0, i32 2, i32 2), align 4
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.example, align 8
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 36
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 28)
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP19]], i64 28)
; CHECK-NEXT: br label %[[ENTRY]]
; CHECK: [[ENTRY]]:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP20:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.example, align 8
; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_EXAMPLE_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.example.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.example, i32 [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_EXAMPLE_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.example.md, i32 0, i32 2, i32 0), align 4
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.example, i32 [[TMP23]]
; CHECK-NEXT: [[VEC_LDS_PTRS:%.*]] = insertelement <2 x ptr addrspace(3)> poison, ptr addrspace(3) [[TMP22]], i32 0
; CHECK-NEXT: [[VEC_LDS_PTRS1:%.*]] = insertelement <2 x ptr addrspace(3)> [[VEC_LDS_PTRS]], ptr addrspace(3) [[TMP24]], i32 1
; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint <2 x ptr addrspace(3)> [[VEC_LDS_PTRS1]] to <2 x i32>
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], <2 x i32> [[TMP25]]
; CHECK-NEXT: [[TMP32:%.*]] = addrspacecast <2 x ptr addrspace(1)> [[TMP31]] to <2 x ptr>
; CHECK-NEXT: [[ELEM0:%.*]] = extractelement <2 x ptr> [[TMP32]], i32 0
; CHECK-NEXT: store i32 42, ptr [[ELEM0]], align 4
; CHECK-NEXT: [[ELEM1:%.*]] = extractelement <2 x ptr> [[TMP32]], i32 1
; CHECK-NEXT: store i32 43, ptr [[ELEM1]], align 4
; CHECK-NEXT: br label %[[CONDFREE:.*]]
; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP33:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP33]] to i64
; CHECK-NEXT: [[TMP35:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP35]], i64 [[TMP34]])
; CHECK-NEXT: br label %[[END]]
; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
entry:
; Create a vector of flat pointers
%vec_lds_ptrs = insertelement <2 x ptr addrspace(3)> poison, ptr addrspace(3) @lds_var1, i32 0
%vec_lds_ptrs1 = insertelement <2 x ptr addrspace(3)> %vec_lds_ptrs, ptr addrspace(3) @lds_var2, i32 1
%vec_flat_ptrs = addrspacecast <2 x ptr addrspace(3)> %vec_lds_ptrs1 to <2 x ptr>
%elem0 = extractelement <2 x ptr> %vec_flat_ptrs, i32 0
store i32 42, ptr %elem0, align 4
%elem1 = extractelement <2 x ptr> %vec_flat_ptrs, i32 1
store i32 43, ptr %elem1, align 4
ret void
}
!llvm.module.flags = !{!0}
!0 = !{i32 4, !"nosanitize_address", i32 1}
;.
; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
;.
; CHECK: [[META0]] = !{i32 0, i32 1}
; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
;.