diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 120b2a0ef0ba..ee9ba9f79844 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -1140,7 +1140,7 @@ void AMDGPUPromoteAllocaImpl::promoteAllocaToVector(AllocaAnalysis &AA) { // // Insert a placeholder whenever we need the vector value at the top of a // basic block. - SmallVector Placeholders; + SmallSetVector Placeholders; forEachWorkListItem(AA.Vector.Worklist, [&](Instruction *I) { BasicBlock *BB = I->getParent(); auto GetCurVal = [&]() -> Value * { @@ -1155,29 +1155,27 @@ void AMDGPUPromoteAllocaImpl::promoteAllocaToVector(AllocaAnalysis &AA) { IRBuilder<> Builder(I); auto *Placeholder = cast(Builder.CreateFreeze( PoisonValue::get(AA.Vector.Ty), "promotealloca.placeholder")); - Placeholders.push_back(Placeholder); + Placeholders.insert(Placeholder); return Placeholders.back(); }; Value *Result = promoteAllocaUserToVector(I, *DL, AA, VecStoreSize, ElementSize, GetCurVal); - if (Result) + // If the returned result is a placeholder, it means the instruction does + // not really modify the alloca. So no need to make it being available value + // to SSAUpdater. + // This will stop placeholder being cached in SSAUpdater. The cached + // placeholder may cause stale pointer being referenced when doing + // placeholder replacement. + if (Result && (!isa(Result) || + !Placeholders.contains(cast(Result)))) Updater.AddAvailableValue(BB, Result); }); // Now fixup the placeholders. - SmallVector PlaceholderToNewVal(Placeholders.size()); - for (auto [Index, Placeholder] : enumerate(Placeholders)) { - Value *NewVal = Updater.GetValueInMiddleOfBlock(Placeholder->getParent()); - PlaceholderToNewVal[Index] = NewVal; - Placeholder->replaceAllUsesWith(NewVal); - } - // Note: we cannot merge this loop with the previous one because it is - // possible that the placeholder itself can be used in the SSAUpdater. The - // replaceAllUsesWith doesn't replace those uses. - for (auto [Index, Placeholder] : enumerate(Placeholders)) { - if (!Placeholder->use_empty()) - Placeholder->replaceAllUsesWith(PlaceholderToNewVal[Index]); + for (Instruction *Placeholder : Placeholders) { + Placeholder->replaceAllUsesWith( + Updater.GetValueInMiddleOfBlock(Placeholder->getParent())); Placeholder->eraseFromParent(); } diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-placeholder-replacement.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-placeholder-replacement.ll new file mode 100644 index 000000000000..f3993d1a6621 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-placeholder-replacement.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca %s | FileCheck %s + +; This used to crash when we deplay the value replacement in AMDGPUPromoteAlloca. +; The test might be helpful to catch similar issue around placeholder replacement. + +define <2 x i32> @replace_placeholder_correctly() { +; CHECK-LABEL: define <2 x i32> @replace_placeholder_correctly() { +; CHECK-NEXT: [[A:%.*]] = freeze <2 x i32> poison +; CHECK-NEXT: br label %[[BB2:.*]] +; CHECK: [[BB2]]: +; CHECK-NEXT: ret <2 x i32> [[A]] +; + %a = alloca <2 x i32>, align 8, addrspace(5) + br label %ret + +ret: + %v = load <2 x i32>, ptr addrspace(5) %a, align 8 + ret <2 x i32> %v +} diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-use-after-erase.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-use-after-erase.ll index 8d6cee48b416..48f12ad33402 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-use-after-erase.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-use-after-erase.ll @@ -4,7 +4,7 @@ define amdgpu_kernel void @kernel(ptr %p) { ; CHECK-LABEL: define amdgpu_kernel void @kernel( ; CHECK-SAME: ptr [[P:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[ASCAST:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1) ; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <14 x i32> poison ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[ASCAST]], align 4 @@ -13,8 +13,7 @@ define amdgpu_kernel void @kernel(ptr %p) { ; CHECK: [[BB_2]]: ; CHECK-NEXT: br label %[[BB_1]] ; CHECK: [[BB_1]]: -; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <14 x i32> [ [[ALLOCA]], %[[BB_2]] ], [ [[ALLOCA]], %[[ENTRY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <14 x i32> [[PROMOTEALLOCA]], i32 0, i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <14 x i32> [[ALLOCA]], i32 0, i32 0 ; CHECK-NEXT: ret void ; entry: