AMDGPU: Simplify placeholder replacement in AMDGPUPromoteAlloca (#188202)

If `promoteAllocaUserToVector` returns the placeholder, it means the
instruction does not actually modify the alloca. we don't need to add
the placeholder as block available value for correctness. Instructions
appear afterwards in the the same block could still get the placeholder
as source value through GetCurVal() call. Instructions in other block
which access the alloca will be set up later when we really do
placeholder replacement.

This help simplify the placeholder replacement logic.
This commit is contained in:
Ruiling, Song 2026-03-25 15:26:44 +08:00 committed by GitHub
parent f0b58c16e1
commit d69fc653c1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 35 additions and 18 deletions

View File

@ -1140,7 +1140,7 @@ void AMDGPUPromoteAllocaImpl::promoteAllocaToVector(AllocaAnalysis &AA) {
//
// Insert a placeholder whenever we need the vector value at the top of a
// basic block.
SmallVector<Instruction *> Placeholders;
SmallSetVector<Instruction *, 8> Placeholders;
forEachWorkListItem(AA.Vector.Worklist, [&](Instruction *I) {
BasicBlock *BB = I->getParent();
auto GetCurVal = [&]() -> Value * {
@ -1155,29 +1155,27 @@ void AMDGPUPromoteAllocaImpl::promoteAllocaToVector(AllocaAnalysis &AA) {
IRBuilder<> Builder(I);
auto *Placeholder = cast<Instruction>(Builder.CreateFreeze(
PoisonValue::get(AA.Vector.Ty), "promotealloca.placeholder"));
Placeholders.push_back(Placeholder);
Placeholders.insert(Placeholder);
return Placeholders.back();
};
Value *Result = promoteAllocaUserToVector(I, *DL, AA, VecStoreSize,
ElementSize, GetCurVal);
if (Result)
// If the returned result is a placeholder, it means the instruction does
// not really modify the alloca. So no need to make it being available value
// to SSAUpdater.
// This will stop placeholder being cached in SSAUpdater. The cached
// placeholder may cause stale pointer being referenced when doing
// placeholder replacement.
if (Result && (!isa<Instruction>(Result) ||
!Placeholders.contains(cast<Instruction>(Result))))
Updater.AddAvailableValue(BB, Result);
});
// Now fixup the placeholders.
SmallVector<Value *> PlaceholderToNewVal(Placeholders.size());
for (auto [Index, Placeholder] : enumerate(Placeholders)) {
Value *NewVal = Updater.GetValueInMiddleOfBlock(Placeholder->getParent());
PlaceholderToNewVal[Index] = NewVal;
Placeholder->replaceAllUsesWith(NewVal);
}
// Note: we cannot merge this loop with the previous one because it is
// possible that the placeholder itself can be used in the SSAUpdater. The
// replaceAllUsesWith doesn't replace those uses.
for (auto [Index, Placeholder] : enumerate(Placeholders)) {
if (!Placeholder->use_empty())
Placeholder->replaceAllUsesWith(PlaceholderToNewVal[Index]);
for (Instruction *Placeholder : Placeholders) {
Placeholder->replaceAllUsesWith(
Updater.GetValueInMiddleOfBlock(Placeholder->getParent()));
Placeholder->eraseFromParent();
}

View File

@ -0,0 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca %s | FileCheck %s
; This used to crash when we deplay the value replacement in AMDGPUPromoteAlloca.
; The test might be helpful to catch similar issue around placeholder replacement.
define <2 x i32> @replace_placeholder_correctly() {
; CHECK-LABEL: define <2 x i32> @replace_placeholder_correctly() {
; CHECK-NEXT: [[A:%.*]] = freeze <2 x i32> poison
; CHECK-NEXT: br label %[[BB2:.*]]
; CHECK: [[BB2]]:
; CHECK-NEXT: ret <2 x i32> [[A]]
;
%a = alloca <2 x i32>, align 8, addrspace(5)
br label %ret
ret:
%v = load <2 x i32>, ptr addrspace(5) %a, align 8
ret <2 x i32> %v
}

View File

@ -4,7 +4,7 @@
define amdgpu_kernel void @kernel(ptr %p) {
; CHECK-LABEL: define amdgpu_kernel void @kernel(
; CHECK-SAME: ptr [[P:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[ASCAST:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <14 x i32> poison
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[ASCAST]], align 4
@ -13,8 +13,7 @@ define amdgpu_kernel void @kernel(ptr %p) {
; CHECK: [[BB_2]]:
; CHECK-NEXT: br label %[[BB_1]]
; CHECK: [[BB_1]]:
; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <14 x i32> [ [[ALLOCA]], %[[BB_2]] ], [ [[ALLOCA]], %[[ENTRY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <14 x i32> [[PROMOTEALLOCA]], i32 0, i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <14 x i32> [[ALLOCA]], i32 0, i32 0
; CHECK-NEXT: ret void
;
entry: