AMDGPU: Simplify placeholder replacement in AMDGPUPromoteAlloca (#188202)
If `promoteAllocaUserToVector` returns the placeholder, it means the instruction does not actually modify the alloca. we don't need to add the placeholder as block available value for correctness. Instructions appear afterwards in the the same block could still get the placeholder as source value through GetCurVal() call. Instructions in other block which access the alloca will be set up later when we really do placeholder replacement. This help simplify the placeholder replacement logic.
This commit is contained in:
parent
f0b58c16e1
commit
d69fc653c1
@ -1140,7 +1140,7 @@ void AMDGPUPromoteAllocaImpl::promoteAllocaToVector(AllocaAnalysis &AA) {
|
||||
//
|
||||
// Insert a placeholder whenever we need the vector value at the top of a
|
||||
// basic block.
|
||||
SmallVector<Instruction *> Placeholders;
|
||||
SmallSetVector<Instruction *, 8> Placeholders;
|
||||
forEachWorkListItem(AA.Vector.Worklist, [&](Instruction *I) {
|
||||
BasicBlock *BB = I->getParent();
|
||||
auto GetCurVal = [&]() -> Value * {
|
||||
@ -1155,29 +1155,27 @@ void AMDGPUPromoteAllocaImpl::promoteAllocaToVector(AllocaAnalysis &AA) {
|
||||
IRBuilder<> Builder(I);
|
||||
auto *Placeholder = cast<Instruction>(Builder.CreateFreeze(
|
||||
PoisonValue::get(AA.Vector.Ty), "promotealloca.placeholder"));
|
||||
Placeholders.push_back(Placeholder);
|
||||
Placeholders.insert(Placeholder);
|
||||
return Placeholders.back();
|
||||
};
|
||||
|
||||
Value *Result = promoteAllocaUserToVector(I, *DL, AA, VecStoreSize,
|
||||
ElementSize, GetCurVal);
|
||||
if (Result)
|
||||
// If the returned result is a placeholder, it means the instruction does
|
||||
// not really modify the alloca. So no need to make it being available value
|
||||
// to SSAUpdater.
|
||||
// This will stop placeholder being cached in SSAUpdater. The cached
|
||||
// placeholder may cause stale pointer being referenced when doing
|
||||
// placeholder replacement.
|
||||
if (Result && (!isa<Instruction>(Result) ||
|
||||
!Placeholders.contains(cast<Instruction>(Result))))
|
||||
Updater.AddAvailableValue(BB, Result);
|
||||
});
|
||||
|
||||
// Now fixup the placeholders.
|
||||
SmallVector<Value *> PlaceholderToNewVal(Placeholders.size());
|
||||
for (auto [Index, Placeholder] : enumerate(Placeholders)) {
|
||||
Value *NewVal = Updater.GetValueInMiddleOfBlock(Placeholder->getParent());
|
||||
PlaceholderToNewVal[Index] = NewVal;
|
||||
Placeholder->replaceAllUsesWith(NewVal);
|
||||
}
|
||||
// Note: we cannot merge this loop with the previous one because it is
|
||||
// possible that the placeholder itself can be used in the SSAUpdater. The
|
||||
// replaceAllUsesWith doesn't replace those uses.
|
||||
for (auto [Index, Placeholder] : enumerate(Placeholders)) {
|
||||
if (!Placeholder->use_empty())
|
||||
Placeholder->replaceAllUsesWith(PlaceholderToNewVal[Index]);
|
||||
for (Instruction *Placeholder : Placeholders) {
|
||||
Placeholder->replaceAllUsesWith(
|
||||
Updater.GetValueInMiddleOfBlock(Placeholder->getParent()));
|
||||
Placeholder->eraseFromParent();
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,20 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca %s | FileCheck %s
|
||||
|
||||
; This used to crash when we deplay the value replacement in AMDGPUPromoteAlloca.
|
||||
; The test might be helpful to catch similar issue around placeholder replacement.
|
||||
|
||||
define <2 x i32> @replace_placeholder_correctly() {
|
||||
; CHECK-LABEL: define <2 x i32> @replace_placeholder_correctly() {
|
||||
; CHECK-NEXT: [[A:%.*]] = freeze <2 x i32> poison
|
||||
; CHECK-NEXT: br label %[[BB2:.*]]
|
||||
; CHECK: [[BB2]]:
|
||||
; CHECK-NEXT: ret <2 x i32> [[A]]
|
||||
;
|
||||
%a = alloca <2 x i32>, align 8, addrspace(5)
|
||||
br label %ret
|
||||
|
||||
ret:
|
||||
%v = load <2 x i32>, ptr addrspace(5) %a, align 8
|
||||
ret <2 x i32> %v
|
||||
}
|
||||
@ -4,7 +4,7 @@
|
||||
define amdgpu_kernel void @kernel(ptr %p) {
|
||||
; CHECK-LABEL: define amdgpu_kernel void @kernel(
|
||||
; CHECK-SAME: ptr [[P:%.*]]) {
|
||||
; CHECK-NEXT: [[ENTRY:.*]]:
|
||||
; CHECK-NEXT: [[ENTRY:.*:]]
|
||||
; CHECK-NEXT: [[ASCAST:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
|
||||
; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <14 x i32> poison
|
||||
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[ASCAST]], align 4
|
||||
@ -13,8 +13,7 @@ define amdgpu_kernel void @kernel(ptr %p) {
|
||||
; CHECK: [[BB_2]]:
|
||||
; CHECK-NEXT: br label %[[BB_1]]
|
||||
; CHECK: [[BB_1]]:
|
||||
; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <14 x i32> [ [[ALLOCA]], %[[BB_2]] ], [ [[ALLOCA]], %[[ENTRY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <14 x i32> [[PROMOTEALLOCA]], i32 0, i32 0
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <14 x i32> [[ALLOCA]], i32 0, i32 0
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user