[AMDGPU] Apply alignment attr for make.buffer.rsrc (#166914)
Calculate the alignment for the `make.buffer.rsrc` intrinsic. The logic is
that the alignment on uses of the return value of `make.buffer.rsrc` should
be capped by the alignment of the base operand of `make.buffer.rsrc`.
For example:
```ll
define float @foo(ptr addrspace(1) align X %ptr) {
%fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %ptr, i16 0, i32 C, i32 0)
%y = load float, ptr addrspace(7) %fat.ptr, align Y
ret float %y
}
```
We hope that `Y = min(X, Y)`.
---
After discussion, it seems improper to let `Y = min(X, Y)`, since that
contradicts the semantics of `align` on a load.
So we apply the original behavior of `align`, which is to let `X` and
`Y` both equal `max(X, Y)`.
---------
Co-authored-by: Shilei Tian <i@tianshilei.me>
This commit is contained in:
parent
6ec8c4351c
commit
e442904e70
@ -1603,7 +1603,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
|
||||
&AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
|
||||
&AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
|
||||
&AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
|
||||
&AAAMDGPUClusterDims::ID});
|
||||
&AAAMDGPUClusterDims::ID, &AAAlign::ID});
|
||||
|
||||
AttributorConfig AC(CGUpdater);
|
||||
AC.IsClosedWorldModule = Options.IsClosedWorld;
|
||||
@ -1661,6 +1661,10 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
|
||||
if (Ptr) {
|
||||
A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
|
||||
A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
|
||||
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Ptr)) {
|
||||
if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
|
||||
A.getOrCreateAAFor<AAAlign>(IRPosition::value(*Ptr));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -5220,6 +5220,13 @@ static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
|
||||
return AlignAA->getKnownAlign().value();
|
||||
break;
|
||||
}
|
||||
case Intrinsic::amdgcn_make_buffer_rsrc: {
|
||||
const auto *AlignAA = A.getAAFor<AAAlign>(
|
||||
QueryingAA, IRPosition::value(*II), DepClassTy::NONE);
|
||||
if (AlignAA)
|
||||
return AlignAA->getKnownAlign().value();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -5543,7 +5550,7 @@ struct AAAlignCallSiteReturned final
|
||||
const auto *AlignAA =
|
||||
A.getAAFor<AAAlign>(*this, IRPosition::value(*(II->getOperand(0))),
|
||||
DepClassTy::REQUIRED);
|
||||
if (AlignAA && AlignAA->isValidState()) {
|
||||
if (AlignAA) {
|
||||
Alignment = std::max(AlignAA->getAssumedAlign(), Alignment);
|
||||
Valid = true;
|
||||
}
|
||||
@ -5554,6 +5561,18 @@ struct AAAlignCallSiteReturned final
|
||||
std::min(this->getAssumedAlign(), Alignment).value());
|
||||
break;
|
||||
}
|
||||
// FIXME: Should introduce target specific sub-attributes and letting
|
||||
// getAAfor<AAAlign> lead to create sub-attribute to handle target
|
||||
// specific intrinsics.
|
||||
case Intrinsic::amdgcn_make_buffer_rsrc: {
|
||||
const auto *AlignAA =
|
||||
A.getAAFor<AAAlign>(*this, IRPosition::value(*(II->getOperand(0))),
|
||||
DepClassTy::REQUIRED);
|
||||
if (AlignAA)
|
||||
return clampStateAndIndicateChange<StateType>(
|
||||
this->getState(), AlignAA->getAssumedAlign().value());
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
40
llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll
Normal file
40
llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll
Normal file
@ -0,0 +1,40 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s -o - | FileCheck %s
|
||||
|
||||
define float @align_back_prop(ptr addrspace(1) align 4 %x) {
|
||||
; CHECK-LABEL: define float @align_back_prop(
|
||||
; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NEXT: [[FAT_PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i64 256, i32 0)
|
||||
; CHECK-NEXT: [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
|
||||
; CHECK-NEXT: ret float [[Y]]
|
||||
;
|
||||
%fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i64 256, i32 0)
|
||||
%y = load float, ptr addrspace(7) %fat.ptr, align 8
|
||||
ret float %y
|
||||
}
|
||||
|
||||
define float @align_foward_prop(ptr addrspace(1) align 8 %x) {
|
||||
; CHECK-LABEL: define float @align_foward_prop(
|
||||
; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0]] {
|
||||
; CHECK-NEXT: [[FAT_PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i64 256, i32 0)
|
||||
; CHECK-NEXT: [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
|
||||
; CHECK-NEXT: ret float [[Y]]
|
||||
;
|
||||
%fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i64 256, i32 0)
|
||||
%y = load float, ptr addrspace(7) %fat.ptr, align 4
|
||||
ret float %y
|
||||
}
|
||||
|
||||
define float @align_mix_prop(ptr addrspace(1) align 4 %x) {
|
||||
; CHECK-LABEL: define float @align_mix_prop(
|
||||
; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0]] {
|
||||
; CHECK-NEXT: [[FAT_PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i64 256, i32 0)
|
||||
; CHECK-NEXT: [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
|
||||
; CHECK-NEXT: [[Z:%.*]] = load float, ptr addrspace(1) [[X]], align 8
|
||||
; CHECK-NEXT: ret float [[Z]]
|
||||
;
|
||||
%fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i64 256, i32 0)
|
||||
%y = load float, ptr addrspace(7) %fat.ptr, align 2
|
||||
%z = load float, ptr addrspace(1) %x, align 8
|
||||
ret float %z
|
||||
}
|
||||
@ -305,7 +305,7 @@ define amdgpu_kernel void @test_call_untouched_ptr() {
|
||||
|
||||
define amdgpu_kernel void @test_make_buffer(ptr addrspace(1) %ptr) {
|
||||
; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer(
|
||||
; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
|
||||
; AMDGCN-SAME: ptr addrspace(1) nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] {
|
||||
; AMDGCN-NEXT: [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i64 noundef 0, i32 noundef 0) #[[ATTR11:[0-9]+]]
|
||||
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4
|
||||
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||
@ -320,7 +320,7 @@ define amdgpu_kernel void @test_make_buffer(ptr addrspace(1) %ptr) {
|
||||
|
||||
define amdgpu_kernel void @test_make_buffer_noalias(ptr addrspace(1) noalias %ptr) {
|
||||
; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer_noalias(
|
||||
; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
|
||||
; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] {
|
||||
; AMDGCN-NEXT: [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i64 noundef 0, i32 noundef 0) #[[ATTR11]]
|
||||
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4, !invariant.load [[META0]]
|
||||
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user