
Previously, the AnnotateKernelFeatures pass inferred two attributes, amdgpu-calls and amdgpu-stack-objects, which were used to help determine whether flat scratch init is allowed. PR #118907 introduced the amdgpu-no-flat-scratch-init attribute. Building on that work, this patch uses that attribute to determine flat scratch init, replacing amdgpu-calls and amdgpu-stack-objects. This also leads to the removal of the AnnotateKernelFeatures pass.
26 lines
1006 B
LLVM
26 lines
1006 B
LLVM
; The pipeline below rewrites the code-object-version placeholder in this
; file's module flags to 400 with sed, feeds the result to llc on stdin
; (target amdgcn-amd-amdhsa, gfx900), and matches llc's output against the
; check lines that use the DOORBELL prefix.
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=DOORBELL %s

declare void @llvm.trap() #0

; Expected .amdhsa_kernel block for the @trap kernel. The -NEXT checks require
; each directive to be on the line immediately after the previous match; the
; final plain check only requires .end_amdhsa_kernel to appear later on.
; DOORBELL: .amdhsa_kernel trap
; DOORBELL-NEXT: .amdhsa_group_segment_fixed_size 0
; DOORBELL-NEXT: .amdhsa_private_segment_fixed_size 0
; DOORBELL-NEXT: .amdhsa_kernarg_size 8
; DOORBELL-NEXT: .amdhsa_user_sgpr_count 14
; DOORBELL-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; DOORBELL: .end_amdhsa_kernel

; Kernel under test: does a volatile store of 1 through %arg0, then calls
; @llvm.trap.
define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) #0 {
  store volatile i32 1, ptr addrspace(1) %arg0
  call void @llvm.trap()
  unreachable
  ; `unreachable` terminates the entry block, so the two instructions below
  ; start a new, implicitly numbered basic block that no branch targets.
  store volatile i32 2, ptr addrspace(1) %arg0
  ret void
}

; Attribute group #0 is referenced by both the @llvm.trap declaration and the
; @trap kernel definition.
attributes #0 = { "amdgpu-no-implicitarg-ptr" }

; The last operand of !0 is a textual placeholder, not valid IR on its own:
; the sed command in the RUN line rewrites it to a number before llc parses
; the module.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}