llvm-project/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
Shilei Tian 578741b5e8
[AMDGPU][Attributor] Rework update of AAAMDWavesPerEU (#123995)
Currently, we use `AAAMDWavesPerEU` to iteratively update values based
on attributes from the associated function, potentially propagating
user-annotated values, along with `AAAMDFlatWorkGroupSize`. Similarly,
we have `AAAMDFlatWorkGroupSize`. However, since the value calculated
through the flat workgroup size always dominates the user annotation
(i.e., the attribute), running `AAAMDWavesPerEU` iteratively is
unnecessary if no user-annotated value exists.

This PR completely rewrites how the `amdgpu-waves-per-eu` attribute is
handled in `AMDGPUAttributor`. The key changes are as follows:

- `AAAMDFlatWorkGroupSize` remains unchanged.
- `AAAMDWavesPerEU` now only propagates user-annotated values.
- A new function is added to check and update `amdgpu-waves-per-eu`
based on the following rules:
- No waves per eu, no flat workgroup size: Assume a flat workgroup size
of `1,1024` and compute waves per eu based on this.
- No waves per eu, flat workgroup size exists: Use the provided flat
workgroup size to compute waves-per-eu.
- Waves per eu exists, no flat workgroup size: This is a tricky case. In
this PR, we assume a flat workgroup size of `1,1024`, but this can be
adjusted if a different approach is preferred. Alternatively, we could
directly use the user-annotated value.
- Both waves per eu and flat workgroup size exist: If there’s a
conflict, the value derived from the flat workgroup size takes
precedence over waves per eu.

This PR also updates the logic for merging two waves per eu pairs. The
current implementation, which uses `clampStateAndIndicateChange` to
compute a union, might not be ideal. If we think from ensure proper
resource allocation perspective, for instance, if one pair specifies a
minimum of 2 waves per eu, and another specifies a minimum of 4, we
should guarantee that 4 waves per eu can be supported, as failing to do
so could result in excessive resource allocation per wave. A similar
principle applies to the upper bound. Thus, the PR uses the following
approach for merging two pairs, `lo_a,up_a` and `lo_b,up_b`: `max(lo_a,
lo_b), max(up_a, up_b)`. This ensures that resource allocation adheres
to the stricter constraints from both inputs.

Fix #123092.
2025-05-17 01:01:09 -04:00

721 lines
36 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA %s
; TODO: The test contains UB which is refined by the Attributor and should be removed.
declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0
declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
declare ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
define void @use_workitem_id_x() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_workitem_id_x
; ATTRIBUTOR_HSA-SAME: () #[[ATTR1:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; ATTRIBUTOR_HSA-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) poison, align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
%val = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %val, ptr addrspace(1) poison
ret void
}
define void @use_workitem_id_y() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_workitem_id_y
; ATTRIBUTOR_HSA-SAME: () #[[ATTR2:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; ATTRIBUTOR_HSA-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) poison, align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
%val = call i32 @llvm.amdgcn.workitem.id.y()
store volatile i32 %val, ptr addrspace(1) poison
ret void
}
define void @use_workitem_id_z() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_workitem_id_z
; ATTRIBUTOR_HSA-SAME: () #[[ATTR3:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; ATTRIBUTOR_HSA-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) poison, align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
%val = call i32 @llvm.amdgcn.workitem.id.z()
store volatile i32 %val, ptr addrspace(1) poison
ret void
}
define void @use_workgroup_id_x() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_workgroup_id_x
; ATTRIBUTOR_HSA-SAME: () #[[ATTR4:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; ATTRIBUTOR_HSA-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) poison, align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
%val = call i32 @llvm.amdgcn.workgroup.id.x()
store volatile i32 %val, ptr addrspace(1) poison
ret void
}
define void @use_workgroup_id_y() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_workgroup_id_y
; ATTRIBUTOR_HSA-SAME: () #[[ATTR5:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; ATTRIBUTOR_HSA-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) poison, align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
%val = call i32 @llvm.amdgcn.workgroup.id.y()
store volatile i32 %val, ptr addrspace(1) poison
ret void
}
define void @use_workgroup_id_z() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_workgroup_id_z
; ATTRIBUTOR_HSA-SAME: () #[[ATTR6:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; ATTRIBUTOR_HSA-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) poison, align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
%val = call i32 @llvm.amdgcn.workgroup.id.z()
store volatile i32 %val, ptr addrspace(1) poison
ret void
}
define void @use_dispatch_ptr() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr
; ATTRIBUTOR_HSA-SAME: () #[[ATTR7:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; ATTRIBUTOR_HSA-NEXT: store volatile ptr addrspace(4) [[DISPATCH_PTR]], ptr addrspace(1) poison, align 8
; ATTRIBUTOR_HSA-NEXT: ret void
;
%dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
store volatile ptr addrspace(4) %dispatch.ptr, ptr addrspace(1) poison
ret void
}
define void @use_queue_ptr() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_queue_ptr
; ATTRIBUTOR_HSA-SAME: () #[[ATTR8:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: [[QUEUE_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
; ATTRIBUTOR_HSA-NEXT: store volatile ptr addrspace(4) [[QUEUE_PTR]], ptr addrspace(1) poison, align 8
; ATTRIBUTOR_HSA-NEXT: ret void
;
%queue.ptr = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
store volatile ptr addrspace(4) %queue.ptr, ptr addrspace(1) poison
ret void
}
define void @use_dispatch_id() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_dispatch_id
; ATTRIBUTOR_HSA-SAME: () #[[ATTR9:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = call i64 @llvm.amdgcn.dispatch.id()
; ATTRIBUTOR_HSA-NEXT: store volatile i64 [[VAL]], ptr addrspace(1) poison, align 8
; ATTRIBUTOR_HSA-NEXT: ret void
;
%val = call i64 @llvm.amdgcn.dispatch.id()
store volatile i64 %val, ptr addrspace(1) poison
ret void
}
define void @use_workgroup_id_y_workgroup_id_z() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_workgroup_id_y_workgroup_id_z
; ATTRIBUTOR_HSA-SAME: () #[[ATTR10:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; ATTRIBUTOR_HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; ATTRIBUTOR_HSA-NEXT: store volatile i32 [[VAL0]], ptr addrspace(1) poison, align 4
; ATTRIBUTOR_HSA-NEXT: store volatile i32 [[VAL1]], ptr addrspace(1) poison, align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
%val0 = call i32 @llvm.amdgcn.workgroup.id.y()
%val1 = call i32 @llvm.amdgcn.workgroup.id.z()
store volatile i32 %val0, ptr addrspace(1) poison
store volatile i32 %val1, ptr addrspace(1) poison
ret void
}
define void @func_indirect_use_workitem_id_x() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_x
; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] {
; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_x()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @use_workitem_id_x()
ret void
}
define void @kernel_indirect_use_workitem_id_x() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workitem_id_x
; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] {
; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_x()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @use_workitem_id_x()
ret void
}
define void @func_indirect_use_workitem_id_y() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_y
; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_y()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @use_workitem_id_y()
ret void
}
define void @func_indirect_use_workitem_id_z() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_z
; ATTRIBUTOR_HSA-SAME: () #[[ATTR3]] {
; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_z()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @use_workitem_id_z()
ret void
}
define void @func_indirect_use_workgroup_id_x() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_x
; ATTRIBUTOR_HSA-SAME: () #[[ATTR4]] {
; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_x()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @use_workgroup_id_x()
ret void
}
define void @kernel_indirect_use_workgroup_id_x() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workgroup_id_x
; ATTRIBUTOR_HSA-SAME: () #[[ATTR4]] {
; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_x()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @use_workgroup_id_x()
ret void
}
define void @func_indirect_use_workgroup_id_y() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y
; ATTRIBUTOR_HSA-SAME: () #[[ATTR5]] {
; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_y()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @use_workgroup_id_y()
ret void
}
define void @func_indirect_use_workgroup_id_z() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_z
; ATTRIBUTOR_HSA-SAME: () #[[ATTR6]] {
; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_z()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @use_workgroup_id_z()
ret void
}
define void @func_indirect_indirect_use_workgroup_id_y() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_indirect_use_workgroup_id_y
; ATTRIBUTOR_HSA-SAME: () #[[ATTR5]] {
; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_use_workgroup_id_y()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @func_indirect_use_workgroup_id_y()
ret void
}
define void @indirect_x2_use_workgroup_id_y() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_x2_use_workgroup_id_y
; ATTRIBUTOR_HSA-SAME: () #[[ATTR5]] {
; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_indirect_use_workgroup_id_y()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @func_indirect_indirect_use_workgroup_id_y()
ret void
}
define void @func_indirect_use_dispatch_ptr() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr
; ATTRIBUTOR_HSA-SAME: () #[[ATTR7]] {
; ATTRIBUTOR_HSA-NEXT: call void @use_dispatch_ptr()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @use_dispatch_ptr()
ret void
}
define void @func_indirect_use_queue_ptr() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_queue_ptr
; ATTRIBUTOR_HSA-SAME: () #[[ATTR8]] {
; ATTRIBUTOR_HSA-NEXT: call void @use_queue_ptr()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @use_queue_ptr()
ret void
}
define void @func_indirect_use_dispatch_id() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_id
; ATTRIBUTOR_HSA-SAME: () #[[ATTR9]] {
; ATTRIBUTOR_HSA-NEXT: call void @use_dispatch_id()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @use_dispatch_id()
ret void
}
define void @func_indirect_use_workgroup_id_y_workgroup_id_z() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y_workgroup_id_z
; ATTRIBUTOR_HSA-SAME: () #[[ATTR11:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_use_workgroup_id_y_workgroup_id_z()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @func_indirect_use_workgroup_id_y_workgroup_id_z()
ret void
}
define void @recursive_use_workitem_id_y() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@recursive_use_workitem_id_y
; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; ATTRIBUTOR_HSA-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) poison, align 4
; ATTRIBUTOR_HSA-NEXT: call void @recursive_use_workitem_id_y()
; ATTRIBUTOR_HSA-NEXT: ret void
;
%val = call i32 @llvm.amdgcn.workitem.id.y()
store volatile i32 %val, ptr addrspace(1) poison
call void @recursive_use_workitem_id_y()
ret void
}
define void @call_recursive_use_workitem_id_y() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@call_recursive_use_workitem_id_y
; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
; ATTRIBUTOR_HSA-NEXT: call void @recursive_use_workitem_id_y()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @recursive_use_workitem_id_y()
ret void
}
define void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
; ATTRIBUTOR_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr addrspace(4)
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(4) [[STOF]], align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(4)
store volatile i32 0, ptr addrspace(4) %stof
ret void
}
define void @use_group_to_flat_addrspacecast_gfx9(ptr addrspace(3) %ptr) #2 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_gfx9
; ATTRIBUTOR_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR13:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr addrspace(4)
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(4) [[STOF]], align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(4)
store volatile i32 0, ptr addrspace(4) %stof
ret void
}
define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(ptr addrspace(3) %ptr) #2 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_queue_ptr_gfx9
; ATTRIBUTOR_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR14:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr addrspace(4)
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(4) [[STOF]], align 4
; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_use_queue_ptr()
; ATTRIBUTOR_HSA-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(4)
store volatile i32 0, ptr addrspace(4) %stof
call void @func_indirect_use_queue_ptr()
ret void
}
define void @indirect_use_group_to_flat_addrspacecast() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast
; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] {
; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast(ptr addrspace(3) null)
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @use_group_to_flat_addrspacecast(ptr addrspace(3) null)
ret void
}
define void @indirect_use_group_to_flat_addrspacecast_gfx9() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_gfx9
; ATTRIBUTOR_HSA-SAME: () #[[ATTR11]] {
; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast_gfx9(ptr addrspace(3) null)
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @use_group_to_flat_addrspacecast_gfx9(ptr addrspace(3) null)
ret void
}
define void @indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9
; ATTRIBUTOR_HSA-SAME: () #[[ATTR8]] {
; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(ptr addrspace(3) null)
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(ptr addrspace(3) null)
ret void
}
define void @use_kernarg_segment_ptr() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr
; ATTRIBUTOR_HSA-SAME: () #[[ATTR11]] {
; ATTRIBUTOR_HSA-NEXT: [[KERNARG_SEGMENT_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; ATTRIBUTOR_HSA-NEXT: store volatile ptr addrspace(4) [[KERNARG_SEGMENT_PTR]], ptr addrspace(1) poison, align 8
; ATTRIBUTOR_HSA-NEXT: ret void
;
%kernarg.segment.ptr = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
store volatile ptr addrspace(4) %kernarg.segment.ptr, ptr addrspace(1) poison
ret void
}
define void @func_indirect_use_kernarg_segment_ptr() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_kernarg_segment_ptr
; ATTRIBUTOR_HSA-SAME: () #[[ATTR11]] {
; ATTRIBUTOR_HSA-NEXT: call void @use_kernarg_segment_ptr()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @use_kernarg_segment_ptr()
ret void
}
define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_use_implicitarg_ptr
; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] {
; ATTRIBUTOR_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; ATTRIBUTOR_HSA-NEXT: store volatile ptr addrspace(4) [[IMPLICITARG_PTR]], ptr addrspace(1) poison, align 8
; ATTRIBUTOR_HSA-NEXT: ret void
;
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
store volatile ptr addrspace(4) %implicitarg.ptr, ptr addrspace(1) poison
ret void
}
define void @use_implicitarg_ptr() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_implicitarg_ptr
; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] {
; ATTRIBUTOR_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; ATTRIBUTOR_HSA-NEXT: store volatile ptr addrspace(4) [[IMPLICITARG_PTR]], ptr addrspace(1) poison, align 8
; ATTRIBUTOR_HSA-NEXT: ret void
;
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
store volatile ptr addrspace(4) %implicitarg.ptr, ptr addrspace(1) poison
ret void
}
define void @func_indirect_use_implicitarg_ptr() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_implicitarg_ptr
; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] {
; ATTRIBUTOR_HSA-NEXT: call void @use_implicitarg_ptr()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @use_implicitarg_ptr()
ret void
}
declare void @external.func() #3
; This function gets deleted.
define internal void @defined.func() #3 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@defined.func
; ATTRIBUTOR_HSA-SAME: () #[[ATTR16:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: ret void
;
ret void
}
define void @func_call_external() #3 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_external
; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: call void @external.func()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @external.func()
ret void
}
define void @func_call_defined() #3 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_defined
; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] {
; ATTRIBUTOR_HSA-NEXT: call void @defined.func()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @defined.func()
ret void
}
define void @func_call_asm() #3 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_asm
; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] {
; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR25:[0-9]+]]
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void asm sideeffect "", ""() #3
ret void
}
define amdgpu_kernel void @kern_call_external() #3 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_external
; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] {
; ATTRIBUTOR_HSA-NEXT: call void @external.func()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @external.func()
ret void
}
define amdgpu_kernel void @func_kern_defined() #3 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_kern_defined
; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] {
; ATTRIBUTOR_HSA-NEXT: call void @defined.func()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @defined.func()
ret void
}
define i32 @use_dispatch_ptr_ret_type() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr_ret_type
; ATTRIBUTOR_HSA-SAME: () #[[ATTR7]] {
; ATTRIBUTOR_HSA-NEXT: [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; ATTRIBUTOR_HSA-NEXT: store volatile ptr addrspace(4) [[DISPATCH_PTR]], ptr addrspace(1) poison, align 8
; ATTRIBUTOR_HSA-NEXT: ret i32 0
;
%dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
store volatile ptr addrspace(4) %dispatch.ptr, ptr addrspace(1) poison
ret i32 0
}
define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #1 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr_constexpr_cast_func
; ATTRIBUTOR_HSA-SAME: () #[[ATTR7]] {
; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float @use_dispatch_ptr_ret_type()
; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]]
;
%f = call float @use_dispatch_ptr_ret_type()
%fadd = fadd float %f, 1.0
ret float %fadd
}
define float @func_indirect_call(ptr %fptr) #3 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_call
; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR17]] {
; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float [[FPTR]]()
; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]]
;
%f = call float %fptr()
%fadd = fadd float %f, 1.0
ret float %fadd
}
declare float @extern() #3
define float @func_extern_call() #3 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_extern_call
; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] {
; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float @extern()
; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]]
;
%f = call float @extern()
%fadd = fadd float %f, 1.0
ret float %fadd
}
define float @func_null_call(ptr %fptr) #3 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_null_call
; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR17]] {
; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float null()
; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]]
;
%f = call float null()
%fadd = fadd float %f, 1.0
ret float %fadd
}
declare float @llvm.amdgcn.rcp.f32(float) #0
; Calls some other recognized intrinsic
define float @func_other_intrinsic_call(float %arg) #3 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_other_intrinsic_call
; ATTRIBUTOR_HSA-SAME: (float [[ARG:%.*]]) #[[ATTR16]] {
; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[ARG]])
; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]]
;
%f = call float @llvm.amdgcn.rcp.f32(float %arg)
%fadd = fadd float %f, 1.0
ret float %fadd
}
; Hostcall needs to be enabled for sanitizers
define amdgpu_kernel void @kern_sanitize_address() #4 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address
; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
store volatile i32 0, ptr addrspace(1) null
ret void
}
; Hostcall needs to be enabled for sanitizers
define void @func_sanitize_address() #4 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_sanitize_address
; ATTRIBUTOR_HSA-SAME: () #[[ATTR18]] {
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
store volatile i32 0, ptr addrspace(1) null
ret void
}
; Hostcall needs to be enabled for sanitizers
define void @func_indirect_sanitize_address() #3 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address
; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @func_sanitize_address()
ret void
}
; Hostcall needs to be enabled for sanitizers
define amdgpu_kernel void @kern_indirect_sanitize_address() #3 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address
; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @func_sanitize_address()
ret void
}
; Marked with amdgpu-no-implicitarg-ptr, and
; sanitize_address. sanitize_address wins and requires the pointer.
declare void @extern_func_sanitize_address() #5
define amdgpu_kernel void @kern_decl_sanitize_address() #3 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_decl_sanitize_address
; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] {
; ATTRIBUTOR_HSA-NEXT: call void @extern_func_sanitize_address()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @extern_func_sanitize_address()
ret void
}
declare void @enqueue_block_decl() #6
define internal void @enqueue_block_def() #6 {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@enqueue_block_def
; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: ret void
;
ret void
}
define amdgpu_kernel void @kern_call_enqueued_block_decl() {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_decl
; ATTRIBUTOR_HSA-SAME: () #[[ATTR23:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_decl()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @enqueue_block_decl()
ret void
}
define amdgpu_kernel void @kern_call_enqueued_block_def() {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_def
; ATTRIBUTOR_HSA-SAME: () #[[ATTR24:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_def()
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @enqueue_block_def()
ret void
}
define void @unused_enqueue_block() {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@unused_enqueue_block
; ATTRIBUTOR_HSA-SAME: () #[[ATTR24]] {
; ATTRIBUTOR_HSA-NEXT: ret void
;
ret void
}
define internal void @known_func() {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@known_func
; ATTRIBUTOR_HSA-SAME: () #[[ATTR24]] {
; ATTRIBUTOR_HSA-NEXT: ret void
;
ret void
}
; Should never happen
define amdgpu_kernel void @kern_callsite_enqueue_block() {
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_callsite_enqueue_block
; ATTRIBUTOR_HSA-SAME: () #[[ATTR24]] {
; ATTRIBUTOR_HSA-NEXT: call void @known_func() #[[ATTR26:[0-9]+]]
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @known_func() #6
ret void
}
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind "target-cpu"="fiji" }
attributes #2 = { nounwind "target-cpu"="gfx900" }
attributes #3 = { nounwind }
attributes #4 = { nounwind sanitize_address }
attributes #5 = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" }
attributes #6 = { "enqueued-block" }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR15:[0-9]+]] = { nounwind "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind sanitize_address "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR20:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR21:[0-9]+]] = { "enqueued-block" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR24]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { nounwind }
; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { "enqueued-block" }
;.