If a function has `amdgpu-flat-work-group-size`, honor it in `initialize` by taking its value directly; otherwise, use the default range as a starting point. We no longer manipulate the known range: it acts as a "throttle" on the assumed range, so if the known range is not set properly for whatever reason, the assumed range cannot be widened properly in `updateImpl`. Another benefit of not touching the known range is that indicating a pessimistic state also invalidates the AA, so `manifest` will not be called. Since we honor the attribute, we do not want to, and will not, add any half-baked attribute to a function.
897 lines
42 KiB
LLVM
897 lines
42 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
|
|
; Test the generation of the attribute amdgpu-no-flat-scratch-init
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=GFX9 %s
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=GFX10 %s
|
|
|
|
;; tests of addrspacecast
|
|
|
|
;.
|
|
; GFX9: @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
|
|
;.
|
|
; GFX10: @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
|
|
;.
|
|
; Non-kernel function: volatile store directly through the addrspace(1) pointer,
; with no addrspacecast. Both check prefixes bind attribute group ATTR0 here.
define void @without_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) {
; GFX9-LABEL: define void @without_global_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX9-NEXT: store volatile i32 0, ptr addrspace(1) [[PTR]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @without_global_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX10-NEXT: store volatile i32 0, ptr addrspace(1) [[PTR]], align 4
; GFX10-NEXT: ret void
;
  store volatile i32 0, ptr addrspace(1) %ptr
  ret void
}
|
|
|
|
; amdgpu_kernel variant: volatile store directly through the addrspace(1)
; pointer, with no addrspacecast. Expected attribute group: ATTR0.
define amdgpu_kernel void @without_global_to_flat_addrspacecast_cc_kernel(ptr addrspace(1) %ptr) {
; GFX9-LABEL: define amdgpu_kernel void @without_global_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: store volatile i32 0, ptr addrspace(1) [[PTR]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @without_global_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: store volatile i32 0, ptr addrspace(1) [[PTR]], align 4
; GFX10-NEXT: ret void
;
  store volatile i32 0, ptr addrspace(1) %ptr
  ret void
}
|
|
|
|
; Non-kernel function: casts the addrspace(1) pointer to a flat pointer and
; stores through the result. Expected attribute group is still ATTR0.
define void @with_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) {
; GFX9-LABEL: define void @with_global_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @with_global_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX10-NEXT: ret void
;
  %stof = addrspacecast ptr addrspace(1) %ptr to ptr
  store volatile i32 0, ptr %stof
  ret void
}
|
|
|
|
; amdgpu_kernel variant: casts the addrspace(1) pointer to a flat pointer and
; stores through the result. Expected attribute group: ATTR0.
define amdgpu_kernel void @with_global_to_flat_addrspacecast_cc_kernel(ptr addrspace(1) %ptr) {
; GFX9-LABEL: define amdgpu_kernel void @with_global_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @with_global_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX10-NEXT: ret void
;
  %stof = addrspacecast ptr addrspace(1) %ptr to ptr
  store volatile i32 0, ptr %stof
  ret void
}
|
|
|
|
; Non-kernel function: volatile store directly through the addrspace(2)
; pointer, with no addrspacecast. Expected attribute group: ATTR0.
define void @without_region_to_flat_addrspacecast(ptr addrspace(2) %ptr) {
; GFX9-LABEL: define void @without_region_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: store volatile i32 0, ptr addrspace(2) [[PTR]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @without_region_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: store volatile i32 0, ptr addrspace(2) [[PTR]], align 4
; GFX10-NEXT: ret void
;
  store volatile i32 0, ptr addrspace(2) %ptr
  ret void
}
|
|
|
|
; amdgpu_kernel variant: volatile store directly through the addrspace(2)
; pointer, with no addrspacecast. Expected attribute group: ATTR0.
define amdgpu_kernel void @without_region_to_flat_addrspacecast_cc_kernel(ptr addrspace(2) %ptr) {
; GFX9-LABEL: define amdgpu_kernel void @without_region_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: store volatile i32 0, ptr addrspace(2) [[PTR]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @without_region_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: store volatile i32 0, ptr addrspace(2) [[PTR]], align 4
; GFX10-NEXT: ret void
;
  store volatile i32 0, ptr addrspace(2) %ptr
  ret void
}
|
|
|
|
; Non-kernel function: casts the addrspace(2) pointer to a flat pointer and
; stores through the result. Expected attribute group: ATTR0.
define void @with_region_to_flat_addrspacecast(ptr addrspace(2) %ptr) {
; GFX9-LABEL: define void @with_region_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr
; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @with_region_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr
; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX10-NEXT: ret void
;
  %stof = addrspacecast ptr addrspace(2) %ptr to ptr
  store volatile i32 0, ptr %stof
  ret void
}
|
|
|
|
; amdgpu_kernel variant: casts the addrspace(2) pointer to a flat pointer and
; stores through the result. Expected attribute group: ATTR0.
define amdgpu_kernel void @with_region_to_flat_addrspacecast_cc_kernel(ptr addrspace(2) %ptr) {
; GFX9-LABEL: define amdgpu_kernel void @with_region_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr
; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @with_region_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr
; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX10-NEXT: ret void
;
  %stof = addrspacecast ptr addrspace(2) %ptr to ptr
  store volatile i32 0, ptr %stof
  ret void
}
|
|
|
|
; Non-kernel function: volatile store directly through the addrspace(3)
; pointer, with no addrspacecast. Expected attribute group: ATTR0.
define void @without_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) {
; GFX9-LABEL: define void @without_group_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: store volatile i32 0, ptr addrspace(3) [[PTR]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @without_group_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: store volatile i32 0, ptr addrspace(3) [[PTR]], align 4
; GFX10-NEXT: ret void
;
  store volatile i32 0, ptr addrspace(3) %ptr
  ret void
}
|
|
|
|
; amdgpu_kernel variant: volatile store directly through the addrspace(3)
; pointer, with no addrspacecast. Expected attribute group: ATTR0.
define amdgpu_kernel void @without_group_to_flat_addrspacecast_cc_kernel(ptr addrspace(3) %ptr) {
; GFX9-LABEL: define amdgpu_kernel void @without_group_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: store volatile i32 0, ptr addrspace(3) [[PTR]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @without_group_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: store volatile i32 0, ptr addrspace(3) [[PTR]], align 4
; GFX10-NEXT: ret void
;
  store volatile i32 0, ptr addrspace(3) %ptr
  ret void
}
|
|
|
|
; Non-kernel function: casts the addrspace(3) pointer to a flat pointer and
; stores through the result. Expected attribute group: ATTR0.
define void @with_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) {
; GFX9-LABEL: define void @with_group_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @with_group_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX10-NEXT: ret void
;
  %stof = addrspacecast ptr addrspace(3) %ptr to ptr
  store volatile i32 0, ptr %stof
  ret void
}
|
|
|
|
; amdgpu_kernel variant: casts the addrspace(3) pointer to a flat pointer and
; stores through the result. Expected attribute group: ATTR0.
define amdgpu_kernel void @with_group_to_flat_addrspacecast_cc_kernel(ptr addrspace(3) %ptr) {
; GFX9-LABEL: define amdgpu_kernel void @with_group_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @with_group_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX10-NEXT: ret void
;
  %stof = addrspacecast ptr addrspace(3) %ptr to ptr
  store volatile i32 0, ptr %stof
  ret void
}
|
|
|
|
; Non-kernel function: volatile store directly through the addrspace(4)
; pointer, with no addrspacecast. Expected attribute group: ATTR0.
define void @without_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) {
; GFX9-LABEL: define void @without_constant_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: store volatile i32 0, ptr addrspace(4) [[PTR]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @without_constant_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: store volatile i32 0, ptr addrspace(4) [[PTR]], align 4
; GFX10-NEXT: ret void
;
  store volatile i32 0, ptr addrspace(4) %ptr
  ret void
}
|
|
|
|
; amdgpu_kernel variant: volatile store directly through the addrspace(4)
; pointer, with no addrspacecast. Expected attribute group: ATTR0.
define amdgpu_kernel void @without_constant_to_flat_addrspacecast_cc_kernel(ptr addrspace(4) %ptr) {
; GFX9-LABEL: define amdgpu_kernel void @without_constant_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: store volatile i32 0, ptr addrspace(4) [[PTR]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @without_constant_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: store volatile i32 0, ptr addrspace(4) [[PTR]], align 4
; GFX10-NEXT: ret void
;
  store volatile i32 0, ptr addrspace(4) %ptr
  ret void
}
|
|
|
|
; Non-kernel function: casts the addrspace(4) pointer to a flat pointer and
; stores through the result. Expected attribute group: ATTR0.
define void @with_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) {
; GFX9-LABEL: define void @with_constant_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @with_constant_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX10-NEXT: ret void
;
  %stof = addrspacecast ptr addrspace(4) %ptr to ptr
  store volatile i32 0, ptr %stof
  ret void
}
|
|
|
|
; amdgpu_kernel variant: casts the addrspace(4) pointer to a flat pointer and
; stores through the result. Expected attribute group: ATTR0.
define amdgpu_kernel void @with_constant_to_flat_addrspacecast_cc_kernel(ptr addrspace(4) %ptr) {
; GFX9-LABEL: define amdgpu_kernel void @with_constant_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @with_constant_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX10-NEXT: ret void
;
  %stof = addrspacecast ptr addrspace(4) %ptr to ptr
  store volatile i32 0, ptr %stof
  ret void
}
|
|
|
|
; Non-kernel function: volatile store directly through the addrspace(5)
; pointer, with no addrspacecast. Expected attribute group: ATTR0.
; Also used as a callee by later call_* tests in this file.
define void @without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define void @without_private_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: store volatile i32 0, ptr addrspace(5) [[PTR]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @without_private_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: store volatile i32 0, ptr addrspace(5) [[PTR]], align 4
; GFX10-NEXT: ret void
;
  store volatile i32 0, ptr addrspace(5) %ptr
  ret void
}
|
|
|
|
; amdgpu_kernel variant: volatile store directly through the addrspace(5)
; pointer, with no addrspacecast. Expected attribute group: ATTR0.
define amdgpu_kernel void @without_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define amdgpu_kernel void @without_private_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: store volatile i32 0, ptr addrspace(5) [[PTR]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @without_private_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: store volatile i32 0, ptr addrspace(5) [[PTR]], align 4
; GFX10-NEXT: ret void
;
  store volatile i32 0, ptr addrspace(5) %ptr
  ret void
}
|
|
|
|
; Non-kernel function: casts the addrspace(5) pointer to a flat pointer and
; stores through the result. This is the first function whose checks bind a
; different attribute group, ATTR1, rather than ATTR0.
; Also used as a callee by later call_* tests in this file.
define void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define void @with_private_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @with_private_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX10-NEXT: ret void
;
  %stof = addrspacecast ptr addrspace(5) %ptr to ptr
  store volatile i32 0, ptr %stof
  ret void
}
|
|
|
|
; amdgpu_kernel variant: casts the addrspace(5) pointer to a flat pointer and
; stores through the result. Expected attribute group: ATTR1.
define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX10-NEXT: ret void
;
  %stof = addrspacecast ptr addrspace(5) %ptr to ptr
  store volatile i32 0, ptr %stof
  ret void
}
|
|
|
|
; Non-kernel function whose only work is a direct call to
; @without_private_to_flat_addrspacecast. Expected attribute group: ATTR0,
; the same group expected on the callee.
define void @call_without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define void @call_without_private_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @call_without_private_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
  call void @without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
  ret void
}
|
|
|
|
; amdgpu_kernel variant: direct call to @without_private_to_flat_addrspacecast.
; Expected attribute group: ATTR0.
define amdgpu_kernel void @call_without_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define amdgpu_kernel void @call_without_private_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @call_without_private_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
  call void @without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
  ret void
}
|
|
|
|
; Non-kernel function whose only work is a direct call to
; @with_private_to_flat_addrspacecast. Expected attribute group: ATTR1,
; the same group expected on the callee.
define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define void @call_with_private_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @call_with_private_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
  call void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
  ret void
}
|
|
|
|
; amdgpu_kernel variant: direct call to @with_private_to_flat_addrspacecast.
; Expected attribute group: ATTR1.
define amdgpu_kernel void @call_with_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define amdgpu_kernel void @call_with_private_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @call_with_private_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
  call void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
  ret void
}
|
|
|
|
; Non-kernel function that calls both the cast-free callee and the callee
; containing the addrspace(5)-to-flat cast. Expected attribute group: ATTR1,
; matching the callee that contains the cast.
define void @call_both_with_and_without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define void @call_both_with_and_without_private_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @call_both_with_and_without_private_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
  call void @without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
  call void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
  ret void
}
|
|
|
|
; amdgpu_kernel variant: calls both the cast-free callee and the callee
; containing the addrspace(5)-to-flat cast. Expected attribute group: ATTR1.
define amdgpu_kernel void @call_both_with_and_without_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define amdgpu_kernel void @call_both_with_and_without_private_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @call_both_with_and_without_private_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
  call void @without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
  call void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
  ret void
}
|
|
|
|
; Two-level call chain: calls @call_without_private_to_flat_addrspacecast,
; which in turn calls the cast-free callee. Expected attribute group: ATTR0.
define void @call_call_without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define void @call_call_without_private_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: call void @call_without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @call_call_without_private_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: call void @call_without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
  call void @call_without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
  ret void
}
|
|
|
|
; amdgpu_kernel variant of the two-level chain ending in the cast-free callee.
; Expected attribute group: ATTR0.
define amdgpu_kernel void @call_call_without_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define amdgpu_kernel void @call_call_without_private_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: call void @call_without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @call_call_without_private_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: call void @call_without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
  call void @call_without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
  ret void
}
|
|
|
|
; Two-level call chain: calls @call_with_private_to_flat_addrspacecast, which
; in turn calls the callee containing the addrspace(5)-to-flat cast.
; Expected attribute group: ATTR1.
define void @call_call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define void @call_call_with_private_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: call void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @call_call_with_private_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: call void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
  call void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
  ret void
}
|
|
|
|
; amdgpu_kernel variant of the two-level chain ending in the callee that
; contains the addrspace(5)-to-flat cast. Expected attribute group: ATTR1.
define amdgpu_kernel void @call_call_with_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define amdgpu_kernel void @call_call_with_private_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: call void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @call_call_with_private_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: call void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
  call void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
  ret void
}
|
|
|
|
; Two-level call chain: calls
; @call_both_with_and_without_private_to_flat_addrspacecast, which calls both
; the cast-free and the cast-containing callee. Expected group: ATTR1.
define void @call_call_both_with_and_without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define void @call_call_both_with_and_without_private_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: call void @call_both_with_and_without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @call_call_both_with_and_without_private_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: call void @call_both_with_and_without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
  call void @call_both_with_and_without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
  ret void
}
|
|
|
|
; amdgpu_kernel variant of the two-level chain through
; @call_both_with_and_without_private_to_flat_addrspacecast.
; Expected attribute group: ATTR1.
define amdgpu_kernel void @call_call_both_with_and_without_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define amdgpu_kernel void @call_call_both_with_and_without_private_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: call void @call_both_with_and_without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @call_call_both_with_and_without_private_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: call void @call_both_with_and_without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
  call void @call_both_with_and_without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
  ret void
}
|
|
|
|
; Non-kernel function that performs the addrspace(5)-to-flat cast itself and
; then calls the cast-free callee. Expected attribute group: ATTR1.
define void @with_cast_call_without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define void @with_cast_call_without_private_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX9-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @with_cast_call_without_private_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX10-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
  %stof = addrspacecast ptr addrspace(5) %ptr to ptr
  store volatile i32 0, ptr %stof
  call void @without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
  ret void
}
|
|
|
|
; amdgpu_kernel variant: performs the addrspace(5)-to-flat cast itself and
; then calls the cast-free callee. Expected attribute group: ATTR1.
define amdgpu_kernel void @with_cast_call_without_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define amdgpu_kernel void @with_cast_call_without_private_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX9-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @with_cast_call_without_private_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX10-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
  %stof = addrspacecast ptr addrspace(5) %ptr to ptr
  store volatile i32 0, ptr %stof
  call void @without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
  ret void
}
|
|
|
|
; Non-kernel function that performs the addrspace(5)-to-flat cast itself and
; also calls the callee containing the same cast. Expected group: ATTR1.
define void @with_cast_call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define void @with_cast_call_with_private_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @with_cast_call_with_private_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
  %stof = addrspacecast ptr addrspace(5) %ptr to ptr
  store volatile i32 0, ptr %stof
  call void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
  ret void
}
|
|
|
|
; amdgpu_kernel variant: performs the addrspace(5)-to-flat cast itself and
; also calls the callee containing the same cast. Expected group: ATTR1.
define amdgpu_kernel void @with_cast_call_with_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define amdgpu_kernel void @with_cast_call_with_private_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @with_cast_call_with_private_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
  %stof = addrspacecast ptr addrspace(5) %ptr to ptr
  store volatile i32 0, ptr %stof
  call void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
  ret void
}
|
|
|
|
;; tests of addrspacecast in a constant
|
|
|
|
; Kernel with no addrspacecast instruction: the addrspace(5)-to-flat cast
; appears only as a constant expression (of inttoptr 123) used as the stored
; value. Expected attribute group: ATTR1, as for the instruction form.
define amdgpu_kernel void @private_constant_expression_use(ptr addrspace(1) nocapture %out) {
; GFX9-LABEL: define amdgpu_kernel void @private_constant_expression_use(
; GFX9-SAME: ptr addrspace(1) nocapture [[OUT:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: store volatile ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr), ptr addrspace(1) [[OUT]], align 8
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @private_constant_expression_use(
; GFX10-SAME: ptr addrspace(1) nocapture [[OUT:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: store volatile ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr), ptr addrspace(1) [[OUT]], align 8
; GFX10-NEXT: ret void
;
  store volatile ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr), ptr addrspace(1) %out, align 8
  ret void
}
|
|
|
|
;; tests of indirect call, intrinsics, inline asm
|
|
|
|
; External constant slot in addrspace(4) holding a pointer; the
; with_indirect_call* functions below load it and call through the result.
@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
|
|
|
|
; Loads a pointer from @gv.fptr0 and calls it, so the callee is not visible
; in this function. The checks expect attribute group ATTR2, which contains
; only "target-cpu" and "uniform-work-group-size" (no "amdgpu-no-*" entries).
define void @with_indirect_call() {
; GFX9-LABEL: define void @with_indirect_call(
; GFX9-SAME: ) #[[ATTR2:[0-9]+]] {
; GFX9-NEXT: [[FPTR:%.*]] = load ptr, ptr addrspace(4) @gv.fptr0, align 8
; GFX9-NEXT: call void [[FPTR]]()
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @with_indirect_call(
; GFX10-SAME: ) #[[ATTR2:[0-9]+]] {
; GFX10-NEXT: [[FPTR:%.*]] = load ptr, ptr addrspace(4) @gv.fptr0, align 8
; GFX10-NEXT: call void [[FPTR]]()
; GFX10-NEXT: ret void
;
  %fptr = load ptr, ptr addrspace(4) @gv.fptr0
  call void %fptr()
  ret void
}
|
|
|
|
; Kernel variant of @with_indirect_call: loads a pointer from @gv.fptr0 and
; calls it. The checks expect the same attribute group ATTR2, which has no
; "amdgpu-no-*" entries.
define amdgpu_kernel void @with_indirect_call_cc_kernel() {
; GFX9-LABEL: define amdgpu_kernel void @with_indirect_call_cc_kernel(
; GFX9-SAME: ) #[[ATTR2]] {
; GFX9-NEXT: [[FPTR:%.*]] = load ptr, ptr addrspace(4) @gv.fptr0, align 8
; GFX9-NEXT: call void [[FPTR]]()
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @with_indirect_call_cc_kernel(
; GFX10-SAME: ) #[[ATTR2]] {
; GFX10-NEXT: [[FPTR:%.*]] = load ptr, ptr addrspace(4) @gv.fptr0, align 8
; GFX10-NEXT: call void [[FPTR]]()
; GFX10-NEXT: ret void
;
  %fptr = load ptr, ptr addrspace(4) @gv.fptr0
  call void %fptr()
  ret void
}
|
|
|
|
; Directly calls @with_indirect_call, which itself performs an indirect call.
; The checks expect this caller to receive the same attribute group ATTR2 as
; its callee (no "amdgpu-no-*" entries).
define void @call_with_indirect_call() {
; GFX9-LABEL: define void @call_with_indirect_call(
; GFX9-SAME: ) #[[ATTR2]] {
; GFX9-NEXT: call void @with_indirect_call()
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @call_with_indirect_call(
; GFX10-SAME: ) #[[ATTR2]] {
; GFX10-NEXT: call void @with_indirect_call()
; GFX10-NEXT: ret void
;
  call void @with_indirect_call()
  ret void
}
|
|
|
|
; Kernel that directly calls @with_indirect_call. The checks expect
; attribute group ATTR2 (no "amdgpu-no-*" entries), matching the callee.
define amdgpu_kernel void @call_with_indirect_call_cc_kernel() {
; GFX9-LABEL: define amdgpu_kernel void @call_with_indirect_call_cc_kernel(
; GFX9-SAME: ) #[[ATTR2]] {
; GFX9-NEXT: call void @with_indirect_call()
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @call_with_indirect_call_cc_kernel(
; GFX10-SAME: ) #[[ATTR2]] {
; GFX10-NEXT: call void @with_indirect_call()
; GFX10-NEXT: ret void
;
  call void @with_indirect_call()
  ret void
}
|
|
|
|
; Trivial function that only returns; one of the two possible callees of
; @indirect_call_known_callees. The checks expect attribute group ATTR0,
; which includes "amdgpu-no-flat-scratch-init".
define void @empty() {
; GFX9-LABEL: define void @empty(
; GFX9-SAME: ) #[[ATTR0]] {
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @empty(
; GFX10-SAME: ) #[[ATTR0]] {
; GFX10-NEXT: ret void
;
  ret void
}
|
|
|
|
; Second trivial function that only returns; the other possible callee of
; @indirect_call_known_callees. The checks expect attribute group ATTR0,
; which includes "amdgpu-no-flat-scratch-init".
define void @also_empty() {
; GFX9-LABEL: define void @also_empty(
; GFX9-SAME: ) #[[ATTR0]] {
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @also_empty(
; GFX10-SAME: ) #[[ATTR0]] {
; GFX10-NEXT: ret void
;
  ret void
}
|
|
|
|
; Kernel whose indirect call target is a select between @empty and
; @also_empty. The checks expect the indirect call to be rewritten into a
; pointer comparison against @also_empty followed by direct calls to
; @also_empty / @empty, with an unreachable fallback block. The expected
; attribute group is ATTR3, which includes "amdgpu-no-flat-scratch-init" but
; omits "amdgpu-no-agpr".
define amdgpu_kernel void @indirect_call_known_callees(i1 %cond) {
; GFX9-LABEL: define amdgpu_kernel void @indirect_call_known_callees(
; GFX9-SAME: i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] {
; GFX9-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
; GFX9-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
; GFX9-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]]
; GFX9: [[BB2]]:
; GFX9-NEXT: call void @also_empty()
; GFX9-NEXT: br label %[[BB6:.*]]
; GFX9: [[BB3]]:
; GFX9-NEXT: br i1 true, label %[[BB4:.*]], label %[[BB5:.*]]
; GFX9: [[BB4]]:
; GFX9-NEXT: call void @empty()
; GFX9-NEXT: br label %[[BB6]]
; GFX9: [[BB5]]:
; GFX9-NEXT: unreachable
; GFX9: [[BB6]]:
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @indirect_call_known_callees(
; GFX10-SAME: i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] {
; GFX10-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
; GFX10-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
; GFX10-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]]
; GFX10: [[BB2]]:
; GFX10-NEXT: call void @also_empty()
; GFX10-NEXT: br label %[[BB6:.*]]
; GFX10: [[BB3]]:
; GFX10-NEXT: br i1 true, label %[[BB4:.*]], label %[[BB5:.*]]
; GFX10: [[BB4]]:
; GFX10-NEXT: call void @empty()
; GFX10-NEXT: br label %[[BB6]]
; GFX10: [[BB5]]:
; GFX10-NEXT: unreachable
; GFX10: [[BB6]]:
; GFX10-NEXT: ret void
;
  %fptr = select i1 %cond, ptr @empty, ptr @also_empty
  call void %fptr()
  ret void
}
|
|
|
|
; NOTE(review): this declares the workgroup.id.x intrinsic, but the functions
; below call @llvm.amdgcn.workitem.id.x, which has no explicit declaration in
; this part of the file -- confirm whether that is intentional.
declare i32 @llvm.amdgcn.workgroup.id.x()
|
|
|
|
; Non-kernel function that reads llvm.amdgcn.workitem.id.x and stores the
; value through a null addrspace(1) pointer. The checks expect attribute
; group ATTR5, which includes "amdgpu-no-flat-scratch-init" but omits
; "amdgpu-no-workitem-id-x".
define void @use_intrinsic_workitem_id_x() {
; GFX9-LABEL: define void @use_intrinsic_workitem_id_x(
; GFX9-SAME: ) #[[ATTR5:[0-9]+]] {
; GFX9-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; GFX9-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) null, align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @use_intrinsic_workitem_id_x(
; GFX10-SAME: ) #[[ATTR5:[0-9]+]] {
; GFX10-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; GFX10-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) null, align 4
; GFX10-NEXT: ret void
;
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val, ptr addrspace(1) null
  ret void
}
|
|
|
|
; Kernel variant of @use_intrinsic_workitem_id_x with the same body. Unlike
; the non-kernel version, the checks expect attribute group ATTR0 here,
; which still lists "amdgpu-no-workitem-id-x" alongside
; "amdgpu-no-flat-scratch-init".
define amdgpu_kernel void @use_intrinsic_workitem_id_x_cc_kernel() {
; GFX9-LABEL: define amdgpu_kernel void @use_intrinsic_workitem_id_x_cc_kernel(
; GFX9-SAME: ) #[[ATTR0]] {
; GFX9-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; GFX9-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) null, align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @use_intrinsic_workitem_id_x_cc_kernel(
; GFX10-SAME: ) #[[ATTR0]] {
; GFX10-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; GFX10-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) null, align 4
; GFX10-NEXT: ret void
;
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val, ptr addrspace(1) null
  ret void
}
|
|
|
|
; Directly calls @use_intrinsic_workitem_id_x. The checks expect the caller
; to get the same attribute group ATTR5 as the callee, i.e. without
; "amdgpu-no-workitem-id-x".
define void @call_use_intrinsic_workitem_id_x() {
; GFX9-LABEL: define void @call_use_intrinsic_workitem_id_x(
; GFX9-SAME: ) #[[ATTR5]] {
; GFX9-NEXT: call void @use_intrinsic_workitem_id_x()
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @call_use_intrinsic_workitem_id_x(
; GFX10-SAME: ) #[[ATTR5]] {
; GFX10-NEXT: call void @use_intrinsic_workitem_id_x()
; GFX10-NEXT: ret void
;
  call void @use_intrinsic_workitem_id_x()
  ret void
}
|
|
|
|
; Kernel that directly calls @use_intrinsic_workitem_id_x. The checks expect
; attribute group ATTR5 (no "amdgpu-no-workitem-id-x"), matching the callee.
define amdgpu_kernel void @call_use_intrinsic_workitem_id_x_cc_kernel() {
; GFX9-LABEL: define amdgpu_kernel void @call_use_intrinsic_workitem_id_x_cc_kernel(
; GFX9-SAME: ) #[[ATTR5]] {
; GFX9-NEXT: call void @use_intrinsic_workitem_id_x()
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @call_use_intrinsic_workitem_id_x_cc_kernel(
; GFX10-SAME: ) #[[ATTR5]] {
; GFX10-NEXT: call void @use_intrinsic_workitem_id_x()
; GFX10-NEXT: ret void
;
  call void @use_intrinsic_workitem_id_x()
  ret void
}
|
|
|
|
; Kernel that converts an addrspace(3) pointer to a flat pointer through the
; llvm.amdgcn.addrspacecast.nonnull.p0.p3 intrinsic and stores through it.
; The checks expect attribute group ATTR1, which does not carry
; "amdgpu-no-flat-scratch-init".
define amdgpu_kernel void @calls_intrin_ascast_cc_kernel(ptr addrspace(3) %ptr) {
; GFX9-LABEL: define amdgpu_kernel void @calls_intrin_ascast_cc_kernel(
; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]])
; GFX9-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @calls_intrin_ascast_cc_kernel(
; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]])
; GFX10-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
; GFX10-NEXT: ret void
;
  %1 = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) %ptr)
  store volatile i32 7, ptr %1, align 4
  ret void
}
|
|
|
|
; Kernel that directly calls @calls_intrin_ascast_cc_kernel, forwarding its
; addrspace(3) pointer. The checks expect attribute group ATTR1 (no
; "amdgpu-no-flat-scratch-init"), matching the callee.
define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel(ptr addrspace(3) %ptr) {
; GFX9-LABEL: define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel(
; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: call void @calls_intrin_ascast_cc_kernel(ptr addrspace(3) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel(
; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: call void @calls_intrin_ascast_cc_kernel(ptr addrspace(3) [[PTR]])
; GFX10-NEXT: ret void
;
  call void @calls_intrin_ascast_cc_kernel(ptr addrspace(3) %ptr)
  ret void
}
|
|
|
|
; Kernel containing a side-effecting inline asm statement with an "a"
; operand constraint. The checks expect attribute group ATTR3, which
; includes "amdgpu-no-flat-scratch-init" but omits "amdgpu-no-agpr"
; (presumably because of the "a" constraint -- confirm against the pass).
; The check line for the asm call stops at the opening quote of the asm
; string, so it matches regardless of the asm text that follows.
define amdgpu_kernel void @with_inline_asm() {
; GFX9-LABEL: define amdgpu_kernel void @with_inline_asm(
; GFX9-SAME: ) #[[ATTR3]] {
; GFX9-NEXT: call void asm sideeffect "
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @with_inline_asm(
; GFX10-SAME: ) #[[ATTR3]] {
; GFX10-NEXT: call void asm sideeffect "
; GFX10-NEXT: ret void
;
  call void asm sideeffect "; use $0", "a"(i32 poison)
  ret void
}
|
|
|
|
;.
|
|
; GFX9: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
|
|
; GFX9: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
|
|
; GFX9: attributes #[[ATTR2]] = { "target-cpu"="gfx900" "uniform-work-group-size"="false" }
|
|
; GFX9: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
|
|
; GFX9: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" }
|
|
; GFX9: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
|
|
;.
|
|
; GFX10: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
|
|
; GFX10: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
|
|
; GFX10: attributes #[[ATTR2]] = { "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
|
|
; GFX10: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
|
|
; GFX10: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" }
|
|
; GFX10: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
|
|
;.
|