[NFC][AMDGPU] Use zeroinitializer instead of null for ptr addrspace(2/3/5) in AMDGPU tests (#181710)
This commit is contained in:
parent
9532f3f5fa
commit
90d1a559df
@ -8,7 +8,7 @@ entry:
|
||||
; CHECK: remark: test.c:3:11: in function 'f', 'load' instruction ('%load0') accesses memory in flat address space
|
||||
%load0 = load i32, ptr addrspace(0) null, align 4, !dbg !6
|
||||
%load1 = load i32, ptr addrspace(1) null, align 4, !dbg !6
|
||||
%load2 = load i32, ptr addrspace(2) null, align 4, !dbg !6
|
||||
%load2 = load i32, ptr addrspace(2) zeroinitializer, align 4, !dbg !6
|
||||
|
||||
; store
|
||||
; CHECK: remark: test.c:4:6: in function 'f', 'store' instruction accesses memory in flat address space
|
||||
|
||||
@ -21,8 +21,8 @@ define amdgpu_gs <4 x float> @_amdgpu_gs_main() {
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: ; return to shader part epilog
|
||||
bb:
|
||||
%i = load <1 x float>, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) null, i32 16), align 4
|
||||
%i1 = load <1 x float>, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) null, i32 20), align 4
|
||||
%i = load <1 x float>, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) zeroinitializer, i32 16), align 4
|
||||
%i1 = load <1 x float>, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) zeroinitializer, i32 20), align 4
|
||||
%i2 = shufflevector <1 x float> %i, <1 x float> zeroinitializer, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
|
||||
call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %i2, <4 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0)
|
||||
%i3 = shufflevector <1 x float> %i1, <1 x float> zeroinitializer, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
|
||||
|
||||
@ -2003,7 +2003,7 @@ define amdgpu_kernel void @p1i8_arg(ptr addrspace(1) %arg) nounwind {
|
||||
; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0
|
||||
; LEGACY-MESA-VI-NEXT: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `ptr addrspace(3) null`, addrspace 3)
|
||||
; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
|
||||
store i8 9, ptr addrspace(3) null
|
||||
store i8 9, ptr addrspace(3) zeroinitializer
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
@ -418,7 +418,7 @@ define amdgpu_kernel void @set_inactive_p2(ptr addrspace(1) %out, ptr addrspace(
|
||||
; GCN-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0
|
||||
; GCN-NEXT: s_endpgm
|
||||
%tmp.0 = call ptr addrspace(2) @llvm.amdgcn.set.inactive.p2(ptr addrspace(2) %in, ptr addrspace(2) null) #0
|
||||
%tmp.0 = call ptr addrspace(2) @llvm.amdgcn.set.inactive.p2(ptr addrspace(2) %in, ptr addrspace(2) zeroinitializer) #0
|
||||
%tmp = call ptr addrspace(2) @llvm.amdgcn.strict.wwm.p2(ptr addrspace(2) %tmp.0)
|
||||
store ptr addrspace(2) %tmp, ptr addrspace(1) %out
|
||||
ret void
|
||||
@ -439,7 +439,7 @@ define amdgpu_kernel void @set_inactive_p3(ptr addrspace(1) %out, ptr addrspace(
|
||||
; GCN-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0
|
||||
; GCN-NEXT: s_endpgm
|
||||
%tmp.0 = call ptr addrspace(3) @llvm.amdgcn.set.inactive.p3(ptr addrspace(3) %in, ptr addrspace(3) null) #0
|
||||
%tmp.0 = call ptr addrspace(3) @llvm.amdgcn.set.inactive.p3(ptr addrspace(3) %in, ptr addrspace(3) zeroinitializer) #0
|
||||
%tmp = call ptr addrspace(3) @llvm.amdgcn.strict.wwm.p3(ptr addrspace(3) %tmp.0)
|
||||
store ptr addrspace(3) %tmp, ptr addrspace(1) %out
|
||||
ret void
|
||||
@ -460,7 +460,7 @@ define amdgpu_kernel void @set_inactive_p5(ptr addrspace(1) %out, ptr addrspace(
|
||||
; GCN-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0
|
||||
; GCN-NEXT: s_endpgm
|
||||
%tmp.0 = call ptr addrspace(5) @llvm.amdgcn.set.inactive.p5(ptr addrspace(5) %in, ptr addrspace(5) null) #0
|
||||
%tmp.0 = call ptr addrspace(5) @llvm.amdgcn.set.inactive.p5(ptr addrspace(5) %in, ptr addrspace(5) zeroinitializer) #0
|
||||
%tmp = call ptr addrspace(5) @llvm.amdgcn.strict.wwm.p5(ptr addrspace(5) %tmp.0)
|
||||
store ptr addrspace(5) %tmp, ptr addrspace(1) %out
|
||||
ret void
|
||||
|
||||
@ -31,7 +31,7 @@ define amdgpu_kernel void @store_cast_0_group_to_flat_addrspacecast() #1 {
|
||||
; HSA-NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) null to ptr addrspace(4)), align 4
|
||||
; HSA-NEXT: ret void
|
||||
;
|
||||
store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) null to ptr addrspace(4))
|
||||
store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) zeroinitializer to ptr addrspace(4))
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
@ -227,7 +227,7 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 {
|
||||
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
|
||||
; HSA: flat_store_dword v[[[LO]]:[[HI]]], v[[K]]
|
||||
define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
|
||||
%cast = addrspacecast ptr addrspace(3) null to ptr
|
||||
%cast = addrspacecast ptr addrspace(3) zeroinitializer to ptr
|
||||
store volatile i32 7, ptr %cast
|
||||
ret void
|
||||
}
|
||||
@ -270,7 +270,7 @@ define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
|
||||
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
|
||||
; HSA: flat_store_dword v[[[LO]]:[[HI]]], v[[K]]
|
||||
define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
|
||||
%cast = addrspacecast ptr addrspace(5) null to ptr
|
||||
%cast = addrspacecast ptr addrspace(5) zeroinitializer to ptr
|
||||
store volatile i32 7, ptr %cast
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -862,7 +862,7 @@ bb16: ; preds = %bb58, %bb14
|
||||
%i33 = load volatile <2 x half>, ptr addrspace(1) %i31, align 8
|
||||
%i34 = getelementptr inbounds [16 x half], ptr addrspace(1) null, i64 %i24, i64 14
|
||||
%i36 = load volatile <2 x half>, ptr addrspace(1) %i34, align 4
|
||||
%i43 = load volatile <2 x float>, ptr addrspace(3) null, align 8
|
||||
%i43 = load volatile <2 x float>, ptr addrspace(3) zeroinitializer, align 8
|
||||
%i46 = load volatile <2 x float>, ptr addrspace(3) poison, align 32
|
||||
fence syncscope("workgroup") acquire
|
||||
br i1 %i11, label %bb58, label %bb51
|
||||
|
||||
@ -350,7 +350,7 @@ define void @indirect_use_group_to_flat_addrspacecast() #1 {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast(ptr addrspace(3) null)
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @use_group_to_flat_addrspacecast(ptr addrspace(3) null)
|
||||
call void @use_group_to_flat_addrspacecast(ptr addrspace(3) zeroinitializer)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -360,7 +360,7 @@ define void @indirect_use_group_to_flat_addrspacecast_gfx9() #1 {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast_gfx9(ptr addrspace(3) null)
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @use_group_to_flat_addrspacecast_gfx9(ptr addrspace(3) null)
|
||||
call void @use_group_to_flat_addrspacecast_gfx9(ptr addrspace(3) zeroinitializer)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -370,7 +370,7 @@ define void @indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9() #1 {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(ptr addrspace(3) null)
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(ptr addrspace(3) null)
|
||||
call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(ptr addrspace(3) zeroinitializer)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -462,7 +462,7 @@ define void @func_call_defined() #3 {
|
||||
define void @func_call_asm() #3 {
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_asm
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR24:[0-9]+]]
|
||||
; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR25:[0-9]+]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void asm sideeffect "", ""() #3
|
||||
@ -568,7 +568,7 @@ define float @func_other_intrinsic_call(float %arg) #3 {
|
||||
; Hostcall needs to be enabled for sanitizers
|
||||
define amdgpu_kernel void @kern_sanitize_address() #4 {
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
@ -579,7 +579,7 @@ define amdgpu_kernel void @kern_sanitize_address() #4 {
|
||||
; Hostcall needs to be enabled for sanitizers
|
||||
define void @func_sanitize_address() #4 {
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_sanitize_address
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] {
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR18]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
@ -590,7 +590,7 @@ define void @func_sanitize_address() #4 {
|
||||
; Hostcall needs to be enabled for sanitizers
|
||||
define void @func_indirect_sanitize_address() #3 {
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address()
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
@ -601,7 +601,7 @@ define void @func_indirect_sanitize_address() #3 {
|
||||
; Hostcall needs to be enabled for sanitizers
|
||||
define amdgpu_kernel void @kern_indirect_sanitize_address() #3 {
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR18]] {
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address()
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
@ -627,7 +627,7 @@ declare void @enqueue_block_decl() #6
|
||||
|
||||
define internal void @enqueue_block_def() #6 {
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@enqueue_block_def
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
ret void
|
||||
@ -635,7 +635,7 @@ define internal void @enqueue_block_def() #6 {
|
||||
|
||||
define amdgpu_kernel void @kern_call_enqueued_block_decl() {
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_decl
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR23:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_decl()
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
@ -645,7 +645,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_decl() {
|
||||
|
||||
define amdgpu_kernel void @kern_call_enqueued_block_def() {
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_def
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR23:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR24:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_def()
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
@ -655,7 +655,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_def() {
|
||||
|
||||
define void @unused_enqueue_block() {
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@unused_enqueue_block
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR23]] {
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR24]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
ret void
|
||||
@ -663,7 +663,7 @@ define void @unused_enqueue_block() {
|
||||
|
||||
define internal void @known_func() {
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@known_func
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR23]] {
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR24]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
ret void
|
||||
@ -672,8 +672,8 @@ define internal void @known_func() {
|
||||
; Should never happen
|
||||
define amdgpu_kernel void @kern_callsite_enqueue_block() {
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_callsite_enqueue_block
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR23]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @known_func() #[[ATTR25:[0-9]+]]
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR24]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @known_func() #[[ATTR26:[0-9]+]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @known_func() #6
|
||||
@ -707,13 +707,14 @@ attributes #6 = { "enqueued-block" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR19:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR20:[0-9]+]] = { "enqueued-block" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR24]] = { nounwind }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "enqueued-block" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR17:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind sanitize_address "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR20:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR21:[0-9]+]] = { "enqueued-block" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR24]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { nounwind }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { "enqueued-block" }
|
||||
;.
|
||||
|
||||
@ -115,7 +115,7 @@ if.end273.i.i: ; preds = %if.then263.i.i, %if
|
||||
|
||||
if.end294.i.i: ; preds = %if.end273.i.i, %if.then263.i.i, %if.end13.i.i
|
||||
%ls111.sroa.0.2.i = phi <4 x float> [ zeroinitializer, %if.end13.i.i ], [ zeroinitializer, %if.end273.i.i ], [ <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %if.then263.i.i ]
|
||||
store <4 x float> zeroinitializer, ptr addrspace(5) null, align 4
|
||||
store <4 x float> zeroinitializer, ptr addrspace(5) zeroinitializer, align 4
|
||||
br label %kernel_direct_lighting.exit
|
||||
|
||||
kernel_direct_lighting.exit: ; preds = %if.end294.i.i, %entry
|
||||
|
||||
@ -1002,7 +1002,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
bb:
|
||||
%i = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%i11 = icmp eq i32 %i, 0
|
||||
%i12 = load i32, ptr addrspace(3) null, align 8
|
||||
%i12 = load i32, ptr addrspace(3) zeroinitializer, align 8
|
||||
%i13 = zext i32 %i12 to i64
|
||||
%i14 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %i13
|
||||
br i1 %arg3, label %bb15, label %bb103
|
||||
@ -1014,7 +1014,7 @@ bb15:
|
||||
br i1 %arg4, label %bb19, label %bb20
|
||||
|
||||
bb19:
|
||||
store i64 %i18, ptr addrspace(5) null, align 8
|
||||
store i64 %i18, ptr addrspace(5) zeroinitializer, align 8
|
||||
unreachable
|
||||
|
||||
bb20:
|
||||
@ -1026,7 +1026,7 @@ bb20:
|
||||
br i1 %i25, label %bb26, label %bb27
|
||||
|
||||
bb26:
|
||||
store i64 %i22, ptr addrspace(5) null, align 8
|
||||
store i64 %i22, ptr addrspace(5) zeroinitializer, align 8
|
||||
unreachable
|
||||
|
||||
bb27:
|
||||
@ -1038,7 +1038,7 @@ bb27:
|
||||
br i1 %i32, label %bb33, label %bb34
|
||||
|
||||
bb33:
|
||||
store i64 %i29, ptr addrspace(5) null, align 8
|
||||
store i64 %i29, ptr addrspace(5) zeroinitializer, align 8
|
||||
unreachable
|
||||
|
||||
bb34:
|
||||
@ -1050,7 +1050,7 @@ bb34:
|
||||
br i1 %i39, label %bb40, label %bb41
|
||||
|
||||
bb40:
|
||||
store i64 %i36, ptr addrspace(5) null, align 8
|
||||
store i64 %i36, ptr addrspace(5) zeroinitializer, align 8
|
||||
unreachable
|
||||
|
||||
bb41:
|
||||
@ -1062,7 +1062,7 @@ bb41:
|
||||
br i1 %i46, label %bb47, label %bb48
|
||||
|
||||
bb47:
|
||||
store i64 %i43, ptr addrspace(5) null, align 8
|
||||
store i64 %i43, ptr addrspace(5) zeroinitializer, align 8
|
||||
unreachable
|
||||
|
||||
bb48:
|
||||
@ -1074,7 +1074,7 @@ bb48:
|
||||
br i1 %i53, label %bb54, label %bb55
|
||||
|
||||
bb54:
|
||||
store i64 %i50, ptr addrspace(5) null, align 8
|
||||
store i64 %i50, ptr addrspace(5) zeroinitializer, align 8
|
||||
unreachable
|
||||
|
||||
bb55:
|
||||
@ -1091,7 +1091,7 @@ bb61:
|
||||
br i1 %arg5, label %bb62, label %bb63
|
||||
|
||||
bb62:
|
||||
store i64 %i57, ptr addrspace(5) null, align 8
|
||||
store i64 %i57, ptr addrspace(5) zeroinitializer, align 8
|
||||
unreachable
|
||||
|
||||
bb63:
|
||||
@ -1105,7 +1105,7 @@ bb66:
|
||||
br i1 %arg5, label %bb67, label %bb68
|
||||
|
||||
bb67:
|
||||
store i64 %i64, ptr addrspace(5) null, align 8
|
||||
store i64 %i64, ptr addrspace(5) zeroinitializer, align 8
|
||||
unreachable
|
||||
|
||||
bb68:
|
||||
@ -1127,7 +1127,7 @@ bb73:
|
||||
br i1 %i78, label %bb79, label %bb80
|
||||
|
||||
bb79:
|
||||
store i64 %i75, ptr addrspace(5) null, align 8
|
||||
store i64 %i75, ptr addrspace(5) zeroinitializer, align 8
|
||||
unreachable
|
||||
|
||||
bb80:
|
||||
@ -1137,7 +1137,7 @@ bb80:
|
||||
br i1 %arg6, label %bb84, label %bb85
|
||||
|
||||
bb84:
|
||||
store i64 %i82, ptr addrspace(5) null, align 8
|
||||
store i64 %i82, ptr addrspace(5) zeroinitializer, align 8
|
||||
unreachable
|
||||
|
||||
bb85:
|
||||
@ -1147,11 +1147,11 @@ bb85:
|
||||
br i1 %i88, label %bb89, label %bb90
|
||||
|
||||
bb89:
|
||||
store i64 %i83, ptr addrspace(5) null, align 8
|
||||
store i64 %i83, ptr addrspace(5) zeroinitializer, align 8
|
||||
unreachable
|
||||
|
||||
bb90:
|
||||
%i91 = load i64, ptr addrspace(3) null, align 8
|
||||
%i91 = load i64, ptr addrspace(3) zeroinitializer, align 8
|
||||
%i92 = load i64, ptr addrspace(3) %arg8, align 8
|
||||
%i93 = load i64, ptr addrspace(3) %arg7, align 8
|
||||
%i94 = trunc i64 %i91 to i32
|
||||
@ -1172,7 +1172,7 @@ bb104:
|
||||
ret void
|
||||
|
||||
bb105:
|
||||
%i106 = load i64, ptr addrspace(3) null, align 8
|
||||
%i106 = load i64, ptr addrspace(3) zeroinitializer, align 8
|
||||
%i107 = load i64, ptr addrspace(3) %arg9, align 8
|
||||
%i108 = load i64, ptr addrspace(3) %arg7, align 8
|
||||
%i109 = load i64, ptr addrspace(3) %arg10, align 8
|
||||
@ -1252,7 +1252,7 @@ bb161:
|
||||
%i171 = select i1 %i170, i32 0, i32 %i169
|
||||
%i172 = or i32 %i171, %i157
|
||||
%i173 = zext i32 %i172 to i64
|
||||
store i64 %i173, ptr addrspace(3) null, align 4
|
||||
store i64 %i173, ptr addrspace(3) zeroinitializer, align 4
|
||||
unreachable
|
||||
|
||||
bb174:
|
||||
@ -1278,21 +1278,21 @@ bb186:
|
||||
%i192 = zext i32 %i178 to i64
|
||||
%i193 = zext i32 %i182 to i64
|
||||
%i194 = zext i32 %i181 to i64
|
||||
store i64 %i187, ptr addrspace(3) null, align 8
|
||||
store i64 %i187, ptr addrspace(3) zeroinitializer, align 8
|
||||
store i64 %i188, ptr addrspace(3) %arg7, align 8
|
||||
store i64 %i189, ptr addrspace(3) %arg8, align 8
|
||||
store i64 %i190, ptr addrspace(3) null, align 8
|
||||
store i64 %i190, ptr addrspace(3) zeroinitializer, align 8
|
||||
store i64 %i191, ptr addrspace(3) %arg7, align 8
|
||||
store i64 %i192, ptr addrspace(3) null, align 8
|
||||
store i64 %i192, ptr addrspace(3) zeroinitializer, align 8
|
||||
store i64 %i193, ptr addrspace(3) %arg7, align 8
|
||||
store i64 %i194, ptr addrspace(3) null, align 8
|
||||
store i64 %i194, ptr addrspace(3) zeroinitializer, align 8
|
||||
%i195 = ptrtoint ptr addrspace(1) %i185 to i64
|
||||
store i64 %i195, ptr addrspace(5) null, align 8
|
||||
store i64 %i195, ptr addrspace(5) zeroinitializer, align 8
|
||||
unreachable
|
||||
|
||||
bb196:
|
||||
%i197 = zext i32 %i184 to i64
|
||||
store i64 %i197, ptr addrspace(3) null, align 8
|
||||
store i64 %i197, ptr addrspace(3) zeroinitializer, align 8
|
||||
unreachable
|
||||
}
|
||||
|
||||
|
||||
@ -456,7 +456,7 @@ define amdgpu_kernel void @loop_arg_0(ptr addrspace(3) %ptr, i32 %n) nounwind {
|
||||
; GCN_DBG-NEXT: s_cbranch_vccnz .LBB4_1
|
||||
; GCN_DBG-NEXT: s_branch .LBB4_2
|
||||
entry:
|
||||
%cond = load volatile i1, ptr addrspace(3) null
|
||||
%cond = load volatile i1, ptr addrspace(3) zeroinitializer
|
||||
br label %for.body
|
||||
|
||||
for.exit:
|
||||
|
||||
@ -68,9 +68,9 @@ define <2 x half> @chain_hi_to_lo_private() {
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
||||
bb:
|
||||
%gep_lo = getelementptr inbounds half, ptr addrspace(5) null, i64 1
|
||||
%gep_lo = getelementptr inbounds half, ptr addrspace(5) zeroinitializer, i64 1
|
||||
%load_lo = load half, ptr addrspace(5) %gep_lo
|
||||
%load_hi = load half, ptr addrspace(5) null
|
||||
%load_hi = load half, ptr addrspace(5) zeroinitializer
|
||||
|
||||
%temp = insertelement <2 x half> poison, half %load_lo, i32 0
|
||||
%result = insertelement <2 x half> %temp, half %load_hi, i32 1
|
||||
@ -244,9 +244,9 @@ define <2 x half> @chain_hi_to_lo_group() {
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
||||
bb:
|
||||
%gep_lo = getelementptr inbounds half, ptr addrspace(3) null, i64 1
|
||||
%gep_lo = getelementptr inbounds half, ptr addrspace(3) zeroinitializer, i64 1
|
||||
%load_lo = load half, ptr addrspace(3) %gep_lo
|
||||
%load_hi = load half, ptr addrspace(3) null
|
||||
%load_hi = load half, ptr addrspace(3) zeroinitializer
|
||||
|
||||
%temp = insertelement <2 x half> poison, half %load_lo, i32 0
|
||||
%result = insertelement <2 x half> %temp, half %load_hi, i32 1
|
||||
|
||||
@ -27,6 +27,6 @@ entry:
|
||||
store i64 0, ptr addrspace(5) %load, align 8
|
||||
%gep = getelementptr i8, ptr addrspace(5) %load, i32 8
|
||||
store i64 0, ptr addrspace(5) %gep, align 8
|
||||
call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) null, ptr addrspace(5) align 8 %load, i64 16, i1 false)
|
||||
call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) zeroinitializer, ptr addrspace(5) align 8 %load, i64 16, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -164,7 +164,7 @@ bb.inner.then: ; preds = %bb.outer.then
|
||||
br label %bb.outer.end
|
||||
|
||||
bb.outer.end: ; preds = %bb.outer.then, %bb.inner.then, %bb
|
||||
store i32 3, ptr addrspace(3) null
|
||||
store i32 3, ptr addrspace(3) zeroinitializer
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -360,7 +360,7 @@ bb.inner.end: ; preds = %bb.inner.then, %bb.
|
||||
br label %bb.outer.end
|
||||
|
||||
bb.outer.end: ; preds = %bb.inner.then, %bb
|
||||
store i32 3, ptr addrspace(3) null
|
||||
store i32 3, ptr addrspace(3) zeroinitializer
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -586,7 +586,7 @@ bb.else: ; preds = %bb.outer.then
|
||||
br label %bb.outer.end
|
||||
|
||||
bb.outer.end: ; preds = %bb, %bb.then, %bb.else
|
||||
store i32 3, ptr addrspace(3) null
|
||||
store i32 3, ptr addrspace(3) zeroinitializer
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -872,7 +872,7 @@ bb.inner.then2:
|
||||
br label %bb.outer.end
|
||||
|
||||
bb.outer.end:
|
||||
store i32 3, ptr addrspace(3) null
|
||||
store i32 3, ptr addrspace(3) zeroinitializer
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
@ -314,6 +314,6 @@ define amdgpu_kernel void @concat_vector_crash2(ptr addrspace(1) %out, ptr addrs
|
||||
define amdgpu_kernel void @build_vector_splat_concat_v8i16() {
|
||||
entry:
|
||||
store <8 x i16> zeroinitializer, ptr addrspace(3) poison, align 16
|
||||
store <8 x i16> zeroinitializer, ptr addrspace(3) null, align 16
|
||||
store <8 x i16> zeroinitializer, ptr addrspace(3) zeroinitializer, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -138,7 +138,7 @@ endif:
|
||||
define amdgpu_kernel void @divergent_loop(ptr addrspace(1) %out) #0 {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%load0 = load volatile i32, ptr addrspace(3) null
|
||||
%load0 = load volatile i32, ptr addrspace(3) zeroinitializer
|
||||
%cmp0 = icmp eq i32 %tid, 0
|
||||
br i1 %cmp0, label %loop, label %end
|
||||
|
||||
@ -252,7 +252,7 @@ end:
|
||||
define amdgpu_kernel void @divergent_if_else_endif(ptr addrspace(1) %out) #0 {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%load0 = load volatile i32, ptr addrspace(3) null
|
||||
%load0 = load volatile i32, ptr addrspace(3) zeroinitializer
|
||||
%cmp0 = icmp eq i32 %tid, 0
|
||||
br i1 %cmp0, label %if, label %else
|
||||
|
||||
|
||||
@ -80,10 +80,10 @@ for.body71.preheader: ; preds = %entry
|
||||
br label %if.end82
|
||||
|
||||
if.end82: ; preds = %for.body71.preheader, %entry
|
||||
%f9 = load <2 x float>, ptr addrspace(3) null, align 8
|
||||
%f9 = load <2 x float>, ptr addrspace(3) zeroinitializer, align 8
|
||||
tail call void @llvm.amdgcn.s.barrier()
|
||||
%f10 = load float, ptr addrspace(3) %add.ptr10, align 4
|
||||
%f11 = load float, ptr addrspace(3) null, align 4
|
||||
%f11 = load float, ptr addrspace(3) zeroinitializer, align 4
|
||||
fence acquire
|
||||
%f12 = load float, ptr addrspace(3) %add.ptr10, align 4
|
||||
%f13 = load float, ptr addrspace(3) %arrayidx53, align 4
|
||||
@ -98,9 +98,9 @@ if.end82: ; preds = %for.body71.preheade
|
||||
%f17 = shufflevector <14 x float> zeroinitializer, <14 x float> %f16, <14 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15>
|
||||
%f18 = insertelement <14 x float> %f17, float %f11, i64 13
|
||||
%f19 = shufflevector <14 x float> %f18, <14 x float> zeroinitializer, <2 x i32> <i32 12, i32 13>
|
||||
%f20 = load <2 x float>, ptr addrspace(3) null, align 8
|
||||
%f20 = load <2 x float>, ptr addrspace(3) zeroinitializer, align 8
|
||||
%f21 = fmul contract <2 x float> zeroinitializer, %f20
|
||||
store float 0.000000e+00, ptr addrspace(3) null, align 4
|
||||
store float 0.000000e+00, ptr addrspace(3) zeroinitializer, align 4
|
||||
%f22 = load <2 x float>, ptr addrspace(3) %arrayidx75.3.6, align 8
|
||||
%f23 = fmul <2 x float> zeroinitializer, %f22
|
||||
%f24 = shufflevector <2 x float> %f23, <2 x float> zeroinitializer, <14 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
@ -109,7 +109,7 @@ if.end82: ; preds = %for.body71.preheade
|
||||
%f27 = shufflevector <2 x float> %f26, <2 x float> zeroinitializer, <14 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%f28 = shufflevector <14 x float> %f25, <14 x float> %f27, <14 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15>
|
||||
%f29 = extractelement <14 x float> %f28, i64 0
|
||||
%f30 = load <2 x float>, ptr addrspace(3) null, align 4
|
||||
%f30 = load <2 x float>, ptr addrspace(3) zeroinitializer, align 4
|
||||
%f31 = shufflevector <14 x float> %f28, <14 x float> zeroinitializer, <2 x i32> <i32 11, i32 12>
|
||||
%f32 = fsub <2 x float> zeroinitializer, %f14
|
||||
%f33 = fmul contract <2 x float> %f32, zeroinitializer
|
||||
|
||||
@ -17,7 +17,7 @@ entry:
|
||||
#dbg_value(ptr addrspace(1) null, !4, !DIExpression(), !13)
|
||||
tail call void @llvm.amdgcn.s.barrier()
|
||||
fence acquire
|
||||
store float 0.000000e+00, ptr addrspace(3) null, align 4
|
||||
store float 0.000000e+00, ptr addrspace(3) zeroinitializer, align 4
|
||||
%f26 = fsub <2 x float> %f14, %f33
|
||||
%f27 = shufflevector <2 x float> %f26, <2 x float> zeroinitializer, <14 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%f28 = shufflevector <14 x float> zeroinitializer, <14 x float> %f27, <14 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15>
|
||||
|
||||
@ -35,7 +35,7 @@ bb25: ; preds = %bb
|
||||
|
||||
bb28: ; preds = %bb25, %bb21
|
||||
%tmp29 = phi <4 x float> [ %tmp27, %bb25 ], [ %tmp24, %bb21 ]
|
||||
store <4 x float> %tmp29, ptr addrspace(5) null, align 16
|
||||
store <4 x float> %tmp29, ptr addrspace(5) zeroinitializer, align 16
|
||||
%tmp30 = getelementptr inbounds %struct.wombat, ptr addrspace(1) %arg, i64 %tmp2, i32 2, i64 2
|
||||
%tmp31 = load i32, ptr addrspace(1) %tmp30, align 4
|
||||
%tmp32 = sext i32 %tmp31 to i64
|
||||
|
||||
@ -152,7 +152,7 @@ bb14: ; preds = %bb13
|
||||
br i1 %arg, label %bb15, label %bb6
|
||||
|
||||
bb15: ; preds = %bb14
|
||||
store double 0.000000e+00, ptr addrspace(5) null, align 2147483648
|
||||
store double 0.000000e+00, ptr addrspace(5) zeroinitializer, align 2147483648
|
||||
br label %bb6
|
||||
|
||||
bb16: ; preds = %bb13
|
||||
|
||||
@ -129,7 +129,7 @@ define amdgpu_kernel void @zextload_flat_i16(ptr addrspace(1) noalias %out, ptr
|
||||
define amdgpu_kernel void @flat_scratch_unaligned_load() {
|
||||
%scratch = alloca i32, addrspace(5)
|
||||
%fptr = addrspacecast ptr addrspace(5) %scratch to ptr
|
||||
store volatile ptr %fptr, ptr addrspace(3) null
|
||||
store volatile ptr %fptr, ptr addrspace(3) zeroinitializer
|
||||
%ld = load volatile i32, ptr %fptr, align 1
|
||||
ret void
|
||||
}
|
||||
@ -140,7 +140,7 @@ define amdgpu_kernel void @flat_scratch_unaligned_load() {
|
||||
define amdgpu_kernel void @flat_scratch_unaligned_store() {
|
||||
%scratch = alloca i32, addrspace(5)
|
||||
%fptr = addrspacecast ptr addrspace(5) %scratch to ptr
|
||||
store volatile ptr %fptr, ptr addrspace(3) null
|
||||
store volatile ptr %fptr, ptr addrspace(3) zeroinitializer
|
||||
store volatile i32 0, ptr %fptr, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -373,13 +373,13 @@ entry:
|
||||
br label %.shuffle.then.i.i.i.i
|
||||
|
||||
.shuffle.then.i.i.i.i: ; preds = %.shuffle.then.i.i.i.i, %entry
|
||||
store i64 0, ptr addrspace(5) null, align 4
|
||||
store i64 0, ptr addrspace(5) zeroinitializer, align 4
|
||||
%icmp = icmp ugt i64 %p2i, 1
|
||||
br i1 %icmp, label %.shuffle.then.i.i.i.i, label %vector.body.i.i.i.i
|
||||
|
||||
vector.body.i.i.i.i: ; preds = %.shuffle.then.i.i.i.i
|
||||
%wide.load9.i.i.i.i = load <2 x i32>, ptr addrspace(5) %.omp.reduction.element.i.i.i.i, align 4
|
||||
store <2 x i32> %wide.load9.i.i.i.i, ptr addrspace(5) null, align 4
|
||||
store <2 x i32> %wide.load9.i.i.i.i, ptr addrspace(5) zeroinitializer, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -393,14 +393,14 @@ entry:
|
||||
br label %.shuffle.then.i.i.i.i
|
||||
|
||||
.shuffle.then.i.i.i.i: ; preds = %.shuffle.then.i.i.i.i, %entry
|
||||
store i64 0, ptr addrspace(5) null, align 4
|
||||
store i64 0, ptr addrspace(5) zeroinitializer, align 4
|
||||
%or = and i32 %p2i, -512
|
||||
%icmp = icmp ugt i32 %or, 9999999
|
||||
br i1 %icmp, label %.shuffle.then.i.i.i.i, label %vector.body.i.i.i.i
|
||||
|
||||
vector.body.i.i.i.i: ; preds = %.shuffle.then.i.i.i.i
|
||||
%wide.load9.i.i.i.i = load <2 x i32>, ptr addrspace(5) %.omp.reduction.element.i.i.i.i, align 4
|
||||
store <2 x i32> %wide.load9.i.i.i.i, ptr addrspace(5) null, align 4
|
||||
store <2 x i32> %wide.load9.i.i.i.i, ptr addrspace(5) zeroinitializer, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -414,14 +414,14 @@ entry:
|
||||
br label %.shuffle.then.i.i.i.i
|
||||
|
||||
.shuffle.then.i.i.i.i: ; preds = %.shuffle.then.i.i.i.i, %entry
|
||||
store i64 0, ptr addrspace(5) null, align 4
|
||||
store i64 0, ptr addrspace(5) zeroinitializer, align 4
|
||||
%or = or i32 %p2i, 12345
|
||||
%icmp = icmp ugt i32 %or, 9999999
|
||||
br i1 %icmp, label %.shuffle.then.i.i.i.i, label %vector.body.i.i.i.i
|
||||
|
||||
vector.body.i.i.i.i: ; preds = %.shuffle.then.i.i.i.i
|
||||
%wide.load9.i.i.i.i = load <2 x i32>, ptr addrspace(5) %.omp.reduction.element.i.i.i.i, align 4
|
||||
store <2 x i32> %wide.load9.i.i.i.i, ptr addrspace(5) null, align 4
|
||||
store <2 x i32> %wide.load9.i.i.i.i, ptr addrspace(5) zeroinitializer, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
@ -17,7 +17,7 @@ bb0:
|
||||
br label %bb1
|
||||
|
||||
bb1:
|
||||
%dst = phi ptr [ null, %bb0 ], [ addrspacecast (ptr addrspace(3) null to ptr), %entry ]
|
||||
%dst = phi ptr [ null, %bb0 ], [ addrspacecast (ptr addrspace(3) zeroinitializer to ptr), %entry ]
|
||||
store i64 0, ptr %dst, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -8,10 +8,10 @@
|
||||
define amdgpu_kernel void @shouldNotReApply() {
|
||||
entry:
|
||||
tail call void @llvm.amdgcn.sched.barrier(i32 0)
|
||||
store <4 x i32> zeroinitializer, ptr addrspace(3) null, align 2147483648
|
||||
store <4 x i32> zeroinitializer, ptr addrspace(3) zeroinitializer, align 2147483648
|
||||
tail call void @llvm.amdgcn.sched.group.barrier(i32 0, i32 0, i32 0)
|
||||
tail call void @llvm.amdgcn.sched.barrier(i32 0)
|
||||
store i32 0, ptr addrspace(5) null, align 2147483648
|
||||
store i32 0, ptr addrspace(5) zeroinitializer, align 2147483648
|
||||
tail call void @llvm.amdgcn.sched.group.barrier(i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -144,7 +144,7 @@ bb2: ; preds = %bb13, %bb11, %bb
|
||||
br i1 %icmp, label %bb3, label %bb13
|
||||
|
||||
bb3: ; preds = %bb2
|
||||
%load4 = load i32, ptr addrspace(5) null, align 4
|
||||
%load4 = load i32, ptr addrspace(5) zeroinitializer, align 4
|
||||
%load5 = load i32, ptr addrspace(1) null, align 4
|
||||
br label %bb6
|
||||
|
||||
|
||||
@ -66,7 +66,7 @@ define amdgpu_cs float @ds_ordered_add_cs(ptr addrspace(2) inreg %gds) {
|
||||
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
|
||||
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
|
||||
define float @ds_ordered_add_default_cc() {
|
||||
%val = call i32 @llvm.amdgcn.ds.ordered.add(ptr addrspace(2) null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
|
||||
%val = call i32 @llvm.amdgcn.ds.ordered.add(ptr addrspace(2) zeroinitializer, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
|
||||
%r = bitcast i32 %val to float
|
||||
ret float %r
|
||||
}
|
||||
@ -78,7 +78,7 @@ define float @ds_ordered_add_default_cc() {
|
||||
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
|
||||
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
|
||||
define fastcc float @ds_ordered_add_fastcc() {
|
||||
%val = call i32 @llvm.amdgcn.ds.ordered.add(ptr addrspace(2) null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
|
||||
%val = call i32 @llvm.amdgcn.ds.ordered.add(ptr addrspace(2) zeroinitializer, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
|
||||
%r = bitcast i32 %val to float
|
||||
ret float %r
|
||||
}
|
||||
@ -90,7 +90,7 @@ define fastcc float @ds_ordered_add_fastcc() {
|
||||
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
|
||||
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
|
||||
define float @ds_ordered_add_func() {
|
||||
%val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
|
||||
%val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) zeroinitializer, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
|
||||
%r = bitcast i32 %val to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
@ -31,10 +31,10 @@ define amdgpu_kernel void @test_iglp_opt_rev_mfma_gemm(<1 x i64> %L1) {
|
||||
; GCN-NEXT: s_endpgm
|
||||
entry:
|
||||
call void @llvm.amdgcn.iglp.opt(i32 1)
|
||||
%load.4 = load <32 x float>, ptr addrspace(3) null, align 128
|
||||
%load.4 = load <32 x float>, ptr addrspace(3) zeroinitializer, align 128
|
||||
%B = urem <1 x i64> zeroinitializer, %L1
|
||||
store <32 x float> %load.4, ptr addrspace(3) null, align 128
|
||||
store <1 x i64> %B, ptr addrspace(3) null, align 8
|
||||
store <32 x float> %load.4, ptr addrspace(3) zeroinitializer, align 128
|
||||
store <1 x i64> %B, ptr addrspace(3) zeroinitializer, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
@ -435,7 +435,7 @@ define amdgpu_kernel void @set_inactive_p2(ptr addrspace(1) %out, ptr addrspace(
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0
|
||||
; GCN-NEXT: s_endpgm
|
||||
%tmp.0 = call ptr addrspace(2) @llvm.amdgcn.set.inactive.p2(ptr addrspace(2) %in, ptr addrspace(2) null) #0
|
||||
%tmp.0 = call ptr addrspace(2) @llvm.amdgcn.set.inactive.p2(ptr addrspace(2) %in, ptr addrspace(2) zeroinitializer) #0
|
||||
%tmp = call ptr addrspace(2) @llvm.amdgcn.strict.wwm.p2(ptr addrspace(2) %tmp.0)
|
||||
store ptr addrspace(2) %tmp, ptr addrspace(1) %out
|
||||
ret void
|
||||
@ -456,7 +456,7 @@ define amdgpu_kernel void @set_inactive_p3(ptr addrspace(1) %out, ptr addrspace(
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0
|
||||
; GCN-NEXT: s_endpgm
|
||||
%tmp.0 = call ptr addrspace(3) @llvm.amdgcn.set.inactive.p3(ptr addrspace(3) %in, ptr addrspace(3) null) #0
|
||||
%tmp.0 = call ptr addrspace(3) @llvm.amdgcn.set.inactive.p3(ptr addrspace(3) %in, ptr addrspace(3) zeroinitializer) #0
|
||||
%tmp = call ptr addrspace(3) @llvm.amdgcn.strict.wwm.p3(ptr addrspace(3) %tmp.0)
|
||||
store ptr addrspace(3) %tmp, ptr addrspace(1) %out
|
||||
ret void
|
||||
@ -477,7 +477,7 @@ define amdgpu_kernel void @set_inactive_p5(ptr addrspace(1) %out, ptr addrspace(
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0
|
||||
; GCN-NEXT: s_endpgm
|
||||
%tmp.0 = call ptr addrspace(5) @llvm.amdgcn.set.inactive.p5(ptr addrspace(5) %in, ptr addrspace(5) null) #0
|
||||
%tmp.0 = call ptr addrspace(5) @llvm.amdgcn.set.inactive.p5(ptr addrspace(5) %in, ptr addrspace(5) zeroinitializer) #0
|
||||
%tmp = call ptr addrspace(5) @llvm.amdgcn.strict.wwm.p5(ptr addrspace(5) %tmp.0)
|
||||
store ptr addrspace(5) %tmp, ptr addrspace(1) %out
|
||||
ret void
|
||||
|
||||
@ -65,7 +65,7 @@ entry:
|
||||
%gep = getelementptr inbounds i16, ptr addrspace(3) %in, i32 8
|
||||
%load.lo = load i16, ptr addrspace(3) %in
|
||||
%load.hi = load i16, ptr addrspace(3) %gep
|
||||
store i16 %load.lo, ptr addrspace(3) null
|
||||
store i16 %load.lo, ptr addrspace(3) zeroinitializer
|
||||
%build0 = insertelement <2 x i16> poison, i16 %load.lo, i32 0
|
||||
%build1 = insertelement <2 x i16> %build0, i16 %load.hi, i32 1
|
||||
ret <2 x i16> %build1
|
||||
@ -132,7 +132,7 @@ entry:
|
||||
%gep = getelementptr inbounds i16, ptr addrspace(3) %in, i32 8
|
||||
%load.lo = load i16, ptr addrspace(3) %in
|
||||
%load.hi = load i16, ptr addrspace(3) %gep
|
||||
store i16 %load.hi, ptr addrspace(3) null
|
||||
store i16 %load.hi, ptr addrspace(3) zeroinitializer
|
||||
%build0 = insertelement <2 x i16> poison, i16 %load.lo, i32 0
|
||||
%build1 = insertelement <2 x i16> %build0, i16 %load.hi, i32 1
|
||||
ret <2 x i16> %build1
|
||||
|
||||
@ -644,7 +644,7 @@ define void @load_local_lo_v2i16_reghi_vreg_multi_use_lo(ptr addrspace(3) %in, <
|
||||
entry:
|
||||
%load = load i16, ptr addrspace(3) %in
|
||||
%elt1 = extractelement <2 x i16> %reg, i32 1
|
||||
store i16 %load, ptr addrspace(3) null
|
||||
store i16 %load, ptr addrspace(3) zeroinitializer
|
||||
%build1 = insertelement <2 x i16> %reg, i16 %load, i32 0
|
||||
store <2 x i16> %build1, ptr addrspace(1) poison
|
||||
ret void
|
||||
@ -694,7 +694,7 @@ define void @load_local_lo_v2i16_reghi_vreg_multi_use_hi(ptr addrspace(3) %in, <
|
||||
entry:
|
||||
%load = load i16, ptr addrspace(3) %in
|
||||
%elt1 = extractelement <2 x i16> %reg, i32 1
|
||||
store i16 %elt1, ptr addrspace(3) null
|
||||
store i16 %elt1, ptr addrspace(3) zeroinitializer
|
||||
%build1 = insertelement <2 x i16> %reg, i16 %load, i32 0
|
||||
store <2 x i16> %build1, ptr addrspace(1) poison
|
||||
ret void
|
||||
|
||||
@ -91,7 +91,7 @@ for.body51: ; preds = %if.end118, %for.bod
|
||||
|
||||
if.then112: ; preds = %for.body51
|
||||
%inc101 = add i32 %collision.0281, 3
|
||||
store i32 %inc101, ptr addrspace(3) null, align 2147483648
|
||||
store i32 %inc101, ptr addrspace(3) zeroinitializer, align 2147483648
|
||||
br label %if.end118
|
||||
|
||||
if.end118: ; preds = %if.then112, %for.body51
|
||||
|
||||
@ -15,7 +15,7 @@ define amdgpu_vs void @main(i32 inreg %arg) {
|
||||
main_body:
|
||||
%tmp = load float, ptr addrspace(3) poison, align 4
|
||||
%tmp1 = load float, ptr addrspace(3) poison, align 4
|
||||
store float %tmp, ptr addrspace(3) null, align 4
|
||||
store float %tmp, ptr addrspace(3) zeroinitializer, align 4
|
||||
%tmp2 = bitcast float %tmp to i32
|
||||
%tmp3 = add nuw nsw i32 0, 1
|
||||
%tmp4 = zext i32 %tmp3 to i64
|
||||
|
||||
@ -6,13 +6,13 @@
|
||||
; CHECK: ds_read_b32
|
||||
; CHECK: ds_write_b32
|
||||
define amdgpu_vs void @test1(i32 %v) #0 {
|
||||
%p1 = getelementptr i32, ptr addrspace(3) null, i32 1
|
||||
%p1 = getelementptr i32, ptr addrspace(3) zeroinitializer, i32 1
|
||||
|
||||
store i32 %v, ptr addrspace(3) null
|
||||
store i32 %v, ptr addrspace(3) zeroinitializer
|
||||
|
||||
call void @llvm.amdgcn.raw.ptr.tbuffer.store.i32(i32 %v, ptr addrspace(8) poison, i32 0, i32 0, i32 68, i32 1)
|
||||
|
||||
%w = load i32, ptr addrspace(3) null
|
||||
%w = load i32, ptr addrspace(3) zeroinitializer
|
||||
store i32 %w, ptr addrspace(3) %p1
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -69,7 +69,7 @@ loop: ; preds = %loop, %entry
|
||||
%i28 = shl i32 %n5, 1
|
||||
%p = getelementptr i8, ptr addrspace(3) %arg4, i32 %i28
|
||||
%i29 = load <4 x i8>, ptr addrspace(3) %p, align 4
|
||||
%i30 = load <4 x i8>, ptr addrspace(3) null, align 8
|
||||
%i30 = load <4 x i8>, ptr addrspace(3) zeroinitializer, align 8
|
||||
tail call void @llvm.amdgcn.sched.group.barrier(i32 0, i32 0, i32 0)
|
||||
br i1 %loopcond, label %loop, label %exit
|
||||
|
||||
@ -93,7 +93,7 @@ exit: ; preds = %loop
|
||||
%i44 = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) null, i32 22528, i32 0, i32 0)
|
||||
%i45 = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) null, i32 23552, i32 0, i32 0)
|
||||
%i46 = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) null, i32 %arg0, i32 0, i32 0)
|
||||
store <4 x i32> %i24, ptr addrspace(3) null, align 16
|
||||
store <4 x i32> %i24, ptr addrspace(3) zeroinitializer, align 16
|
||||
%p18 = getelementptr %f8, ptr addrspace(3) @shared, i32 %arg2
|
||||
store <4 x i32> %i25, ptr addrspace(3) %p18, align 16
|
||||
%p17 = getelementptr %f8, ptr addrspace(3) %p18, i32 512
|
||||
@ -154,7 +154,7 @@ exit: ; preds = %loop
|
||||
%be26 = extractelement <2 x i64> %bc11, i64 1
|
||||
%i75 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.fp8(i64 0, i64 %be26, <4 x float> %i74, i32 0, i32 0, i32 0)
|
||||
%i76 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.fp8(i64 0, i64 %be20, <4 x float> %i75, i32 0, i32 0, i32 0)
|
||||
%i77 = load <4 x i8>, ptr addrspace(3) null, align 8
|
||||
%i77 = load <4 x i8>, ptr addrspace(3) zeroinitializer, align 8
|
||||
%i78 = getelementptr i8, ptr addrspace(3) @shared, i32 %i28
|
||||
%p10 = getelementptr %f8, ptr addrspace(3) %i78, i32 %arg1
|
||||
%i79 = load <4 x i8>, ptr addrspace(3) %p10, align 4
|
||||
@ -293,11 +293,11 @@ exit: ; preds = %loop
|
||||
%m2 = and i32 %i, 48
|
||||
%i124 = getelementptr float, ptr addrspace(3) @shared, i32 %m2
|
||||
%p21 = getelementptr float, ptr addrspace(3) %i124, i32 %n6
|
||||
store float %ce4, ptr addrspace(3) null, align 4
|
||||
store float %ce4, ptr addrspace(3) zeroinitializer, align 4
|
||||
%ce3 = extractelement <4 x float> %i120, i64 0
|
||||
store float %ce3, ptr addrspace(3) %p21, align 4
|
||||
%ce2 = extractelement <4 x float> %i106, i64 0
|
||||
store float %ce2, ptr addrspace(3) null, align 4
|
||||
store float %ce2, ptr addrspace(3) zeroinitializer, align 4
|
||||
%ce = extractelement <4 x float> %i123, i64 0
|
||||
store float %ce, ptr addrspace(3) %p15, align 4
|
||||
%sx = sext i32 %a6 to i64
|
||||
|
||||
@ -47,7 +47,7 @@ define i32 @test_null_argument(i32 %n) {
|
||||
; CHECK-NEXT: ret i32 [[CALL1]]
|
||||
;
|
||||
%str = alloca [9 x i8], align 1, addrspace(5)
|
||||
%call1 = call i32 (ptr addrspace(5), ...) @printf(ptr addrspace(5) null, ptr addrspace(5) %str, i32 %n)
|
||||
%call1 = call i32 (ptr addrspace(5), ...) @printf(ptr addrspace(5) zeroinitializer, ptr addrspace(5) %str, i32 %n)
|
||||
ret i32 %call1
|
||||
}
|
||||
|
||||
|
||||
@ -3893,7 +3893,7 @@ define amdgpu_kernel void @any_extend_to_perm(i8 %arg, <4 x i8> %arg1) {
|
||||
; GFX9-NEXT: s_endpgm
|
||||
bb:
|
||||
%insertelement = insertelement <4 x i8> %arg1, i8 %arg, i64 3
|
||||
store <4 x i8> %insertelement, ptr addrspace(3) null, align 4
|
||||
store <4 x i8> %insertelement, ptr addrspace(3) zeroinitializer, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
@ -119,7 +119,7 @@ define amdgpu_kernel void @lds_promoted_alloca_icmp_null_lhs(ptr addrspace(1) %o
|
||||
;
|
||||
%alloca = alloca [16 x i32], align 4, addrspace(5)
|
||||
%ptr0 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a
|
||||
%cmp = icmp eq ptr addrspace(5) null, %ptr0
|
||||
%cmp = icmp eq ptr addrspace(5) zeroinitializer, %ptr0
|
||||
%zext = zext i1 %cmp to i32
|
||||
store volatile i32 %zext, ptr addrspace(1) %out
|
||||
ret void
|
||||
|
||||
@ -104,7 +104,7 @@ bb:
|
||||
%tmp = alloca double, align 8, addrspace(5)
|
||||
store double 0.000000e+00, ptr addrspace(5) %tmp, align 8
|
||||
%tmp2 = icmp eq i32 %arg1, 0
|
||||
%tmp3 = select i1 %tmp2, ptr addrspace(5) %tmp, ptr addrspace(5) null
|
||||
%tmp3 = select i1 %tmp2, ptr addrspace(5) %tmp, ptr addrspace(5) zeroinitializer
|
||||
store double 1.000000e+00, ptr addrspace(5) %tmp3, align 8
|
||||
%tmp4 = load double, ptr addrspace(5) %tmp, align 8
|
||||
store double %tmp4, ptr addrspace(1) %arg
|
||||
@ -119,7 +119,7 @@ bb:
|
||||
%tmp = alloca double, align 8, addrspace(5)
|
||||
store double 0.000000e+00, ptr addrspace(5) %tmp, align 8
|
||||
%tmp2 = icmp eq i32 %arg1, 0
|
||||
%tmp3 = select i1 %tmp2, ptr addrspace(5) null, ptr addrspace(5) %tmp
|
||||
%tmp3 = select i1 %tmp2, ptr addrspace(5) zeroinitializer, ptr addrspace(5) %tmp
|
||||
store double 1.000000e+00, ptr addrspace(5) %tmp3, align 8
|
||||
%tmp4 = load double, ptr addrspace(5) %tmp, align 8
|
||||
store double %tmp4, ptr addrspace(1) %arg
|
||||
|
||||
@ -52,10 +52,10 @@ define amdgpu_kernel void @zot() {
|
||||
bb:
|
||||
%alloca = alloca %struct.barney, align 16, addrspace(5)
|
||||
%alloca1 = alloca %struct.barney, align 16, addrspace(5)
|
||||
store i32 0, ptr addrspace(5) null, align 2147483648
|
||||
store i32 0, ptr addrspace(5) zeroinitializer, align 2147483648
|
||||
call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef align 16 dereferenceable(16) %alloca1, ptr addrspace(5) noundef align 16 dereferenceable(16) %alloca, i64 16, i1 false)
|
||||
call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef align 16 dereferenceable(16) %alloca, ptr noundef nonnull align 1 dereferenceable(16) poison, i64 16, i1 false)
|
||||
%load = load volatile ptr, ptr addrspace(5) null, align 2147483648
|
||||
%load = load volatile ptr, ptr addrspace(5) zeroinitializer, align 2147483648
|
||||
br label %bb2
|
||||
|
||||
bb2: ; preds = %bb2, %bb
|
||||
|
||||
@ -2657,7 +2657,7 @@ define amdgpu_kernel void @negativeoffsetnullptr(ptr %buffer) {
|
||||
; GFX11-FAKE16-NEXT: ; %bb.2: ; %end
|
||||
; GFX11-FAKE16-NEXT: s_endpgm
|
||||
entry:
|
||||
%null = select i1 false, ptr %buffer, ptr addrspacecast (ptr addrspace(5) null to ptr)
|
||||
%null = select i1 false, ptr %buffer, ptr addrspacecast (ptr addrspace(5) zeroinitializer to ptr)
|
||||
%gep = getelementptr inbounds i8, ptr %null, i64 -1
|
||||
%ld = load i8, ptr %gep
|
||||
%cmp = icmp eq i8 %ld, 0
|
||||
|
||||
@ -8,12 +8,12 @@
|
||||
define amdgpu_kernel void @spill_scale_test(float %arg, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <16 x i32> %arg8, float %arg9, <16 x i32> %arg10, float %arg11, <16 x i8> %arg12) #0 {
|
||||
bb:
|
||||
%i = shufflevector <16 x i8> %arg12, <16 x i8> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
tail call void @llvm.amdgcn.global.load.async.to.lds.b32(ptr addrspace(1) null, ptr addrspace(3) null, i32 0, i32 0)
|
||||
tail call void @llvm.amdgcn.global.load.async.to.lds.b32(ptr addrspace(1) null, ptr addrspace(3) zeroinitializer, i32 0, i32 0)
|
||||
%i13 = bitcast <64 x i8> %i to <16 x i32>
|
||||
tail call void @llvm.amdgcn.global.load.async.to.lds.b32(ptr addrspace(1) null, ptr addrspace(3) null, i32 0, i32 0)
|
||||
%i14 = tail call <2 x i32> @llvm.amdgcn.ds.load.tr8.b64.v2i32(ptr addrspace(3) null)
|
||||
tail call void @llvm.amdgcn.global.load.async.to.lds.b32(ptr addrspace(1) null, ptr addrspace(3) zeroinitializer, i32 0, i32 0)
|
||||
%i14 = tail call <2 x i32> @llvm.amdgcn.ds.load.tr8.b64.v2i32(ptr addrspace(3) zeroinitializer)
|
||||
%i15 = bitcast <2 x i32> %i14 to <8 x i8>
|
||||
%i16 = tail call <2 x i32> @llvm.amdgcn.ds.load.tr8.b64.v2i32(ptr addrspace(3) null)
|
||||
%i16 = tail call <2 x i32> @llvm.amdgcn.ds.load.tr8.b64.v2i32(ptr addrspace(3) zeroinitializer)
|
||||
%i17 = shufflevector <8 x i8> %i15, <8 x i8> zeroinitializer, <64 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%i18 = shufflevector <64 x i8> zeroinitializer, <64 x i8> %i17, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 64, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%i19 = insertelement <64 x i8> %i18, i8 0, i64 57
|
||||
@ -63,16 +63,16 @@ bb:
|
||||
%i59 = bitcast i32 %i56 to <4 x i8>
|
||||
%i60 = bitcast i32 %.extract1424 to <4 x i8>
|
||||
%i61 = shufflevector <4 x i8> %i60, <4 x i8> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%i62 = tail call <2 x i32> @llvm.amdgcn.ds.load.tr8.b64.v2i32(ptr addrspace(3) null)
|
||||
%i62 = tail call <2 x i32> @llvm.amdgcn.ds.load.tr8.b64.v2i32(ptr addrspace(3) zeroinitializer)
|
||||
%i63 = bitcast <2 x i32> %i62 to <8 x i8>
|
||||
%i64 = shufflevector <8 x i8> %i63, <8 x i8> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%i65 = tail call <2 x i32> @llvm.amdgcn.ds.load.tr8.b64.v2i32(ptr addrspace(3) null)
|
||||
%i65 = tail call <2 x i32> @llvm.amdgcn.ds.load.tr8.b64.v2i32(ptr addrspace(3) zeroinitializer)
|
||||
%i66 = bitcast <2 x i32> %i65 to <8 x i8>
|
||||
%i67 = shufflevector <8 x i8> %i66, <8 x i8> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%i68 = tail call <2 x i32> @llvm.amdgcn.ds.load.tr8.b64.v2i32(ptr addrspace(3) null)
|
||||
%i68 = tail call <2 x i32> @llvm.amdgcn.ds.load.tr8.b64.v2i32(ptr addrspace(3) zeroinitializer)
|
||||
%i69 = bitcast <2 x i32> %i68 to <8 x i8>
|
||||
%i70 = shufflevector <8 x i8> %i69, <8 x i8> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%i71 = tail call <2 x i32> @llvm.amdgcn.ds.load.tr8.b64.v2i32(ptr addrspace(3) getelementptr (i8, ptr addrspace(3) null, i32 75232))
|
||||
%i71 = tail call <2 x i32> @llvm.amdgcn.ds.load.tr8.b64.v2i32(ptr addrspace(3) getelementptr (i8, ptr addrspace(3) zeroinitializer, i32 75232))
|
||||
%i72 = shufflevector <64 x i8> zeroinitializer, <64 x i8> %i58, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 64, i32 65, i32 66, i32 67>
|
||||
%i73 = bitcast <64 x i8> %i72 to <16 x i32>
|
||||
%i74 = shufflevector <4 x i8> %i59, <4 x i8> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
|
||||
@ -64,7 +64,7 @@ bb14: ; preds = %bb20, %bb11
|
||||
ret void
|
||||
|
||||
bb19: ; preds = %bb
|
||||
store i32 0, ptr addrspace(3) null, align 4
|
||||
store i32 0, ptr addrspace(3) zeroinitializer, align 4
|
||||
br label %bb20
|
||||
|
||||
bb20: ; preds = %bb19, %bb
|
||||
|
||||
@ -20,7 +20,7 @@ define i32 @f() {
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
bb:
|
||||
%i = load i32, ptr addrspace(3) null, align 16
|
||||
%i = load i32, ptr addrspace(3) zeroinitializer, align 16
|
||||
%i6 = icmp ult i32 0, %i
|
||||
%i7 = sext i1 %i6 to i32
|
||||
%i8 = add i32 %i7, 1
|
||||
|
||||
@ -422,7 +422,7 @@ define void @func_stackrestore_null() {
|
||||
; WAVE32-WWM-PREALLOC-NEXT: s_lshl_b32 s4, s4, 5
|
||||
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s4
|
||||
; WAVE32-WWM-PREALLOC-NEXT: s_setpc_b64 s[30:31]
|
||||
call void @llvm.stackrestore.p5(ptr addrspace(5) null)
|
||||
call void @llvm.stackrestore.p5(ptr addrspace(5) zeroinitializer)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
@ -494,7 +494,7 @@ entry:
|
||||
; FIXME: ABI for pre-gfx9
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
store volatile i16 %hi, ptr addrspace(5) null
|
||||
store volatile i16 %hi, ptr addrspace(5) zeroinitializer
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -516,7 +516,7 @@ entry:
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
%trunc = trunc i16 %hi to i8
|
||||
store volatile i8 %trunc, ptr addrspace(5) null
|
||||
store volatile i8 %trunc, ptr addrspace(5) zeroinitializer
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
@ -21,7 +21,7 @@
|
||||
define amdgpu_cs float @sub_zext_zext() {
|
||||
.entry:
|
||||
|
||||
%t519 = load float, ptr addrspace(3) null
|
||||
%t519 = load float, ptr addrspace(3) zeroinitializer
|
||||
|
||||
%t524 = fcmp ogt float %t519, 0.000000e+00
|
||||
%t525 = fcmp olt float %t519, 0.000000e+00
|
||||
|
||||
@ -96,11 +96,11 @@ bb:
|
||||
%and = and i32 %call, 31
|
||||
%icmp = icmp eq i32 %and, 0
|
||||
%lshr = lshr i32 %call, 5
|
||||
%getelementptr67 = getelementptr inbounds nuw %struct.snork, ptr addrspace(3) null, i32 %call
|
||||
%getelementptr67 = getelementptr inbounds nuw %struct.snork, ptr addrspace(3) zeroinitializer, i32 %call
|
||||
%addrspacecast68 = addrspacecast ptr addrspace(3) %getelementptr67 to ptr
|
||||
%getelementptr69 = getelementptr inbounds nuw i8, ptr addrspace(3) null, i32 %lshr
|
||||
%getelementptr69 = getelementptr inbounds nuw i8, ptr addrspace(3) zeroinitializer, i32 %lshr
|
||||
%addrspacecast70 = addrspacecast ptr addrspace(3) %getelementptr69 to ptr
|
||||
%getelementptr71 = getelementptr inbounds nuw i32, ptr addrspace(3) null, i32 %lshr
|
||||
%getelementptr71 = getelementptr inbounds nuw i32, ptr addrspace(3) zeroinitializer, i32 %lshr
|
||||
%addrspacecast72 = addrspacecast ptr addrspace(3) %getelementptr71 to ptr
|
||||
%load73 = load ptr, ptr addrspace(4) @global, align 8
|
||||
%addrspacecast74 = addrspacecast ptr %load73 to ptr addrspace(4)
|
||||
|
||||
@ -28,7 +28,7 @@ define protected amdgpu_kernel void @foo(ptr addrspace(1) %arg, ptr addrspace(1)
|
||||
; CHECK-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
||||
; CHECK-NEXT: s_endpgm
|
||||
bb:
|
||||
%tmp = addrspacecast ptr addrspace(5) null to ptr
|
||||
%tmp = addrspacecast ptr addrspace(5) zeroinitializer to ptr
|
||||
%tmp2 = call i64 @eggs(ptr poison) #1
|
||||
%tmp3 = load ptr, ptr %tmp, align 8
|
||||
%tmp4 = getelementptr inbounds i64, ptr %tmp3, i64 0
|
||||
|
||||
@ -48,7 +48,7 @@ define void @tail_call_i64_inreg_uniform_in_vgpr_convergence_tokens() #0 {
|
||||
; CHECK-NEXT: CONVERGENCECTRL_GLUE [[CONVERGENCECTRL_ENTRY]]
|
||||
; CHECK-NEXT: SI_TCRETURN killed [[S_LOAD_DWORDX2_IMM]], @void_func_i64_inreg, 0, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit $sgpr0, implicit $sgpr1, implicit [[CONVERGENCECTRL_ENTRY]]
|
||||
%t = call token @llvm.experimental.convergence.entry()
|
||||
%uniform.vgpr = load i64, ptr addrspace(3) null, align 8
|
||||
%uniform.vgpr = load i64, ptr addrspace(3) zeroinitializer, align 8
|
||||
tail call void @void_func_i64_inreg(i64 inreg %uniform.vgpr) #0 [ "convergencectrl"(token %t) ]
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -66,7 +66,7 @@ define void @tail_call_i64_inreg_uniform_in_vgpr() {
|
||||
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; CHECK-NEXT: v_readfirstlane_b32 s1, v1
|
||||
; CHECK-NEXT: s_setpc_b64 s[16:17]
|
||||
%uniform.vgpr = load i64, ptr addrspace(3) null, align 8
|
||||
%uniform.vgpr = load i64, ptr addrspace(3) zeroinitializer, align 8
|
||||
tail call void @void_func_i64_inreg(i64 inreg %uniform.vgpr)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -44,7 +44,7 @@ define void @tail_call_uniform_vgpr_value_convergence_tokens() #0 {
|
||||
; CHECK-NEXT: CONVERGENCECTRL_GLUE [[CONVERGENCECTRL_ENTRY]]
|
||||
; CHECK-NEXT: SI_TCRETURN killed [[REG_SEQUENCE]], 0, 0, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit [[CONVERGENCECTRL_ENTRY]]
|
||||
%t = call token @llvm.experimental.convergence.entry()
|
||||
%fptr = load ptr, ptr addrspace(3) null, align 8
|
||||
%fptr = load ptr, ptr addrspace(3) zeroinitializer, align 8
|
||||
tail call void %fptr() #0 [ "convergencectrl"(token %t) ]
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -13,7 +13,7 @@ define void @tail_call_uniform_vgpr_value() {
|
||||
; CHECK-NEXT: v_readfirstlane_b32 s17, v1
|
||||
; CHECK-NEXT: v_readfirstlane_b32 s16, v0
|
||||
; CHECK-NEXT: s_setpc_b64 s[16:17]
|
||||
%fptr = load ptr, ptr addrspace(3) null, align 8
|
||||
%fptr = load ptr, ptr addrspace(3) zeroinitializer, align 8
|
||||
tail call void %fptr()
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -134,7 +134,7 @@ entry:
|
||||
|
||||
trap:
|
||||
call void @llvm.trap()
|
||||
store volatile i32 1234, ptr addrspace(3) null
|
||||
store volatile i32 1234, ptr addrspace(3) zeroinitializer
|
||||
br label %ret
|
||||
|
||||
ret:
|
||||
|
||||
@ -657,7 +657,7 @@ bb:
|
||||
bb5: ; preds = %bb5.backedge, %bb
|
||||
%tmp4.i.sroa.0.0 = phi <9 x double> [ poison, %bb ], [ %tmp4.i.sroa.0.1, %bb5.backedge ]
|
||||
%tmp14.1.i = load i32, ptr inttoptr (i64 128 to ptr), align 128
|
||||
store i32 0, ptr addrspace(5) null, align 4
|
||||
store i32 0, ptr addrspace(5) zeroinitializer, align 4
|
||||
%tmp14.2.i = load i32, ptr inttoptr (i64 128 to ptr), align 128
|
||||
%tmp15.2.i = icmp eq i32 %tmp14.2.i, 0
|
||||
%spec.select.2.i = select i1 %tmp15.2.i, i32 0, i32 %tmp14.1.i
|
||||
|
||||
@ -182,6 +182,6 @@ sw.bb7.i.i.i3.i.i: ; preds = %bb.3
|
||||
bb.4: ; preds = %sw.bb7.i.i.i3.i.i, %bb.3
|
||||
%phi.4 = phi <4 x i32> [ %phi.3, %bb.3 ], [ %insert.0, %sw.bb7.i.i.i3.i.i ]
|
||||
%extract = extractelement <4 x i32> %phi.4, i64 0
|
||||
store i32 %extract, ptr addrspace(5) null, align 4
|
||||
store i32 %extract, ptr addrspace(5) zeroinitializer, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -477,7 +477,7 @@ bb2:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep = getelementptr inbounds i32, ptr addrspace(1) null, i32 %tid
|
||||
%tmp3 = load i32, ptr addrspace(1) %gep, align 16
|
||||
store float 0.000000e+00, ptr addrspace(5) null, align 8
|
||||
store float 0.000000e+00, ptr addrspace(5) zeroinitializer, align 8
|
||||
br label %bb4
|
||||
|
||||
bb4: ; preds = %bb2
|
||||
@ -493,7 +493,7 @@ bb8: ; preds = %bb4
|
||||
br i1 %tmp9, label %bb10, label %bb1
|
||||
|
||||
bb10: ; preds = %bb8
|
||||
store float 0x7FF8000000000000, ptr addrspace(5) null, align 16
|
||||
store float 0x7FF8000000000000, ptr addrspace(5) zeroinitializer, align 16
|
||||
br label %bb18
|
||||
|
||||
bb11: ; preds = %bb6
|
||||
@ -506,15 +506,15 @@ bb14: ; preds = %bb11
|
||||
br i1 %tmp15, label %bb17, label %bb16
|
||||
|
||||
bb16: ; preds = %bb14
|
||||
store float 0x7FF8000000000000, ptr addrspace(5) null, align 16
|
||||
store float 0x7FF8000000000000, ptr addrspace(5) zeroinitializer, align 16
|
||||
br label %bb17
|
||||
|
||||
bb17: ; preds = %bb16, %bb14
|
||||
store float %tmp, ptr addrspace(5) null, align 16
|
||||
store float %tmp, ptr addrspace(5) zeroinitializer, align 16
|
||||
br label %bb18
|
||||
|
||||
bb18: ; preds = %bb17, %bb10
|
||||
store float 0x7FF8000000000000, ptr addrspace(5) null, align 4
|
||||
store float 0x7FF8000000000000, ptr addrspace(5) zeroinitializer, align 4
|
||||
br label %bb2
|
||||
}
|
||||
|
||||
|
||||
@ -16,7 +16,7 @@ entry:
|
||||
%icmp.intr = tail call i64 @llvm.amdgcn.icmp.i64.i16(i16 0, i16 0, i32 32)
|
||||
%cmp0 = icmp eq i64 %icmp.intr, 0
|
||||
%zext0 = zext i1 %cmp0 to i32
|
||||
store i32 %zext0, ptr addrspace(3) null, align 2147483648
|
||||
store i32 %zext0, ptr addrspace(3) zeroinitializer, align 2147483648
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -38,7 +38,7 @@ entry:
|
||||
%fcmp.intr = tail call i64 @llvm.amdgcn.fcmp.i64.f16(half %x, half %y, i32 5)
|
||||
%cmp0 = icmp eq i64 %fcmp.intr, 0
|
||||
%zext0 = zext i1 %cmp0 to i32
|
||||
store i32 %zext0, ptr addrspace(3) null, align 2147483648
|
||||
store i32 %zext0, ptr addrspace(3) zeroinitializer, align 2147483648
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -58,7 +58,7 @@ define amdgpu_kernel void @ballot_test(half %x, half %y) {
|
||||
; CHECK-NEXT: s_endpgm
|
||||
%cmp = fcmp oeq half %x, %y
|
||||
%ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
|
||||
store i64 %ballot, ptr addrspace(3) null, align 2147483648
|
||||
store i64 %ballot, ptr addrspace(3) zeroinitializer, align 2147483648
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
@ -71,7 +71,7 @@ define amdgpu_kernel void @foo(i1 %cmp1) {
|
||||
entry:
|
||||
%wbr = alloca <4 x i32>, align 16, addrspace(5)
|
||||
store ptr null, ptr addrspace(5) %wbr, align 16
|
||||
%wbr_1 = load <4 x i32>, ptr addrspace(5) null, align 16
|
||||
%wbr_1 = load <4 x i32>, ptr addrspace(5) zeroinitializer, align 16
|
||||
%call1 = tail call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %wbr_1, i32 0, i32 0, i32 0)
|
||||
%0 = fpext float %call1 to double
|
||||
%sel1 = select i1 %cmp1, double 1.000000e+00, double 0.000000e+00
|
||||
|
||||
@ -11,11 +11,11 @@ define amdgpu_cs void @xyz () {
|
||||
.entry:
|
||||
br label %loop
|
||||
loop:
|
||||
%ld = load <8 x float>, ptr addrspace(5) null, align 32
|
||||
%ld = load <8 x float>, ptr addrspace(5) zeroinitializer, align 32
|
||||
%in_shuffle = shufflevector <8 x float> %ld, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%wmma = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v16f16(<16 x half> poison, <16 x half> poison, <4 x float> %in_shuffle)
|
||||
%out_shuffle = shufflevector <4 x float> %wmma, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
store <8 x float> %out_shuffle, ptr addrspace(5) null, align 32
|
||||
store <8 x float> %out_shuffle, ptr addrspace(5) zeroinitializer, align 32
|
||||
br i1 false, label %.exit, label %loop
|
||||
.exit:
|
||||
ret void
|
||||
|
||||
@ -3435,7 +3435,7 @@ bb:
|
||||
call void @llvm.amdgcn.init.exec(i64 -1)
|
||||
call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> zeroinitializer, <4 x i32> zeroinitializer, i32 0, i32 0, i32 0)
|
||||
%i = call i32 @llvm.amdgcn.wqm.i32(i32 0)
|
||||
store i32 %i, ptr addrspace(3) null, align 4
|
||||
store i32 %i, ptr addrspace(3) zeroinitializer, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
@ -231,7 +231,7 @@ define void @local_nullptr(ptr addrspace(1) nocapture %results, ptr addrspace(3)
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%tobool = icmp ne ptr addrspace(3) %a, addrspacecast (ptr addrspace(5) null to ptr addrspace(3))
|
||||
%tobool = icmp ne ptr addrspace(3) %a, addrspacecast (ptr addrspace(5) zeroinitializer to ptr addrspace(3))
|
||||
%conv = zext i1 %tobool to i32
|
||||
store i32 %conv, ptr addrspace(1) %results, align 4
|
||||
ret void
|
||||
|
||||
@ -82,7 +82,7 @@ define i1 @icmp_group_flat_cmp_constant_inttoptr(ptr addrspace(3) %group.ptr.0)
|
||||
; CHECK: %cmp = icmp eq ptr %cast0, addrspacecast (ptr addrspace(5) null to ptr)
|
||||
define i1 @icmp_mismatch_flat_group_private_cmp_null(ptr addrspace(3) %group.ptr.0) #0 {
|
||||
%cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
|
||||
%cmp = icmp eq ptr %cast0, addrspacecast (ptr addrspace(5) null to ptr)
|
||||
%cmp = icmp eq ptr %cast0, addrspacecast (ptr addrspace(5) zeroinitializer to ptr)
|
||||
ret i1 %cmp
|
||||
}
|
||||
|
||||
@ -135,7 +135,7 @@ define i1 @icmp_group_flat_cmp_poison(ptr addrspace(3) %group.ptr.0) #0 {
|
||||
; CHECK: %cmp = icmp eq ptr addrspacecast (ptr addrspace(5) null to ptr), %cast0
|
||||
define i1 @icmp_mismatch_flat_group_private_cmp_null_swap(ptr addrspace(3) %group.ptr.0) #0 {
|
||||
%cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
|
||||
%cmp = icmp eq ptr addrspacecast (ptr addrspace(5) null to ptr), %cast0
|
||||
%cmp = icmp eq ptr addrspacecast (ptr addrspace(5) zeroinitializer to ptr), %cast0
|
||||
ret i1 %cmp
|
||||
}
|
||||
|
||||
|
||||
@ -36,7 +36,7 @@ define i8 @ptrmask_cast_local_null_to_flat(i64 %mask) {
|
||||
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[TMP1]], align 1
|
||||
; CHECK-NEXT: ret i8 [[LOAD]]
|
||||
;
|
||||
%masked = call ptr @llvm.ptrmask.p0.i64(ptr addrspacecast (ptr addrspace(3) null to ptr), i64 %mask)
|
||||
%masked = call ptr @llvm.ptrmask.p0.i64(ptr addrspacecast (ptr addrspace(3) zeroinitializer to ptr), i64 %mask)
|
||||
%load = load i8, ptr %masked
|
||||
ret i8 %load
|
||||
}
|
||||
@ -47,7 +47,7 @@ define <3 x ptr addrspace(3)> @ptrmask_vector_cast_local_null_to_flat(<3 x i64>
|
||||
; CHECK-NEXT: [[CAST:%.*]] = addrspacecast <3 x ptr> [[MASKED]] to <3 x ptr addrspace(3)>
|
||||
; CHECK-NEXT: ret <3 x ptr addrspace(3)> [[CAST]]
|
||||
;
|
||||
%masked = call <3 x ptr> @llvm.ptrmask.v3p0.v3i64(<3 x ptr> addrspacecast (<3 x ptr addrspace(3)> <ptr addrspace(3) null, ptr addrspace(3) null, ptr addrspace(3) null> to <3 x ptr>), <3 x i64> %mask)
|
||||
%masked = call <3 x ptr> @llvm.ptrmask.v3p0.v3i64(<3 x ptr> addrspacecast (<3 x ptr addrspace(3)> <ptr addrspace(3) zeroinitializer, ptr addrspace(3) zeroinitializer, ptr addrspace(3) zeroinitializer> to <3 x ptr>), <3 x i64> %mask)
|
||||
%cast = addrspacecast <3 x ptr> %masked to <3 x ptr addrspace(3)>
|
||||
ret <3 x ptr addrspace(3)> %cast
|
||||
}
|
||||
@ -330,7 +330,7 @@ define i8 @ptrmask_cast_local_null_to_flat_const_mask_7fffffffffffffff() {
|
||||
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[TMP1]], align 1
|
||||
; CHECK-NEXT: ret i8 [[LOAD]]
|
||||
;
|
||||
%cast = addrspacecast ptr addrspace(3) null to ptr
|
||||
%cast = addrspacecast ptr addrspace(3) zeroinitializer to ptr
|
||||
%masked = call ptr @llvm.ptrmask.p0.i64(ptr %cast, i64 9223372036854775807)
|
||||
%load = load i8, ptr %masked
|
||||
ret i8 %load
|
||||
@ -364,7 +364,7 @@ define i8 @ptrmask_cast_local_null_to_flat_const_mask_ffffffff00000000() {
|
||||
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) null, align 1
|
||||
; CHECK-NEXT: ret i8 [[LOAD]]
|
||||
;
|
||||
%cast = addrspacecast ptr addrspace(3) null to ptr
|
||||
%cast = addrspacecast ptr addrspace(3) zeroinitializer to ptr
|
||||
%masked = call ptr @llvm.ptrmask.p0.i64(ptr %cast, i64 -4294967296)
|
||||
%load = load i8, ptr %masked
|
||||
ret i8 %load
|
||||
|
||||
@ -164,7 +164,7 @@ define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr_swa
|
||||
; CHECK: %select = select i1 %c, ptr addrspacecast (ptr addrspace(3) null to ptr), ptr addrspacecast (ptr addrspace(1) null to ptr)
|
||||
; CHECK: store i32 7, ptr %select
|
||||
define amdgpu_kernel void @store_select_group_global_mismatch_null_null(i1 %c) #0 {
|
||||
%select = select i1 %c, ptr addrspacecast (ptr addrspace(3) null to ptr), ptr addrspacecast (ptr addrspace(1) null to ptr)
|
||||
%select = select i1 %c, ptr addrspacecast (ptr addrspace(3) zeroinitializer to ptr), ptr addrspacecast (ptr addrspace(1) null to ptr)
|
||||
store i32 7, ptr %select
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -19,7 +19,7 @@ define i1 @not_fold_select(ptr addrspace(1) noundef %x) {
|
||||
%asc.flat = addrspacecast ptr addrspace(1) %x to ptr
|
||||
%is.shared = tail call i1 @llvm.amdgcn.is.shared(ptr %asc.flat)
|
||||
%asc.shared = addrspacecast ptr %asc.flat to ptr addrspace(3)
|
||||
%shared.addr = select i1 %is.shared, ptr addrspace(3) %asc.shared, ptr addrspace(3) null
|
||||
%shared.addr = select i1 %is.shared, ptr addrspace(3) %asc.shared, ptr addrspace(3) zeroinitializer
|
||||
%result = icmp eq ptr addrspace(3) %shared.addr, null
|
||||
ret i1 %result
|
||||
}
|
||||
|
||||
@ -27,7 +27,7 @@ entry:
|
||||
%coerce = alloca %double_double, align 8, addrspace(5)
|
||||
%alpha_union = addrspacecast ptr addrspace(5) %coerce to ptr
|
||||
call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 %coerce, ptr addrspace(4) align 8 %in, i64 16, i1 false)
|
||||
%load1 = load i8, ptr addrspace(5) null, align 1
|
||||
%load1 = load i8, ptr addrspace(5) zeroinitializer, align 1
|
||||
%loadedv = trunc i8 %load1 to i1
|
||||
br i1 %loadedv, label %cond.end, label %cond.false
|
||||
|
||||
|
||||
@ -28,7 +28,7 @@ entry:
|
||||
|
||||
%add = fadd float %ld.c, %ld.c.idx.1
|
||||
store float %add, ptr addrspace(1) %b, align 4
|
||||
store i32 %foo, ptr addrspace(3) null, align 4
|
||||
store i32 %foo, ptr addrspace(3) zeroinitializer, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -55,7 +55,7 @@ entry:
|
||||
|
||||
%add = fadd float %ld.c, %ld.c.idx.1
|
||||
store float %add, ptr addrspace(1) %b, align 4
|
||||
store i32 %foo, ptr addrspace(3) null, align 4
|
||||
store i32 %foo, ptr addrspace(3) zeroinitializer, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
@ -34,8 +34,8 @@ entry:
|
||||
%ld.c = load ptr addrspace(3), ptr addrspace(3) %b, align 4
|
||||
%ld.c.idx.1 = load ptr addrspace(3), ptr addrspace(3) %b.1, align 4
|
||||
|
||||
store ptr addrspace(3) null, ptr addrspace(3) %a, align 4
|
||||
store ptr addrspace(3) null, ptr addrspace(3) %a.1, align 4
|
||||
store ptr addrspace(3) zeroinitializer, ptr addrspace(3) %a, align 4
|
||||
store ptr addrspace(3) zeroinitializer, ptr addrspace(3) %a.1, align 4
|
||||
|
||||
ret void
|
||||
}
|
||||
@ -58,7 +58,7 @@ entry:
|
||||
%ld.2 = load <2 x i32>, ptr addrspace(3) %b.2, align 8
|
||||
|
||||
store i32 0, ptr addrspace(3) %a.0, align 16
|
||||
store ptr addrspace(3) null, ptr addrspace(3) %a.1, align 4
|
||||
store ptr addrspace(3) zeroinitializer, ptr addrspace(3) %a.1, align 4
|
||||
store <2 x i32> <i32 0, i32 0>, ptr addrspace(3) %a.2, align 8
|
||||
|
||||
ret void
|
||||
@ -82,7 +82,7 @@ entry:
|
||||
%ld.2 = load i32, ptr addrspace(3) %b.2, align 4
|
||||
|
||||
store <2 x i32> <i32 0, i32 0>, ptr addrspace(3) %a.0, align 16
|
||||
store ptr addrspace(3) null, ptr addrspace(3) %a.1, align 8
|
||||
store ptr addrspace(3) zeroinitializer, ptr addrspace(3) %a.1, align 8
|
||||
store i32 0, ptr addrspace(3) %a.2, align 4
|
||||
|
||||
ret void
|
||||
|
||||
@ -11,7 +11,7 @@ target datalayout = "e-p:64:64-p1:64:64-p5:32:32"
|
||||
; CHECK: store ptr undef, ptr %tmp7, align 8
|
||||
define void @cast_to_ptr() {
|
||||
entry:
|
||||
%ascast = addrspacecast ptr addrspace(5) null to ptr
|
||||
%ascast = addrspacecast ptr addrspace(5) zeroinitializer to ptr
|
||||
%tmp4 = icmp eq i32 undef, 0
|
||||
%tmp6 = select i1 false, ptr undef, ptr undef
|
||||
%tmp7 = select i1 %tmp4, ptr null, ptr %tmp6
|
||||
@ -28,7 +28,7 @@ entry:
|
||||
define void @cast_to_cast() {
|
||||
entry:
|
||||
%a.ascast = addrspacecast ptr addrspace(5) undef to ptr
|
||||
%b.ascast = addrspacecast ptr addrspace(5) null to ptr
|
||||
%b.ascast = addrspacecast ptr addrspace(5) zeroinitializer to ptr
|
||||
%tmp1 = select i1 false, ptr %a.ascast, ptr undef
|
||||
%tmp3 = select i1 false, ptr %b.ascast, ptr undef
|
||||
%tmp4 = load ptr, ptr %tmp1, align 8
|
||||
|
||||
@ -21,7 +21,7 @@ entry:
|
||||
|
||||
loop:
|
||||
%idx0 = phi i32 [ %next_idx0, %loop ], [ 0, %entry ]
|
||||
%0 = getelementptr inbounds i32, ptr addrspace(5) null, i32 %idx0
|
||||
%0 = getelementptr inbounds i32, ptr addrspace(5) zeroinitializer, i32 %idx0
|
||||
%1 = getelementptr inbounds i32, ptr addrspace(1) null, i32 %idx0
|
||||
store i32 1, ptr addrspace(5) %0
|
||||
store i32 7, ptr addrspace(1) %1
|
||||
|
||||
@ -41,7 +41,7 @@ for.body.1:
|
||||
%conv.1 = phi i64 [ %conv.2, %for.body.1 ], [ %conv, %loopexit ]
|
||||
%I.1 = phi i32 [ %inc.1, %for.body.1 ], [ %inc, %loopexit ]
|
||||
%idxprom = trunc i64 %conv.1 to i32
|
||||
%arrayidx = getelementptr inbounds ptr, ptr addrspace(5) null, i32 %idxprom
|
||||
%arrayidx = getelementptr inbounds ptr, ptr addrspace(5) zeroinitializer, i32 %idxprom
|
||||
%0 = load ptr, ptr addrspace(5) %arrayidx, align 8
|
||||
%arrayidx.1 = getelementptr inbounds ptr, ptr null, i64 %conv.1
|
||||
store ptr %0, ptr %arrayidx.1, align 8
|
||||
@ -58,7 +58,7 @@ for.body:
|
||||
define protected amdgpu_kernel void @baseregtest(i32 %n, i32 %lda, i1 %arg) local_unnamed_addr {
|
||||
; CHECK-LABEL: @baseregtest(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br i1 %arg, label [[EXIT:%.*]], label [[IF_END:%.*]]
|
||||
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[EXIT:%.*]], label [[IF_END:%.*]]
|
||||
; CHECK: if.end:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo()
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 3
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user