[AMDGPU] Fix UB in tests due to mismatched calling conventions (#137957)

Fixes UB in tests due to mismatched CC (definition and callsite).
This commit is contained in:
Manuel Carrasco 2025-05-06 17:31:27 +01:00 committed by GitHub
parent 4eac576654
commit 6479e9bad3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 16 additions and 76 deletions

View File

@ -60,11 +60,11 @@ declare [5 x i8] @external_a5i8_func_void() #0
declare hidden i32 @external_i32_func_i32(i32) #0
; amdgpu_gfx calling convention
declare i1 @external_gfx_i1_func_void() #0
declare i8 @external_gfx_i8_func_void() #0
declare i32 @external_gfx_i32_func_void() #0
declare { i32, i64 } @external_gfx_i32_i64_func_void() #0
declare hidden i32 @external_gfx_i32_func_i32(i32) #0
declare amdgpu_gfx i1 @external_gfx_i1_func_void() #0
declare amdgpu_gfx i8 @external_gfx_i8_func_void() #0
declare amdgpu_gfx i32 @external_gfx_i32_func_void() #0
declare amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void() #0
declare hidden amdgpu_gfx i32 @external_gfx_i32_func_i32(i32) #0
define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 {

View File

@ -802,7 +802,7 @@ entry:
ret i32 %ret
}
declare hidden void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
declare hidden fastcc void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
; GCN-LABEL: name: sibling_call_fastcc_multi_byval
@ -969,7 +969,7 @@ entry:
ret void
}
declare hidden void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
declare hidden fastcc void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
; Callee has a byval and non-byval stack passed argument
define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 {

View File

@ -209,7 +209,7 @@ define amdgpu_kernel void @call_coldcc() #0 {
; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_endpgm
%val = call float @coldcc(float 1.0)
%val = call coldcc float @coldcc(float 1.0)
store float %val, ptr addrspace(1) poison
ret void
}
@ -303,7 +303,7 @@ define amdgpu_kernel void @call_fastcc() #0 {
; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_endpgm
%val = call float @fastcc(float 1.0)
%val = call fastcc float @fastcc(float 1.0)
store float %val, ptr addrspace(1) poison
ret void
}

View File

@ -777,117 +777,57 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1)
ret void
}
define hidden void @void_func_void_clobber_s33() #1 {
define hidden amdgpu_gfx void @void_func_void_clobber_s33() #1 {
; GFX9-LABEL: void_func_void_clobber_s33:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: v_writelane_b32 v0, s33, 0
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; clobber
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: v_readlane_b32 s33, v0, 0
; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: void_func_void_clobber_s33:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: v_writelane_b32 v0, s33, 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; clobber
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_readlane_b32 s33, v0, 0
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_void_clobber_s33:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: v_writelane_b32 v0, s33, 0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; clobber
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s33, v0, 0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; clobber", "~{s33}"() #0
ret void
}
define hidden void @void_func_void_clobber_s34() #1 {
define hidden amdgpu_gfx void @void_func_void_clobber_s34() #1 {
; GFX9-LABEL: void_func_void_clobber_s34:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: v_writelane_b32 v0, s34, 0
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; clobber
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: v_readlane_b32 s34, v0, 0
; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: void_func_void_clobber_s34:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: v_writelane_b32 v0, s34, 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; clobber
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_readlane_b32 s34, v0, 0
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_void_clobber_s34:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: v_writelane_b32 v0, s34, 0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; clobber
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s34, v0, 0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; clobber", "~{s34}"() #0
ret void

View File

@ -310,7 +310,7 @@ define amdgpu_ps void @test_export_in_callee(float %v) #0 {
; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GCN-NEXT: s_endpgm
%x = fadd float %v, 1.0
call void @test_export_gfx(float %x)
call amdgpu_gfx void @test_export_gfx(float %x)
ret void
}
@ -330,7 +330,7 @@ define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 {
; GCN-NEXT: s_endpgm
%x = fadd float %v, 1.0
call void @llvm.amdgcn.s.setprio(i16 0)
call void @test_export_gfx(float %x)
call amdgpu_gfx void @test_export_gfx(float %x)
ret void
}

View File

@ -872,7 +872,7 @@ entry:
ret i32 %ret
}
declare hidden void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
declare hidden fastcc void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
; GCN-LABEL: sibling_call_fastcc_multi_byval:
@ -907,7 +907,7 @@ entry:
ret void
}
declare hidden void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
declare hidden fastcc void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
; Callee has a byval and non-byval stack passed argument
define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 {

View File

@ -4,7 +4,7 @@
target triple = "amdgcn-amd-amdhsa"
; Unknown functions are conservatively passed all implicit parameters
declare void @unknown_call()
declare fastcc void @unknown_call()
; Use the same constant as a sgpr parameter (for the kernel id) and for a vector operation
define protected amdgpu_kernel void @kern(ptr %addr) !llvm.amdgcn.lds.kernel.id !0 {
; CHECK-LABEL: kern: