llvm-project/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
Janek van Oirschot c897c13dde
[AMDGPU] Convert AMDGPUResourceUsageAnalysis pass from Module to MF pass (#102913)
Converts AMDGPUResourceUsageAnalysis pass from Module to MachineFunction
pass. Moves function resource info propagation to the MC layer (through
helpers in AMDGPUMCResourceInfo) by generating MCExprs for every
function resource which the emitters have been prepped for.

Fixes https://github.com/llvm/llvm-project/issues/64863
2024-09-30 11:43:34 +01:00

306 lines
9.9 KiB
LLVM

; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,CI %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN-V5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN-V5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-BUG %s
; Make sure to also run on a GPU with the SGPR allocation bug (the -mcpu=iceland run above).
; GCN-LABEL: {{^}}use_vcc:
; GCN: ; TotalNumSgprs: 34
; GCN: ; NumVgprs: 0
; Leaf function: its only operation is an empty inline asm that lists vcc as a
; clobber, so vcc is the sole register this body explicitly touches.
define void @use_vcc() #1 {
call void asm sideeffect "", "~{vcc}" () #0
ret void
}
; GCN-LABEL: {{^}}indirect_use_vcc:
; GCN: s_mov_b32 s4, s33
; GCN: v_writelane_b32 v40, s4, 2
; GCN: v_writelane_b32 v40, s30, 0
; GCN: v_writelane_b32 v40, s31, 1
; GCN: s_swappc_b64
; GCN: v_readlane_b32 s31, v40, 1
; GCN: v_readlane_b32 s30, v40, 0
; GCN: v_readlane_b32 s4, v40, 2
; GCN: s_mov_b32 s33, s4
; GCN: s_setpc_b64 s[30:31]
; GCN: ; TotalNumSgprs: 36
; GCN: ; NumVgprs: 41
; Non-kernel wrapper whose body is a single call to @use_vcc.
define void @indirect_use_vcc() #1 {
call void @use_vcc()
ret void
}
; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel:
; CI: ; TotalNumSgprs: 38
; VI-NOBUG: ; TotalNumSgprs: 40
; VI-BUG: ; TotalNumSgprs: 96
; GCN: ; NumVgprs: 41
; Kernel entry point that reaches @use_vcc through @indirect_use_vcc, i.e. two
; call levels deep. The %out argument is not referenced in the body.
define amdgpu_kernel void @indirect_2level_use_vcc_kernel(ptr addrspace(1) %out) #0 {
call void @indirect_use_vcc()
ret void
}
; GCN-LABEL: {{^}}use_flat_scratch:
; CI: ; TotalNumSgprs: 36
; VI: ; TotalNumSgprs: 38
; GCN: ; NumVgprs: 0
; Leaf function: its only operation is an empty inline asm that lists
; flat_scratch as a clobber.
define void @use_flat_scratch() #1 {
call void asm sideeffect "", "~{flat_scratch}" () #0
ret void
}
; GCN-LABEL: {{^}}indirect_use_flat_scratch:
; CI: ; TotalNumSgprs: 38
; VI: ; TotalNumSgprs: 40
; GCN: ; NumVgprs: 41
; Non-kernel wrapper whose body is a single call to @use_flat_scratch.
define void @indirect_use_flat_scratch() #1 {
call void @use_flat_scratch()
ret void
}
; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel:
; CI: ; TotalNumSgprs: 38
; VI-NOBUG: ; TotalNumSgprs: 40
; VI-BUG: ; TotalNumSgprs: 96
; GCN: ; NumVgprs: 41
; Kernel entry point that reaches @use_flat_scratch through
; @indirect_use_flat_scratch, i.e. two call levels deep. The %out argument is
; not referenced in the body.
define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(ptr addrspace(1) %out) #0 {
call void @indirect_use_flat_scratch()
ret void
}
; GCN-LABEL: {{^}}use_10_vgpr:
; GCN: ; NumVgprs: 10
; Leaf function that clobbers v0 through v9 using two empty inline asm
; statements, five registers each.
define void @use_10_vgpr() #1 {
call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4}"() #0
call void asm sideeffect "", "~{v5},~{v6},~{v7},~{v8},~{v9}"() #0
ret void
}
; GCN-LABEL: {{^}}indirect_use_10_vgpr:
; GCN: ; NumVgprs: 41
; Non-kernel wrapper whose body is a single call to @use_10_vgpr.
; It is tagged with attribute group #0 where most other non-kernel functions in
; this file use #1; the two groups list the same attributes.
define void @indirect_use_10_vgpr() #0 {
call void @use_10_vgpr()
ret void
}
; GCN-LABEL: {{^}}indirect_2_level_use_10_vgpr:
; GCN: ; NumVgprs: 41
; Kernel entry point that reaches @use_10_vgpr through @indirect_use_10_vgpr.
define amdgpu_kernel void @indirect_2_level_use_10_vgpr() #0 {
call void @indirect_use_10_vgpr()
ret void
}
; GCN-LABEL: {{^}}use_50_vgpr:
; GCN: ; NumVgprs: 50
; Leaf function that clobbers only v49. The accompanying check expects a VGPR
; count of 50, i.e. the highest clobbered index plus one.
define void @use_50_vgpr() #1 {
call void asm sideeffect "", "~{v49}"() #0
ret void
}
; GCN-LABEL: {{^}}indirect_use_50_vgpr:
; GCN: ; NumVgprs: 50
; Non-kernel wrapper whose body is a single call to @use_50_vgpr.
; Like @indirect_use_10_vgpr, it uses attribute group #0 rather than #1.
define void @indirect_use_50_vgpr() #0 {
call void @use_50_vgpr()
ret void
}
; GCN-LABEL: {{^}}use_80_sgpr:
; GCN: ; TotalNumSgprs: 80
; Leaf function that clobbers only s79. The accompanying check expects a total
; SGPR count of 80.
define void @use_80_sgpr() #1 {
call void asm sideeffect "", "~{s79}"() #0
ret void
}
; GCN-LABEL: {{^}}indirect_use_80_sgpr:
; GCN: ; TotalNumSgprs: 82
; Non-kernel wrapper whose body is a single call to @use_80_sgpr.
define void @indirect_use_80_sgpr() #1 {
call void @use_80_sgpr()
ret void
}
; GCN-LABEL: {{^}}indirect_2_level_use_80_sgpr:
; CI: ; TotalNumSgprs: 84
; VI-NOBUG: ; TotalNumSgprs: 86
; VI-BUG: ; TotalNumSgprs: 96
; Kernel entry point that reaches @use_80_sgpr through @indirect_use_80_sgpr.
define amdgpu_kernel void @indirect_2_level_use_80_sgpr() #0 {
call void @indirect_use_80_sgpr()
ret void
}
; GCN-LABEL: {{^}}use_stack0:
; GCN: ScratchSize: 2052
; Leaf function with a 512 x i32 (2048-byte) addrspace(5) alloca. The address
; is passed to an inline asm as a "v" operand so the buffer has a user.
define void @use_stack0() #1 {
  %buf = alloca [512 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %buf) #0
  ret void
}
; GCN-LABEL: {{^}}use_stack1:
; GCN: ScratchSize: 404
; Leaf function with a 100 x i32 (400-byte) addrspace(5) alloca. The address
; is passed to an inline asm as a "v" operand so the buffer has a user.
define void @use_stack1() #1 {
  %buf = alloca [100 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %buf) #0
  ret void
}
; GCN-LABEL: {{^}}indirect_use_stack:
; GCN: ScratchSize: 2132
; Takes a 16 x i32 addrspace(5) alloca of its own, exposes it to an inline asm,
; and then calls @use_stack0.
define void @indirect_use_stack() #1 {
  %local = alloca [16 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %local) #0
  call void @use_stack0()
  ret void
}
; GCN-LABEL: {{^}}indirect_2_level_use_stack:
; GCN: ScratchSize: 2132
; Kernel entry point that reaches @use_stack0 through @indirect_use_stack. The
; kernel body itself has no alloca.
define amdgpu_kernel void @indirect_2_level_use_stack() #0 {
call void @indirect_use_stack()
ret void
}
; The kernel's ScratchSize should be the maximum of its callees' usage, not the sum.
; GCN-LABEL: {{^}}multi_call_use_use_stack:
; GCN: ScratchSize: 2052
; Kernel that calls @use_stack0 and then @use_stack1, one after the other. The
; kernel body itself has no alloca.
define amdgpu_kernel void @multi_call_use_use_stack() #0 {
call void @use_stack0()
call void @use_stack1()
ret void
}
; Declaration only: @external has no body in this module. It carries attribute
; group #0, which includes norecurse.
declare void @external() #0
; GCN-LABEL: {{^}}usage_external:
; FIXME: The next two lines have no check prefix, so FileCheck ignores them.
; TotalNumSgprs: 48
; NumVgprs: 24
; GCN: ScratchSize: 16384
;
; GCN-V5-LABEL: {{^}}usage_external:
; GCN-V5: ScratchSize: 0
; Kernel whose only operation is a call to @external, which is declared but not
; defined in this module.
define amdgpu_kernel void @usage_external() #0 {
call void @external()
ret void
}
; Declaration only: @external_recurse has no body in this module. It carries
; attribute group #2, which, unlike #0 and #1, does not include norecurse.
declare void @external_recurse() #2
; GCN-LABEL: {{^}}usage_external_recurse:
; FIXME: The next two lines have no check prefix, so FileCheck ignores them.
; TotalNumSgprs: 48
; NumVgprs: 24
; GCN: ScratchSize: 16384
;
; GCN-V5-LABEL: {{^}}usage_external_recurse:
; GCN-V5: ScratchSize: 0
; Kernel whose only operation is a call to @external_recurse, which is declared
; but not defined in this module and is not marked norecurse.
define amdgpu_kernel void @usage_external_recurse() #0 {
call void @external_recurse()
ret void
}
; GCN-LABEL: {{^}}direct_recursion_use_stack:
; GCN: ScratchSize: 18448{{$}}
;
; GCN-V5-LABEL: {{^}}direct_recursion_use_stack:
; GCN-V5: ScratchSize: 2064{{$}}
; Self-recursive function. Every activation takes a 512 x i32 addrspace(5)
; alloca and exposes it to an inline asm. When %val is zero it returns
; immediately; otherwise it calls itself with %val - 1 before returning.
; It uses attribute group #2, which does not include norecurse.
define void @direct_recursion_use_stack(i32 %val) #2 {
  %frame = alloca [512 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %frame) #0
  %is.zero = icmp eq i32 %val, 0
  br i1 %is.zero, label %done, label %recurse

recurse:
  %next = sub i32 %val, 1
  call void @direct_recursion_use_stack(i32 %next)
  br label %done

done:
  ret void
}
; GCN-LABEL: {{^}}usage_direct_recursion:
; GCN: .amdhsa_private_segment_fixed_size 18448
;
; GCN-V5-LABEL: {{^}}usage_direct_recursion:
; GCN-V5: .amdhsa_private_segment_fixed_size 2064{{$}}
; Kernel that forwards its %n argument to the self-recursive
; @direct_recursion_use_stack. The checks for this kernel look at the
; .amdhsa_private_segment_fixed_size directive, not the ScratchSize comment.
define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
call void @direct_recursion_use_stack(i32 %n)
ret void
}
; Make sure there's no assert when a sgpr96 is used.
; GCN-LABEL: {{^}}count_use_sgpr96_external_call
; GCN: ; sgpr96 s[{{[0-9]+}}:{{[0-9]+}}]
; GCN: .set count_use_sgpr96_external_call.num_vgpr, max(0, amdgpu.max_num_vgpr)
; GCN: .set count_use_sgpr96_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
; CI: TotalNumSgprs: count_use_sgpr96_external_call.numbered_sgpr+4
; VI-BUG: TotalNumSgprs: 96
; GCN: NumVgprs: count_use_sgpr96_external_call.num_vgpr
; Kernel with no attribute group on the define. It feeds a constant <3 x i32>
; (96 bits) to an inline asm through an "s" constraint, then calls the
; undefined function @external.
define amdgpu_kernel void @count_use_sgpr96_external_call() {
entry:
tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
call void @external()
ret void
}
; Make sure there's no assert when a sgpr160 is used.
; GCN-LABEL: {{^}}count_use_sgpr160_external_call
; GCN: ; sgpr160 s[{{[0-9]+}}:{{[0-9]+}}]
; GCN: .set count_use_sgpr160_external_call.num_vgpr, max(0, amdgpu.max_num_vgpr)
; GCN: .set count_use_sgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
; CI: TotalNumSgprs: count_use_sgpr160_external_call.numbered_sgpr+4
; VI-BUG: TotalNumSgprs: 96
; GCN: NumVgprs: count_use_sgpr160_external_call.num_vgpr
; Kernel with no attribute group on the define. It feeds a constant <5 x i32>
; (160 bits) to an inline asm through an "s" constraint, then calls the
; undefined function @external.
define amdgpu_kernel void @count_use_sgpr160_external_call() {
entry:
tail call void asm sideeffect "; sgpr160 $0", "s"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
call void @external()
ret void
}
; Make sure there's no assert when a vgpr160 is used.
; GCN-LABEL: {{^}}count_use_vgpr160_external_call
; GCN: ; vgpr160 v[{{[0-9]+}}:{{[0-9]+}}]
; GCN: .set count_use_vgpr160_external_call.num_vgpr, max(5, amdgpu.max_num_vgpr)
; GCN: .set count_use_vgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
; CI: TotalNumSgprs: count_use_vgpr160_external_call.numbered_sgpr+4
; VI-BUG: TotalNumSgprs: 96
; GCN: NumVgprs: count_use_vgpr160_external_call.num_vgpr
; Kernel with no attribute group on the define. It feeds a constant <5 x i32>
; (160 bits) to an inline asm through a "v" constraint, then calls the
; undefined function @external. The accompanying check expects the local
; VGPR count term to be 5.
define amdgpu_kernel void @count_use_vgpr160_external_call() {
entry:
tail call void asm sideeffect "; vgpr160 $0", "v"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
call void @external()
ret void
}
; GCN: .set amdgpu.max_num_vgpr, 50
; GCN: .set amdgpu.max_num_agpr, 0
; GCN: .set amdgpu.max_num_sgpr, 80
; GCN-LABEL: amdhsa.kernels:
; GCN: .name: count_use_sgpr96_external_call
; CI: .sgpr_count: 84
; VI-NOBUG: .sgpr_count: 86
; VI-BUG: .sgpr_count: 96
; GCN: .vgpr_count: 50
; GCN: .name: count_use_sgpr160_external_call
; CI: .sgpr_count: 84
; VI-NOBUG: .sgpr_count: 86
; VI-BUG: .sgpr_count: 96
; GCN: .vgpr_count: 50
; GCN: .name: count_use_vgpr160_external_call
; CI: .sgpr_count: 84
; VI-NOBUG: .sgpr_count: 86
; VI-BUG: .sgpr_count: 96
; GCN: .vgpr_count: 50
; Attribute groups used throughout this file.
;   #0 and #1 list exactly the same attributes: nounwind, noinline, norecurse,
;   and the full set of "amdgpu-no-*" string attributes shown below.
;   #2 has only nounwind and noinline; it is used where norecurse must be
;   absent (@external_recurse and @direct_recursion_use_stack).
attributes #0 = { nounwind noinline norecurse "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
attributes #1 = { nounwind noinline norecurse "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
attributes #2 = { nounwind noinline }
; Module flag selecting the AMDHSA code object version. The i32 operand below
; is a textual placeholder, not valid IR as written: each test invocation at
; the top of the file pipes this file through sed to substitute 400, 500 or
; 600 before llc sees it.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}