
Recent upstream trends have moved away from explicitly using `-verify-machineinstrs`, as it's already covered by the expensive checks. This PR removes almost all `-verify-machineinstrs` from tests in `llvm/test/CodeGen/AMDGPU/*.ll`, leaving only those tests where its removal currently causes failures.
315 lines
12 KiB
LLVM
315 lines
12 KiB
LLVM
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 < %s | FileCheck --check-prefixes=CHECK,GFX11 %s
|
|
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck --check-prefixes=CHECK,GFX12 %s
|
|
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+dynamic-vgpr < %s | FileCheck --check-prefixes=CHECK,GFX12,DVGPR %s
|
|
|
|
; CHECK: .amdgpu_pal_metadata
|
|
; CHECK-NEXT: ---
|
|
; CHECK-NEXT: amdpal.pipelines:
|
|
; CHECK-NEXT: - .api: Vulkan
|
|
; CHECK-NEXT: .compute_registers:
|
|
; DVGPR-NEXT: .dynamic_vgpr_en: true
|
|
; CHECK-NEXT: .tg_size_en: true
|
|
; CHECK-NEXT: .tgid_x_en: false
|
|
; CHECK-NEXT: .tgid_y_en: false
|
|
; CHECK-NEXT: .tgid_z_en: false
|
|
; CHECK-NEXT: .tidig_comp_cnt: 0x1
|
|
; CHECK-NEXT: .hardware_stages:
|
|
; CHECK-NEXT: .cs:
|
|
; CHECK-NEXT: .checksum_value: 0x9444d7d0
|
|
; CHECK-NEXT: .debug_mode: 0
|
|
; CHECK-NEXT: .excp_en: 0
|
|
; CHECK-NEXT: .float_mode: 0xc0
|
|
; CHECK-NEXT: .forward_progress: true
|
|
; GFX11-NEXT: .ieee_mode: true
|
|
; CHECK-NEXT: .image_op: false
|
|
; CHECK-NEXT: .lds_size: 0x200
|
|
; CHECK-NEXT: .mem_ordered: true
|
|
; CHECK-NEXT: .sgpr_limit: 0x6a
|
|
; CHECK-NEXT: .threadgroup_dimensions:
|
|
; CHECK-NEXT: - 0x1
|
|
; CHECK-NEXT: - 0x400
|
|
; CHECK-NEXT: - 0x1
|
|
; CHECK-NEXT: .trap_present: false
|
|
; CHECK-NEXT: .user_data_reg_map:
|
|
; CHECK-NEXT: - 0x10000000
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: - 0xffffffff
|
|
; CHECK-NEXT: .user_sgprs: 0x3
|
|
; CHECK-NEXT: .vgpr_limit: 0x100
|
|
; CHECK-NEXT: .wavefront_size: 0x40
|
|
; CHECK-NEXT: .wgp_mode: true
|
|
; CHECK: .registers: {}
|
|
; CHECK-NEXT: .shader_functions:
|
|
; CHECK-NEXT: dynamic_stack:
|
|
; CHECK-NEXT: .backend_stack_size: 0x10
|
|
; CHECK-NEXT: .lds_size: 0
|
|
; CHECK-NEXT: .sgpr_count: 0x22
|
|
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x10
|
|
; CHECK-NEXT: .vgpr_count: 0x2
|
|
; CHECK-NEXT: dynamic_stack_loop:
|
|
; CHECK-NEXT: .backend_stack_size: 0x10
|
|
; CHECK-NEXT: .lds_size: 0
|
|
; CHECK-NEXT: .sgpr_count: 0x22
|
|
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x10
|
|
; CHECK-NEXT: .vgpr_count: 0x3
|
|
; CHECK-NEXT: multiple_stack:
|
|
; CHECK-NEXT: .backend_stack_size: 0x24
|
|
; CHECK-NEXT: .lds_size: 0
|
|
; CHECK-NEXT: .sgpr_count: 0x21
|
|
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x24
|
|
; CHECK-NEXT: .vgpr_count: 0x3
|
|
; CHECK-NEXT: no_stack:
|
|
; CHECK-NEXT: .backend_stack_size: 0
|
|
; CHECK-NEXT: .lds_size: 0
|
|
; CHECK-NEXT: .sgpr_count: 0x20
|
|
; CHECK-NEXT: .stack_frame_size_in_bytes: 0
|
|
; CHECK-NEXT: .vgpr_count: 0x1
|
|
; CHECK-NEXT: no_stack_call:
|
|
; CHECK-NEXT: .backend_stack_size: 0x10
|
|
; CHECK-NEXT: .lds_size: 0
|
|
; CHECK-NEXT: .sgpr_count: 0x22
|
|
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x10
|
|
; CHECK-NEXT: .vgpr_count: 0x3
|
|
; CHECK-NEXT: no_stack_extern_call:
|
|
; CHECK-NEXT: .backend_stack_size: 0x10
|
|
; CHECK-NEXT: .lds_size: 0
|
|
; GFX11-NEXT: .sgpr_count: 0x29
|
|
; GFX12-NEXT: .sgpr_count: 0x24
|
|
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x10
|
|
; CHECK-NEXT: .vgpr_count: 0x58
|
|
; CHECK-NEXT: no_stack_extern_call_many_args:
|
|
; CHECK-NEXT: .backend_stack_size: 0x90
|
|
; CHECK-NEXT: .lds_size: 0
|
|
; GFX11-NEXT: .sgpr_count: 0x29
|
|
; GFX12-NEXT: .sgpr_count: 0x24
|
|
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x90
|
|
; CHECK-NEXT: .vgpr_count: 0x58
|
|
; CHECK-NEXT: no_stack_indirect_call:
|
|
; CHECK-NEXT: .backend_stack_size: 0x10
|
|
; CHECK-NEXT: .lds_size: 0
|
|
; GFX11-NEXT: .sgpr_count: 0x29
|
|
; GFX12-NEXT: .sgpr_count: 0x24
|
|
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x10
|
|
; CHECK-NEXT: .vgpr_count: 0x58
|
|
; CHECK-NEXT: simple_lds:
|
|
; CHECK-NEXT: .backend_stack_size: 0
|
|
; CHECK-NEXT: .lds_size: 0x100
|
|
; CHECK-NEXT: .sgpr_count: 0x20
|
|
; CHECK-NEXT: .stack_frame_size_in_bytes: 0
|
|
; CHECK-NEXT: .vgpr_count: 0x1
|
|
; CHECK-NEXT: simple_lds_recurse:
|
|
; CHECK-NEXT: .backend_stack_size: 0x10
|
|
; CHECK-NEXT: .lds_size: 0x100
|
|
; CHECK-NEXT: .sgpr_count: 0x24
|
|
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x10
|
|
; CHECK-NEXT: .vgpr_count: 0x29
|
|
; CHECK-NEXT: simple_stack:
|
|
; CHECK-NEXT: .backend_stack_size: 0x14
|
|
; CHECK-NEXT: .lds_size: 0
|
|
; CHECK-NEXT: .sgpr_count: 0x21
|
|
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x14
|
|
; CHECK-NEXT: .vgpr_count: 0x2
|
|
; CHECK-NEXT: simple_stack_call:
|
|
; CHECK-NEXT: .backend_stack_size: 0x20
|
|
; CHECK-NEXT: .lds_size: 0
|
|
; CHECK-NEXT: .sgpr_count: 0x22
|
|
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x20
|
|
; CHECK-NEXT: .vgpr_count: 0x4
|
|
; CHECK-NEXT: simple_stack_extern_call:
|
|
; CHECK-NEXT: .backend_stack_size: 0x20
|
|
; CHECK-NEXT: .lds_size: 0
|
|
; GFX11-NEXT: .sgpr_count: 0x29
|
|
; GFX12-NEXT: .sgpr_count: 0x24
|
|
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x20
|
|
; CHECK-NEXT: .vgpr_count: 0x58
|
|
; CHECK-NEXT: simple_stack_indirect_call:
|
|
; CHECK-NEXT: .backend_stack_size: 0x20
|
|
; CHECK-NEXT: .lds_size: 0
|
|
; GFX11-NEXT: .sgpr_count: 0x29
|
|
; GFX12-NEXT: .sgpr_count: 0x24
|
|
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x20
|
|
; CHECK-NEXT: .vgpr_count: 0x58
|
|
; CHECK-NEXT: simple_stack_recurse:
|
|
; CHECK-NEXT: .backend_stack_size: 0x20
|
|
; CHECK-NEXT: .lds_size: 0
|
|
; CHECK-NEXT: .sgpr_count: 0x24
|
|
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x20
|
|
; CHECK-NEXT: .vgpr_count: 0x2a
|
|
; CHECK:amdpal.version:
|
|
; CHECK-NEXT: - 0x3
|
|
; CHECK-NEXT: - 0
|
|
; CHECK-NEXT:...
|
|
; CHECK-NEXT: .end_amdgpu_pal_metadata
|
|
|
|
declare amdgpu_gfx float @extern_func(float) #0
|
|
declare amdgpu_gfx float @extern_func_many_args(<64 x float>) #0
|
|
|
|
@funcptr = external hidden unnamed_addr addrspace(4) constant ptr, align 4
|
|
|
|
define amdgpu_gfx float @no_stack(float %arg0) #0 {
|
|
%add = fadd float %arg0, 1.0
|
|
ret float %add
|
|
}
|
|
|
|
define amdgpu_gfx float @simple_stack(float %arg0) #0 {
|
|
%stack = alloca float, i32 4, align 4, addrspace(5)
|
|
store volatile float 2.0, ptr addrspace(5) %stack
|
|
%val = load volatile float, ptr addrspace(5) %stack
|
|
%add = fadd float %arg0, %val
|
|
ret float %add
|
|
}
|
|
|
|
define amdgpu_gfx float @multiple_stack(float %arg0) #0 {
|
|
%stack = alloca float, i32 4, align 4, addrspace(5)
|
|
store volatile float 2.0, ptr addrspace(5) %stack
|
|
%val = load volatile float, ptr addrspace(5) %stack
|
|
%add = fadd float %arg0, %val
|
|
%stack2 = alloca float, i32 4, align 4, addrspace(5)
|
|
store volatile float 2.0, ptr addrspace(5) %stack2
|
|
%val2 = load volatile float, ptr addrspace(5) %stack2
|
|
%add2 = fadd float %add, %val2
|
|
ret float %add2
|
|
}
|
|
|
|
define amdgpu_gfx float @dynamic_stack(float %arg0) #0 {
|
|
bb0:
|
|
%cmp = fcmp ogt float %arg0, 0.0
|
|
br i1 %cmp, label %bb1, label %bb2
|
|
|
|
bb1:
|
|
%stack = alloca float, i32 4, align 4, addrspace(5)
|
|
store volatile float 2.0, ptr addrspace(5) %stack
|
|
%val = load volatile float, ptr addrspace(5) %stack
|
|
%add = fadd float %arg0, %val
|
|
br label %bb2
|
|
|
|
bb2:
|
|
%res = phi float [ 0.0, %bb0 ], [ %add, %bb1 ]
|
|
ret float %res
|
|
}
|
|
|
|
define amdgpu_gfx float @dynamic_stack_loop(float %arg0) #0 {
|
|
bb0:
|
|
br label %bb1
|
|
|
|
bb1:
|
|
%ctr = phi i32 [ 0, %bb0 ], [ %newctr, %bb1 ]
|
|
%stack = alloca float, i32 4, align 4, addrspace(5)
|
|
store volatile float 2.0, ptr addrspace(5) %stack
|
|
%val = load volatile float, ptr addrspace(5) %stack
|
|
%add = fadd float %arg0, %val
|
|
%cmp = icmp sgt i32 %ctr, 0
|
|
%newctr = sub i32 %ctr, 1
|
|
br i1 %cmp, label %bb1, label %bb2
|
|
|
|
bb2:
|
|
ret float %add
|
|
}
|
|
|
|
define amdgpu_gfx float @no_stack_call(float %arg0) #0 {
|
|
%res = call amdgpu_gfx float @simple_stack(float %arg0)
|
|
ret float %res
|
|
}
|
|
|
|
define amdgpu_gfx float @simple_stack_call(float %arg0) #0 {
|
|
%stack = alloca float, i32 4, align 4, addrspace(5)
|
|
store volatile float 2.0, ptr addrspace(5) %stack
|
|
%val = load volatile float, ptr addrspace(5) %stack
|
|
%res = call amdgpu_gfx float @simple_stack(float %arg0)
|
|
%add = fadd float %res, %val
|
|
ret float %add
|
|
}
|
|
|
|
define amdgpu_gfx float @no_stack_extern_call(float %arg0) #0 {
|
|
%res = call amdgpu_gfx float @extern_func(float %arg0)
|
|
ret float %res
|
|
}
|
|
|
|
define amdgpu_gfx float @simple_stack_extern_call(float %arg0) #0 {
|
|
%stack = alloca float, i32 4, align 4, addrspace(5)
|
|
store volatile float 2.0, ptr addrspace(5) %stack
|
|
%val = load volatile float, ptr addrspace(5) %stack
|
|
%res = call amdgpu_gfx float @extern_func(float %arg0)
|
|
%add = fadd float %res, %val
|
|
ret float %add
|
|
}
|
|
|
|
define amdgpu_gfx float @no_stack_extern_call_many_args(<64 x float> %arg0) #0 {
|
|
%res = call amdgpu_gfx float @extern_func_many_args(<64 x float> %arg0)
|
|
ret float %res
|
|
}
|
|
|
|
define amdgpu_gfx float @no_stack_indirect_call(float %arg0) #0 {
|
|
%fptr = load ptr, ptr addrspace(4) @funcptr
|
|
call amdgpu_gfx void %fptr()
|
|
ret float %arg0
|
|
}
|
|
|
|
define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) #0 {
|
|
%stack = alloca float, i32 4, align 4, addrspace(5)
|
|
store volatile float 2.0, ptr addrspace(5) %stack
|
|
%val = load volatile float, ptr addrspace(5) %stack
|
|
%fptr = load ptr, ptr addrspace(4) @funcptr
|
|
call amdgpu_gfx void %fptr()
|
|
%add = fadd float %arg0, %val
|
|
ret float %add
|
|
}
|
|
|
|
define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
|
|
%stack = alloca float, i32 4, align 4, addrspace(5)
|
|
store volatile float 2.0, ptr addrspace(5) %stack
|
|
%val = load volatile float, ptr addrspace(5) %stack
|
|
%res = call amdgpu_gfx float @simple_stack_recurse(float %arg0)
|
|
%add = fadd float %res, %val
|
|
ret float %add
|
|
}
|
|
|
|
@lds = internal addrspace(3) global [64 x float] poison
|
|
|
|
define amdgpu_gfx float @simple_lds(float %arg0) #0 {
|
|
%val = load float, ptr addrspace(3) @lds
|
|
ret float %val
|
|
}
|
|
|
|
define amdgpu_gfx float @simple_lds_recurse(float %arg0) #0 {
|
|
%val = load float, ptr addrspace(3) @lds
|
|
%res = call amdgpu_gfx float @simple_lds_recurse(float %val)
|
|
ret float %res
|
|
}
|
|
|
|
attributes #0 = { nounwind }
|
|
|
|
!amdgpu.pal.metadata.msgpack = !{!0}
|
|
|
|
!0 = !{!"\82\B0amdpal.pipelines\91\8A\A4.api\A6Vulkan\B2.compute_registers\85\AB.tg_size_en\C3\AA.tgid_x_en\C2\AA.tgid_y_en\C2\AA.tgid_z_en\C2\AF.tidig_comp_cnt\01\B0.hardware_stages\81\A3.cs\8C\AF.checksum_value\CE\94D\D7\D0\AB.debug_mode\00\AB.float_mode\CC\C0\A9.image_op\C2\AC.mem_ordered\C3\AB.sgpr_limitj\B7.threadgroup_dimensions\93\01\CD\04\00\01\AD.trap_present\00\B2.user_data_reg_map\DC\00 \CE\10\00\00\00\CE\FF\FF\FF\FF\00\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\AB.user_sgprs\03\AB.vgpr_limit\CD\01\00\AF.wavefront_size@\B7.internal_pipeline_hash\92\CF\E7\10k\A6:\A6%\F7\CF\B2\1F\1A\D4{\DA\E1T\AA.registers\80\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\CF\E9Zn7}\1E\B9\E7\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CE\FF\FF\FF\FF\A5.type\A2Cs\B0.user_data_limit\01\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\B4X\B8\11[\A4\88P\CF\A0;\B0\AF\FF\B4\BE\C0\AD.llpc_version\A461.1\AEamdpal.version\92\03\00"}
|
|
!1 = !{i32 7}
|