
This reverts commit adaff46d087799072438dd744b038e6fd50a2d78. Drop the -O3 checks from default-attributes.hip. I don't know why they are different on some bots but reverting this is far too disruptive.
345 lines
15 KiB
LLVM
345 lines
15 KiB
LLVM
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=VI %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
|
|
|
|
; Make sure the stack is never realigned for entry functions.
|
|
|
|
define amdgpu_kernel void @max_alignment_128() #0 {
|
|
; VI-LABEL: max_alignment_128:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_add_u32 s0, s0, s17
|
|
; VI-NEXT: s_addc_u32 s1, s1, 0
|
|
; VI-NEXT: v_mov_b32_e32 v0, 3
|
|
; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: v_mov_b32_e32 v0, 9
|
|
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:128
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_endpgm
|
|
; VI-NEXT: .section .rodata,"a"
|
|
; VI-NEXT: .p2align 6
|
|
; VI-NEXT: .amdhsa_kernel max_alignment_128
|
|
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
|
|
; VI-NEXT: .amdhsa_private_segment_fixed_size 256
|
|
; VI-NEXT: .amdhsa_kernarg_size 56
|
|
; VI-NEXT: .amdhsa_user_sgpr_count 14
|
|
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
|
|
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
|
|
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 1
|
|
; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
|
|
; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 1
|
|
; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
|
|
; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0
|
|
; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
|
|
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
|
|
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
|
|
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
|
|
; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0
|
|
; VI-NEXT: .amdhsa_system_vgpr_workitem_id 2
|
|
; VI-NEXT: .amdhsa_next_free_vgpr 1
|
|
; VI-NEXT: .amdhsa_next_free_sgpr 18
|
|
; VI-NEXT: .amdhsa_reserve_vcc 0
|
|
; VI-NEXT: .amdhsa_reserve_flat_scratch 0
|
|
; VI-NEXT: .amdhsa_float_round_mode_32 0
|
|
; VI-NEXT: .amdhsa_float_round_mode_16_64 0
|
|
; VI-NEXT: .amdhsa_float_denorm_mode_32 3
|
|
; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3
|
|
; VI-NEXT: .amdhsa_dx10_clamp 1
|
|
; VI-NEXT: .amdhsa_ieee_mode 1
|
|
; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
|
|
; VI-NEXT: .amdhsa_exception_fp_denorm_src 0
|
|
; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
|
|
; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0
|
|
; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0
|
|
; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0
|
|
; VI-NEXT: .amdhsa_exception_int_div_zero 0
|
|
; VI-NEXT: .end_amdhsa_kernel
|
|
; VI-NEXT: .text
|
|
;
|
|
; GFX9-LABEL: max_alignment_128:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_add_u32 s0, s0, s17
|
|
; GFX9-NEXT: s_addc_u32 s1, s1, 0
|
|
; GFX9-NEXT: v_mov_b32_e32 v0, 3
|
|
; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v0, 9
|
|
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:128
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_endpgm
|
|
; GFX9-NEXT: .section .rodata,"a"
|
|
; GFX9-NEXT: .p2align 6
|
|
; GFX9-NEXT: .amdhsa_kernel max_alignment_128
|
|
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
|
|
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 256
|
|
; GFX9-NEXT: .amdhsa_kernarg_size 56
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_count 14
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 1
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 1
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0
|
|
; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
|
|
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
|
|
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
|
|
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
|
|
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0
|
|
; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 2
|
|
; GFX9-NEXT: .amdhsa_next_free_vgpr 1
|
|
; GFX9-NEXT: .amdhsa_next_free_sgpr 18
|
|
; GFX9-NEXT: .amdhsa_reserve_vcc 0
|
|
; GFX9-NEXT: .amdhsa_reserve_flat_scratch 0
|
|
; GFX9-NEXT: .amdhsa_reserve_xnack_mask 1
|
|
; GFX9-NEXT: .amdhsa_float_round_mode_32 0
|
|
; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
|
|
; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3
|
|
; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3
|
|
; GFX9-NEXT: .amdhsa_dx10_clamp 1
|
|
; GFX9-NEXT: .amdhsa_ieee_mode 1
|
|
; GFX9-NEXT: .amdhsa_fp16_overflow 0
|
|
; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
|
|
; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0
|
|
; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
|
|
; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0
|
|
; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0
|
|
; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0
|
|
; GFX9-NEXT: .amdhsa_exception_int_div_zero 0
|
|
; GFX9-NEXT: .end_amdhsa_kernel
|
|
; GFX9-NEXT: .text
|
|
%clutter = alloca i8, addrspace(5) ; Force non-zero offset for next alloca
|
|
store volatile i8 3, ptr addrspace(5) %clutter
|
|
%alloca.align = alloca i32, align 128, addrspace(5)
|
|
store volatile i32 9, ptr addrspace(5) %alloca.align, align 128
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @stackrealign_attr() #1 {
|
|
; VI-LABEL: stackrealign_attr:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_add_u32 s0, s0, s17
|
|
; VI-NEXT: s_addc_u32 s1, s1, 0
|
|
; VI-NEXT: v_mov_b32_e32 v0, 3
|
|
; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: v_mov_b32_e32 v0, 9
|
|
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_endpgm
|
|
; VI-NEXT: .section .rodata,"a"
|
|
; VI-NEXT: .p2align 6
|
|
; VI-NEXT: .amdhsa_kernel stackrealign_attr
|
|
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
|
|
; VI-NEXT: .amdhsa_private_segment_fixed_size 12
|
|
; VI-NEXT: .amdhsa_kernarg_size 56
|
|
; VI-NEXT: .amdhsa_user_sgpr_count 14
|
|
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
|
|
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
|
|
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 1
|
|
; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
|
|
; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 1
|
|
; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
|
|
; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0
|
|
; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
|
|
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
|
|
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
|
|
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
|
|
; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0
|
|
; VI-NEXT: .amdhsa_system_vgpr_workitem_id 2
|
|
; VI-NEXT: .amdhsa_next_free_vgpr 1
|
|
; VI-NEXT: .amdhsa_next_free_sgpr 18
|
|
; VI-NEXT: .amdhsa_reserve_vcc 0
|
|
; VI-NEXT: .amdhsa_reserve_flat_scratch 0
|
|
; VI-NEXT: .amdhsa_float_round_mode_32 0
|
|
; VI-NEXT: .amdhsa_float_round_mode_16_64 0
|
|
; VI-NEXT: .amdhsa_float_denorm_mode_32 3
|
|
; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3
|
|
; VI-NEXT: .amdhsa_dx10_clamp 1
|
|
; VI-NEXT: .amdhsa_ieee_mode 1
|
|
; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
|
|
; VI-NEXT: .amdhsa_exception_fp_denorm_src 0
|
|
; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
|
|
; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0
|
|
; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0
|
|
; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0
|
|
; VI-NEXT: .amdhsa_exception_int_div_zero 0
|
|
; VI-NEXT: .end_amdhsa_kernel
|
|
; VI-NEXT: .text
|
|
;
|
|
; GFX9-LABEL: stackrealign_attr:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_add_u32 s0, s0, s17
|
|
; GFX9-NEXT: s_addc_u32 s1, s1, 0
|
|
; GFX9-NEXT: v_mov_b32_e32 v0, 3
|
|
; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v0, 9
|
|
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_endpgm
|
|
; GFX9-NEXT: .section .rodata,"a"
|
|
; GFX9-NEXT: .p2align 6
|
|
; GFX9-NEXT: .amdhsa_kernel stackrealign_attr
|
|
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
|
|
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 12
|
|
; GFX9-NEXT: .amdhsa_kernarg_size 56
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_count 14
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 1
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 1
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0
|
|
; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
|
|
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
|
|
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
|
|
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
|
|
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0
|
|
; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 2
|
|
; GFX9-NEXT: .amdhsa_next_free_vgpr 1
|
|
; GFX9-NEXT: .amdhsa_next_free_sgpr 18
|
|
; GFX9-NEXT: .amdhsa_reserve_vcc 0
|
|
; GFX9-NEXT: .amdhsa_reserve_flat_scratch 0
|
|
; GFX9-NEXT: .amdhsa_reserve_xnack_mask 1
|
|
; GFX9-NEXT: .amdhsa_float_round_mode_32 0
|
|
; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
|
|
; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3
|
|
; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3
|
|
; GFX9-NEXT: .amdhsa_dx10_clamp 1
|
|
; GFX9-NEXT: .amdhsa_ieee_mode 1
|
|
; GFX9-NEXT: .amdhsa_fp16_overflow 0
|
|
; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
|
|
; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0
|
|
; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
|
|
; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0
|
|
; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0
|
|
; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0
|
|
; GFX9-NEXT: .amdhsa_exception_int_div_zero 0
|
|
; GFX9-NEXT: .end_amdhsa_kernel
|
|
; GFX9-NEXT: .text
|
|
%clutter = alloca i8, addrspace(5) ; Force non-zero offset for next alloca
|
|
store volatile i8 3, ptr addrspace(5) %clutter
|
|
%alloca.align = alloca i32, align 4, addrspace(5)
|
|
store volatile i32 9, ptr addrspace(5) %alloca.align, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @alignstack_attr() #2 {
|
|
; VI-LABEL: alignstack_attr:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_add_u32 s0, s0, s17
|
|
; VI-NEXT: s_addc_u32 s1, s1, 0
|
|
; VI-NEXT: v_mov_b32_e32 v0, 3
|
|
; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: v_mov_b32_e32 v0, 9
|
|
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_endpgm
|
|
; VI-NEXT: .section .rodata,"a"
|
|
; VI-NEXT: .p2align 6
|
|
; VI-NEXT: .amdhsa_kernel alignstack_attr
|
|
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
|
|
; VI-NEXT: .amdhsa_private_segment_fixed_size 128
|
|
; VI-NEXT: .amdhsa_kernarg_size 56
|
|
; VI-NEXT: .amdhsa_user_sgpr_count 14
|
|
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
|
|
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
|
|
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 1
|
|
; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
|
|
; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 1
|
|
; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
|
|
; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0
|
|
; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
|
|
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
|
|
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
|
|
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
|
|
; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0
|
|
; VI-NEXT: .amdhsa_system_vgpr_workitem_id 2
|
|
; VI-NEXT: .amdhsa_next_free_vgpr 1
|
|
; VI-NEXT: .amdhsa_next_free_sgpr 18
|
|
; VI-NEXT: .amdhsa_reserve_vcc 0
|
|
; VI-NEXT: .amdhsa_reserve_flat_scratch 0
|
|
; VI-NEXT: .amdhsa_float_round_mode_32 0
|
|
; VI-NEXT: .amdhsa_float_round_mode_16_64 0
|
|
; VI-NEXT: .amdhsa_float_denorm_mode_32 3
|
|
; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3
|
|
; VI-NEXT: .amdhsa_dx10_clamp 1
|
|
; VI-NEXT: .amdhsa_ieee_mode 1
|
|
; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
|
|
; VI-NEXT: .amdhsa_exception_fp_denorm_src 0
|
|
; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
|
|
; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0
|
|
; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0
|
|
; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0
|
|
; VI-NEXT: .amdhsa_exception_int_div_zero 0
|
|
; VI-NEXT: .end_amdhsa_kernel
|
|
; VI-NEXT: .text
|
|
;
|
|
; GFX9-LABEL: alignstack_attr:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_add_u32 s0, s0, s17
|
|
; GFX9-NEXT: s_addc_u32 s1, s1, 0
|
|
; GFX9-NEXT: v_mov_b32_e32 v0, 3
|
|
; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v0, 9
|
|
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_endpgm
|
|
; GFX9-NEXT: .section .rodata,"a"
|
|
; GFX9-NEXT: .p2align 6
|
|
; GFX9-NEXT: .amdhsa_kernel alignstack_attr
|
|
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
|
|
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 128
|
|
; GFX9-NEXT: .amdhsa_kernarg_size 56
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_count 14
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 1
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 1
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
|
|
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0
|
|
; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
|
|
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
|
|
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
|
|
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
|
|
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0
|
|
; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 2
|
|
; GFX9-NEXT: .amdhsa_next_free_vgpr 1
|
|
; GFX9-NEXT: .amdhsa_next_free_sgpr 18
|
|
; GFX9-NEXT: .amdhsa_reserve_vcc 0
|
|
; GFX9-NEXT: .amdhsa_reserve_flat_scratch 0
|
|
; GFX9-NEXT: .amdhsa_reserve_xnack_mask 1
|
|
; GFX9-NEXT: .amdhsa_float_round_mode_32 0
|
|
; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
|
|
; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3
|
|
; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3
|
|
; GFX9-NEXT: .amdhsa_dx10_clamp 1
|
|
; GFX9-NEXT: .amdhsa_ieee_mode 1
|
|
; GFX9-NEXT: .amdhsa_fp16_overflow 0
|
|
; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
|
|
; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0
|
|
; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
|
|
; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0
|
|
; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0
|
|
; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0
|
|
; GFX9-NEXT: .amdhsa_exception_int_div_zero 0
|
|
; GFX9-NEXT: .end_amdhsa_kernel
|
|
; GFX9-NEXT: .text
|
|
%clutter = alloca i8, addrspace(5) ; Force non-zero offset for next alloca
|
|
store volatile i8 3, ptr addrspace(5) %clutter
|
|
%alloca.align = alloca i32, align 4, addrspace(5)
|
|
store volatile i32 9, ptr addrspace(5) %alloca.align, align 4
|
|
ret void
|
|
}
|
|
|
|
attributes #0 = { nounwind }
|
|
attributes #1 = { nounwind "stackrealign" }
|
|
attributes #2 = { nounwind alignstack=128 }
|
|
|
|
!llvm.module.flags = !{!0}
|
|
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
|