[AMDGPU] Enable XNACK on gfx1250 (#161457)

XNACK should always be enabled on gfx1250.

Fixes SWDEV-555931.
This commit is contained in:
Shilei Tian 2025-10-03 11:04:55 -04:00 committed by GitHub
parent 78739ff84a
commit e7f47e70e7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
30 changed files with 1266 additions and 1177 deletions

View File

@ -2126,6 +2126,8 @@ def FeatureISAVersion12_50 : FeatureSet<
FeatureLdsBarrierArriveAtomic,
FeatureSetPrioIncWgInst,
Feature45BitNumRecordsBufferResource,
FeatureSupportsXNACK,
FeatureXNACK,
]>;
def FeatureISAVersion12_51 : FeatureSet<

View File

@ -90,26 +90,24 @@ define <3 x i32> @v_load_constant_v3i32_align1(ptr addrspace(4) %ptr) {
; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v10, v[0:1], off offset:8
; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v11, v[0:1], off offset:9
; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v12, v[0:1], off offset:11
; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v0, v[0:1], off offset:10
; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v13, v[0:1], off offset:10
; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0xa
; GFX1250-NOUNALIGNED-NEXT: s_wait_xcnt 0x0
; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v1, v3, 8, v2
; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v0, v3, 8, v2
; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x8
; GFX1250-NOUNALIGNED-NEXT: v_dual_lshlrev_b32 v3, 16, v4 :: v_dual_lshlrev_b32 v2, 24, v5
; GFX1250-NOUNALIGNED-NEXT: v_dual_lshlrev_b32 v2, 16, v4 :: v_dual_lshlrev_b32 v1, 24, v5
; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x6
; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v4, v7, 8, v6
; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v3, v7, 8, v6
; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x4
; GFX1250-NOUNALIGNED-NEXT: v_dual_lshlrev_b32 v6, 16, v8 :: v_dual_lshlrev_b32 v5, 24, v9
; GFX1250-NOUNALIGNED-NEXT: v_dual_lshlrev_b32 v5, 16, v8 :: v_dual_lshlrev_b32 v4, 24, v9
; GFX1250-NOUNALIGNED-NEXT: v_or3_b32 v0, v1, v2, v0
; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x2
; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v7, v11, 8, v10
; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x1
; GFX1250-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 24, v12
; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v6, v11, 8, v10
; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x0
; GFX1250-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v9, 16, v0
; GFX1250-NOUNALIGNED-NEXT: v_or3_b32 v0, v2, v3, v1
; GFX1250-NOUNALIGNED-NEXT: v_or3_b32 v1, v5, v6, v4
; GFX1250-NOUNALIGNED-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX1250-NOUNALIGNED-NEXT: v_or3_b32 v2, v8, v9, v7
; GFX1250-NOUNALIGNED-NEXT: v_dual_lshlrev_b32 v7, 24, v12 :: v_dual_lshlrev_b32 v8, 16, v13
; GFX1250-NOUNALIGNED-NEXT: v_or3_b32 v1, v4, v5, v3
; GFX1250-NOUNALIGNED-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-NOUNALIGNED-NEXT: v_or3_b32 v2, v7, v8, v6
; GFX1250-NOUNALIGNED-NEXT: s_set_pc_i64 s[30:31]
;
; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align1:
@ -942,7 +940,7 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(ptr addrspace(4) inreg
;
; GFX1250-NOUNALIGNED-LABEL: s_load_constant_v3i32_align1:
; GFX1250-NOUNALIGNED: ; %bb.0:
; GFX1250-NOUNALIGNED-NEXT: s_clause 0xa
; GFX1250-NOUNALIGNED-NEXT: s_clause 0xb
; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s2, s[0:1], 0x1
; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s3, s[0:1], 0x3
; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s4, s[0:1], 0x2
@ -954,27 +952,26 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(ptr addrspace(4) inreg
; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s10, s[0:1], 0x0
; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s11, s[0:1], 0x4
; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s12, s[0:1], 0xa
; GFX1250-NOUNALIGNED-NEXT: s_wait_xcnt 0x0
; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s1, s[0:1], 0x8
; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s13, s[0:1], 0x8
; GFX1250-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s0, s2, 8
; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s2, s3, 24
; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s3, s4, 16
; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s4, s5, 8
; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s2, s2, s3
; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s5, s6, 24
; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s6, s7, 16
; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s7, s8, 8
; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s1, s3, 24
; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s2, s4, 16
; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s3, s5, 8
; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s1, s1, s2
; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s4, s6, 24
; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s5, s7, 16
; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s6, s8, 8
; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s0, s0, s10
; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s8, s9, 24
; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s0, s2, s0
; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s2, s12, 16
; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s3, s4, s11
; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s4, s5, s6
; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s7, s9, 24
; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s0, s1, s0
; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s1, s12, 16
; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s2, s3, s11
; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s3, s4, s5
; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s4, s6, s13
; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s5, s7, s1
; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s2, s8, s2
; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s1, s4, s3
; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s2, s2, s5
; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s1, s3, s2
; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s2, s5, s4
; GFX1250-NOUNALIGNED-NEXT: ; return to shader part epilog
;
; GFX9-UNALIGNED-LABEL: s_load_constant_v3i32_align1:
@ -1351,11 +1348,25 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(ptr addrspace(4) inreg
}
define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(ptr addrspace(4) inreg %ptr) {
; GFX12-LABEL: s_load_constant_v3i32_align4:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
; GFX12-UNALIGNED-LABEL: s_load_constant_v3i32_align4:
; GFX12-UNALIGNED: ; %bb.0:
; GFX12-UNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0
; GFX12-UNALIGNED-NEXT: ; return to shader part epilog
;
; GFX12-NOUNALIGNED-LABEL: s_load_constant_v3i32_align4:
; GFX12-NOUNALIGNED: ; %bb.0:
; GFX12-NOUNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
; GFX12-NOUNALIGNED-NEXT: ; return to shader part epilog
;
; GFX1250-LABEL: s_load_constant_v3i32_align4:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_mov_b32 s4, s0
; GFX1250-NEXT: s_mov_b32 s5, s1
; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_load_constant_v3i32_align4:
; GFX9: ; %bb.0:
@ -1388,11 +1399,25 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(ptr addrspace(4) inreg
}
define amdgpu_ps i96 @s_load_constant_i96_align8(ptr addrspace(4) inreg %ptr) {
; GFX12-LABEL: s_load_constant_i96_align8:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
; GFX12-UNALIGNED-LABEL: s_load_constant_i96_align8:
; GFX12-UNALIGNED: ; %bb.0:
; GFX12-UNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0
; GFX12-UNALIGNED-NEXT: ; return to shader part epilog
;
; GFX12-NOUNALIGNED-LABEL: s_load_constant_i96_align8:
; GFX12-NOUNALIGNED: ; %bb.0:
; GFX12-NOUNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
; GFX12-NOUNALIGNED-NEXT: ; return to shader part epilog
;
; GFX1250-LABEL: s_load_constant_i96_align8:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_mov_b32 s4, s0
; GFX1250-NEXT: s_mov_b32 s5, s1
; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_load_constant_i96_align8:
; GFX9: ; %bb.0:
@ -1425,11 +1450,25 @@ define amdgpu_ps i96 @s_load_constant_i96_align8(ptr addrspace(4) inreg %ptr) {
}
define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(ptr addrspace(4) inreg %ptr) {
; GFX12-LABEL: s_load_constant_v3i32_align8:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
; GFX12-UNALIGNED-LABEL: s_load_constant_v3i32_align8:
; GFX12-UNALIGNED: ; %bb.0:
; GFX12-UNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0
; GFX12-UNALIGNED-NEXT: ; return to shader part epilog
;
; GFX12-NOUNALIGNED-LABEL: s_load_constant_v3i32_align8:
; GFX12-NOUNALIGNED: ; %bb.0:
; GFX12-NOUNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
; GFX12-NOUNALIGNED-NEXT: ; return to shader part epilog
;
; GFX1250-LABEL: s_load_constant_v3i32_align8:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_mov_b32 s4, s0
; GFX1250-NEXT: s_mov_b32 s5, s1
; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_load_constant_v3i32_align8:
; GFX9: ; %bb.0:
@ -1462,11 +1501,25 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(ptr addrspace(4) inreg
}
define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(ptr addrspace(4) inreg %ptr) {
; GFX12-LABEL: s_load_constant_v6i16_align8:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
; GFX12-UNALIGNED-LABEL: s_load_constant_v6i16_align8:
; GFX12-UNALIGNED: ; %bb.0:
; GFX12-UNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0
; GFX12-UNALIGNED-NEXT: ; return to shader part epilog
;
; GFX12-NOUNALIGNED-LABEL: s_load_constant_v6i16_align8:
; GFX12-NOUNALIGNED: ; %bb.0:
; GFX12-NOUNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
; GFX12-NOUNALIGNED-NEXT: ; return to shader part epilog
;
; GFX1250-LABEL: s_load_constant_v6i16_align8:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_mov_b32 s4, s0
; GFX1250-NEXT: s_mov_b32 s5, s1
; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_load_constant_v6i16_align8:
; GFX9: ; %bb.0:
@ -1500,24 +1553,64 @@ define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(ptr addrspace(4) inreg
}
define amdgpu_ps <12 x i8> @s_load_constant_v12i8_align8(ptr addrspace(4) inreg %ptr) {
; GFX12-LABEL: s_load_constant_v12i8_align8:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_lshr_b32 s13, s0, 8
; GFX12-NEXT: s_lshr_b32 s12, s0, 16
; GFX12-NEXT: s_lshr_b32 s3, s0, 24
; GFX12-NEXT: s_lshr_b32 s5, s1, 8
; GFX12-NEXT: s_lshr_b32 s6, s1, 16
; GFX12-NEXT: s_lshr_b32 s7, s1, 24
; GFX12-NEXT: s_lshr_b32 s9, s2, 8
; GFX12-NEXT: s_lshr_b32 s10, s2, 16
; GFX12-NEXT: s_lshr_b32 s11, s2, 24
; GFX12-NEXT: s_mov_b32 s4, s1
; GFX12-NEXT: s_mov_b32 s8, s2
; GFX12-NEXT: s_mov_b32 s1, s13
; GFX12-NEXT: s_mov_b32 s2, s12
; GFX12-NEXT: ; return to shader part epilog
; GFX12-UNALIGNED-LABEL: s_load_constant_v12i8_align8:
; GFX12-UNALIGNED: ; %bb.0:
; GFX12-UNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0
; GFX12-UNALIGNED-NEXT: s_lshr_b32 s13, s0, 8
; GFX12-UNALIGNED-NEXT: s_lshr_b32 s12, s0, 16
; GFX12-UNALIGNED-NEXT: s_lshr_b32 s3, s0, 24
; GFX12-UNALIGNED-NEXT: s_lshr_b32 s5, s1, 8
; GFX12-UNALIGNED-NEXT: s_lshr_b32 s6, s1, 16
; GFX12-UNALIGNED-NEXT: s_lshr_b32 s7, s1, 24
; GFX12-UNALIGNED-NEXT: s_lshr_b32 s9, s2, 8
; GFX12-UNALIGNED-NEXT: s_lshr_b32 s10, s2, 16
; GFX12-UNALIGNED-NEXT: s_lshr_b32 s11, s2, 24
; GFX12-UNALIGNED-NEXT: s_mov_b32 s4, s1
; GFX12-UNALIGNED-NEXT: s_mov_b32 s8, s2
; GFX12-UNALIGNED-NEXT: s_mov_b32 s1, s13
; GFX12-UNALIGNED-NEXT: s_mov_b32 s2, s12
; GFX12-UNALIGNED-NEXT: ; return to shader part epilog
;
; GFX12-NOUNALIGNED-LABEL: s_load_constant_v12i8_align8:
; GFX12-NOUNALIGNED: ; %bb.0:
; GFX12-NOUNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
; GFX12-NOUNALIGNED-NEXT: s_lshr_b32 s13, s0, 8
; GFX12-NOUNALIGNED-NEXT: s_lshr_b32 s12, s0, 16
; GFX12-NOUNALIGNED-NEXT: s_lshr_b32 s3, s0, 24
; GFX12-NOUNALIGNED-NEXT: s_lshr_b32 s5, s1, 8
; GFX12-NOUNALIGNED-NEXT: s_lshr_b32 s6, s1, 16
; GFX12-NOUNALIGNED-NEXT: s_lshr_b32 s7, s1, 24
; GFX12-NOUNALIGNED-NEXT: s_lshr_b32 s9, s2, 8
; GFX12-NOUNALIGNED-NEXT: s_lshr_b32 s10, s2, 16
; GFX12-NOUNALIGNED-NEXT: s_lshr_b32 s11, s2, 24
; GFX12-NOUNALIGNED-NEXT: s_mov_b32 s4, s1
; GFX12-NOUNALIGNED-NEXT: s_mov_b32 s8, s2
; GFX12-NOUNALIGNED-NEXT: s_mov_b32 s1, s13
; GFX12-NOUNALIGNED-NEXT: s_mov_b32 s2, s12
; GFX12-NOUNALIGNED-NEXT: ; return to shader part epilog
;
; GFX1250-LABEL: s_load_constant_v12i8_align8:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_mov_b32 s4, s0
; GFX1250-NEXT: s_mov_b32 s5, s1
; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_lshr_b32 s13, s0, 8
; GFX1250-NEXT: s_lshr_b32 s12, s0, 16
; GFX1250-NEXT: s_lshr_b32 s3, s0, 24
; GFX1250-NEXT: s_lshr_b32 s5, s1, 8
; GFX1250-NEXT: s_lshr_b32 s6, s1, 16
; GFX1250-NEXT: s_lshr_b32 s7, s1, 24
; GFX1250-NEXT: s_lshr_b32 s9, s2, 8
; GFX1250-NEXT: s_lshr_b32 s10, s2, 16
; GFX1250-NEXT: s_lshr_b32 s11, s2, 24
; GFX1250-NEXT: s_mov_b32 s4, s1
; GFX1250-NEXT: s_mov_b32 s8, s2
; GFX1250-NEXT: s_mov_b32 s1, s13
; GFX1250-NEXT: s_mov_b32 s2, s12
; GFX1250-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_load_constant_v12i8_align8:
; GFX9: ; %bb.0:

View File

@ -569,10 +569,10 @@ define <16 x bfloat> @v_load_global_v16bf16(ptr addrspace(1) %ptr) {
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v5, v1 :: v_dual_mov_b32 v4, v0
; GFX1250-NEXT: v_dual_mov_b32 v9, v1 :: v_dual_mov_b32 v8, v0
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_load_b128 v[0:3], v[4:5], off
; GFX1250-NEXT: global_load_b128 v[4:7], v[4:5], off offset:16
; GFX1250-NEXT: global_load_b128 v[0:3], v[8:9], off
; GFX1250-NEXT: global_load_b128 v[4:7], v[8:9], off offset:16
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%load = load <16 x bfloat>, ptr addrspace(1) %ptr
@ -752,12 +752,12 @@ define <32 x bfloat> @v_load_global_v32bf16(ptr addrspace(1) %ptr) {
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v13, v1 :: v_dual_mov_b32 v12, v0
; GFX1250-NEXT: v_dual_mov_b32 v17, v1 :: v_dual_mov_b32 v16, v0
; GFX1250-NEXT: s_clause 0x3
; GFX1250-NEXT: global_load_b128 v[0:3], v[12:13], off
; GFX1250-NEXT: global_load_b128 v[4:7], v[12:13], off offset:16
; GFX1250-NEXT: global_load_b128 v[8:11], v[12:13], off offset:32
; GFX1250-NEXT: global_load_b128 v[12:15], v[12:13], off offset:48
; GFX1250-NEXT: global_load_b128 v[0:3], v[16:17], off
; GFX1250-NEXT: global_load_b128 v[4:7], v[16:17], off offset:16
; GFX1250-NEXT: global_load_b128 v[8:11], v[16:17], off offset:32
; GFX1250-NEXT: global_load_b128 v[12:15], v[16:17], off offset:48
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%load = load <32 x bfloat>, ptr addrspace(1) %ptr
@ -1055,16 +1055,16 @@ define <64 x bfloat> @v_load_global_v64bf16(ptr addrspace(1) %ptr) {
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v29, v1 :: v_dual_mov_b32 v28, v0
; GFX1250-NEXT: v_dual_mov_b32 v33, v1 :: v_dual_mov_b32 v32, v0
; GFX1250-NEXT: s_clause 0x7
; GFX1250-NEXT: global_load_b128 v[0:3], v[28:29], off
; GFX1250-NEXT: global_load_b128 v[4:7], v[28:29], off offset:16
; GFX1250-NEXT: global_load_b128 v[8:11], v[28:29], off offset:32
; GFX1250-NEXT: global_load_b128 v[12:15], v[28:29], off offset:48
; GFX1250-NEXT: global_load_b128 v[16:19], v[28:29], off offset:64
; GFX1250-NEXT: global_load_b128 v[20:23], v[28:29], off offset:80
; GFX1250-NEXT: global_load_b128 v[24:27], v[28:29], off offset:96
; GFX1250-NEXT: global_load_b128 v[28:31], v[28:29], off offset:112
; GFX1250-NEXT: global_load_b128 v[0:3], v[32:33], off
; GFX1250-NEXT: global_load_b128 v[4:7], v[32:33], off offset:16
; GFX1250-NEXT: global_load_b128 v[8:11], v[32:33], off offset:32
; GFX1250-NEXT: global_load_b128 v[12:15], v[32:33], off offset:48
; GFX1250-NEXT: global_load_b128 v[16:19], v[32:33], off offset:64
; GFX1250-NEXT: global_load_b128 v[20:23], v[32:33], off offset:80
; GFX1250-NEXT: global_load_b128 v[24:27], v[32:33], off offset:96
; GFX1250-NEXT: global_load_b128 v[28:31], v[32:33], off offset:112
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%load = load <64 x bfloat>, ptr addrspace(1) %ptr

View File

@ -2700,142 +2700,142 @@ define amdgpu_kernel void @amd_kernel_v32i8(<32 x i8> %arg0) {
;
; GFX1250-LABEL: amd_kernel_v32i8:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1250-NEXT: s_load_b256 s[8:15], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b64_e32 v[8:9], 16
; GFX1250-NEXT: v_mov_b64_e32 v[10:11], 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_lshr_b32 s16, s0, 16
; GFX1250-NEXT: s_lshr_b32 s17, s0, 24
; GFX1250-NEXT: s_lshr_b32 s20, s2, 16
; GFX1250-NEXT: s_lshr_b32 s21, s2, 24
; GFX1250-NEXT: s_lshr_b32 s14, s7, 16
; GFX1250-NEXT: s_lshr_b32 s15, s7, 24
; GFX1250-NEXT: s_bfe_u32 s27, s7, 0x80008
; GFX1250-NEXT: s_lshr_b32 s16, s8, 16
; GFX1250-NEXT: s_lshr_b32 s17, s8, 24
; GFX1250-NEXT: s_lshr_b32 s6, s15, 16
; GFX1250-NEXT: s_lshr_b32 s7, s15, 24
; GFX1250-NEXT: s_bfe_u32 s27, s15, 0x80008
; GFX1250-NEXT: s_add_co_i32 s17, s17, s17
; GFX1250-NEXT: s_add_co_i32 s16, s16, s16
; GFX1250-NEXT: s_lshr_b32 s18, s1, 16
; GFX1250-NEXT: s_lshr_b32 s19, s1, 24
; GFX1250-NEXT: s_lshr_b32 s22, s3, 16
; GFX1250-NEXT: s_lshr_b32 s23, s3, 24
; GFX1250-NEXT: s_bfe_u32 s29, s1, 0x80008
; GFX1250-NEXT: s_bfe_u32 s30, s3, 0x80008
; GFX1250-NEXT: s_add_co_i32 s21, s21, s21
; GFX1250-NEXT: s_add_co_i32 s20, s20, s20
; GFX1250-NEXT: s_lshl_b32 s17, s17, 8
; GFX1250-NEXT: s_and_b32 s16, s16, 0xff
; GFX1250-NEXT: s_add_co_i32 s7, s7, s7
; GFX1250-NEXT: s_add_co_i32 s27, s27, s27
; GFX1250-NEXT: s_add_co_i32 s15, s15, s15
; GFX1250-NEXT: s_add_co_i32 s14, s14, s14
; GFX1250-NEXT: s_add_co_i32 s3, s3, s3
; GFX1250-NEXT: s_add_co_i32 s27, s27, s27
; GFX1250-NEXT: s_add_co_i32 s7, s7, s7
; GFX1250-NEXT: s_add_co_i32 s6, s6, s6
; GFX1250-NEXT: s_or_b32 s16, s16, s17
; GFX1250-NEXT: s_and_b32 s15, s15, 0xff
; GFX1250-NEXT: s_lshl_b32 s17, s27, 8
; GFX1250-NEXT: s_lshl_b32 s7, s7, 8
; GFX1250-NEXT: s_and_b32 s6, s6, 0xff
; GFX1250-NEXT: s_or_b32 s15, s15, s17
; GFX1250-NEXT: s_or_b32 s6, s6, s7
; GFX1250-NEXT: s_bfe_u32 s26, s14, 0x80008
; GFX1250-NEXT: s_and_b32 s7, s15, 0xffff
; GFX1250-NEXT: s_lshl_b32 s6, s6, 16
; GFX1250-NEXT: s_lshr_b32 s20, s10, 16
; GFX1250-NEXT: s_lshr_b32 s21, s10, 24
; GFX1250-NEXT: s_lshr_b32 s4, s14, 16
; GFX1250-NEXT: s_lshr_b32 s5, s14, 24
; GFX1250-NEXT: s_or_b32 s6, s7, s6
; GFX1250-NEXT: s_add_co_i32 s7, s14, s14
; GFX1250-NEXT: s_add_co_i32 s26, s26, s26
; GFX1250-NEXT: s_lshr_b32 s18, s9, 16
; GFX1250-NEXT: s_lshr_b32 s19, s9, 24
; GFX1250-NEXT: s_lshr_b32 s22, s11, 16
; GFX1250-NEXT: s_lshr_b32 s23, s11, 24
; GFX1250-NEXT: s_bfe_u32 s29, s9, 0x80008
; GFX1250-NEXT: s_bfe_u32 s30, s11, 0x80008
; GFX1250-NEXT: s_add_co_i32 s21, s21, s21
; GFX1250-NEXT: s_add_co_i32 s20, s20, s20
; GFX1250-NEXT: s_lshr_b32 s2, s13, 16
; GFX1250-NEXT: s_lshr_b32 s3, s13, 24
; GFX1250-NEXT: s_and_b32 s7, s7, 0xff
; GFX1250-NEXT: s_lshl_b32 s14, s26, 8
; GFX1250-NEXT: s_add_co_i32 s5, s5, s5
; GFX1250-NEXT: s_add_co_i32 s4, s4, s4
; GFX1250-NEXT: s_add_co_i32 s11, s11, s11
; GFX1250-NEXT: s_add_co_i32 s30, s30, s30
; GFX1250-NEXT: s_add_co_i32 s23, s23, s23
; GFX1250-NEXT: s_add_co_i32 s22, s22, s22
; GFX1250-NEXT: s_lshl_b32 s21, s21, 8
; GFX1250-NEXT: s_and_b32 s20, s20, 0xff
; GFX1250-NEXT: s_add_co_i32 s1, s1, s1
; GFX1250-NEXT: s_add_co_i32 s9, s9, s9
; GFX1250-NEXT: s_add_co_i32 s29, s29, s29
; GFX1250-NEXT: s_add_co_i32 s19, s19, s19
; GFX1250-NEXT: s_add_co_i32 s18, s18, s18
; GFX1250-NEXT: s_lshr_b32 s10, s5, 16
; GFX1250-NEXT: s_lshr_b32 s11, s5, 24
; GFX1250-NEXT: s_lshr_b32 s12, s6, 16
; GFX1250-NEXT: s_lshr_b32 s13, s6, 24
; GFX1250-NEXT: s_or_b32 s16, s16, s17
; GFX1250-NEXT: s_and_b32 s7, s7, 0xff
; GFX1250-NEXT: s_lshl_b32 s17, s27, 8
; GFX1250-NEXT: s_lshl_b32 s15, s15, 8
; GFX1250-NEXT: s_and_b32 s14, s14, 0xff
; GFX1250-NEXT: s_and_b32 s3, s3, 0xff
; GFX1250-NEXT: s_bfe_u32 s25, s13, 0x80008
; GFX1250-NEXT: s_lshl_b32 s5, s5, 8
; GFX1250-NEXT: s_and_b32 s4, s4, 0xff
; GFX1250-NEXT: s_or_b32 s7, s7, s14
; GFX1250-NEXT: s_add_co_i32 s3, s3, s3
; GFX1250-NEXT: s_add_co_i32 s2, s2, s2
; GFX1250-NEXT: s_and_b32 s11, s11, 0xff
; GFX1250-NEXT: s_lshl_b32 s30, s30, 8
; GFX1250-NEXT: s_lshl_b32 s23, s23, 8
; GFX1250-NEXT: s_and_b32 s22, s22, 0xff
; GFX1250-NEXT: s_or_b32 s20, s20, s21
; GFX1250-NEXT: s_and_b32 s1, s1, 0xff
; GFX1250-NEXT: s_and_b32 s9, s9, 0xff
; GFX1250-NEXT: s_lshl_b32 s21, s29, 8
; GFX1250-NEXT: s_lshl_b32 s19, s19, 8
; GFX1250-NEXT: s_and_b32 s18, s18, 0xff
; GFX1250-NEXT: s_lshr_b32 s8, s4, 16
; GFX1250-NEXT: s_lshr_b32 s9, s4, 24
; GFX1250-NEXT: s_bfe_u32 s24, s4, 0x80008
; GFX1250-NEXT: s_bfe_u32 s25, s5, 0x80008
; GFX1250-NEXT: s_bfe_u32 s26, s6, 0x80008
; GFX1250-NEXT: s_or_b32 s7, s7, s17
; GFX1250-NEXT: s_or_b32 s14, s14, s15
; GFX1250-NEXT: s_add_co_i32 s13, s13, s13
; GFX1250-NEXT: s_add_co_i32 s12, s12, s12
; GFX1250-NEXT: s_add_co_i32 s11, s11, s11
; GFX1250-NEXT: s_add_co_i32 s10, s10, s10
; GFX1250-NEXT: s_bfe_u32 s28, s0, 0x80008
; GFX1250-NEXT: s_or_b32 s3, s3, s30
; GFX1250-NEXT: s_or_b32 s22, s22, s23
; GFX1250-NEXT: s_bfe_u32 s23, s2, 0x80008
; GFX1250-NEXT: s_or_b32 s1, s1, s21
; GFX1250-NEXT: s_or_b32 s18, s18, s19
; GFX1250-NEXT: s_and_b32 s7, s7, 0xffff
; GFX1250-NEXT: s_lshl_b32 s14, s14, 16
; GFX1250-NEXT: s_add_co_i32 s6, s6, s6
; GFX1250-NEXT: s_add_co_i32 s26, s26, s26
; GFX1250-NEXT: s_lshl_b32 s13, s13, 8
; GFX1250-NEXT: s_and_b32 s12, s12, 0xff
; GFX1250-NEXT: s_add_co_i32 s5, s5, s5
; GFX1250-NEXT: s_lshr_b32 s0, s12, 16
; GFX1250-NEXT: s_lshr_b32 s1, s12, 24
; GFX1250-NEXT: s_bfe_u32 s24, s12, 0x80008
; GFX1250-NEXT: s_or_b32 s4, s4, s5
; GFX1250-NEXT: s_and_b32 s5, s7, 0xffff
; GFX1250-NEXT: s_add_co_i32 s7, s13, s13
; GFX1250-NEXT: s_add_co_i32 s25, s25, s25
; GFX1250-NEXT: s_lshl_b32 s11, s11, 8
; GFX1250-NEXT: s_and_b32 s10, s10, 0xff
; GFX1250-NEXT: s_add_co_i32 s4, s4, s4
; GFX1250-NEXT: s_add_co_i32 s24, s24, s24
; GFX1250-NEXT: s_add_co_i32 s9, s9, s9
; GFX1250-NEXT: s_add_co_i32 s8, s8, s8
; GFX1250-NEXT: s_and_b32 s3, s3, 0xffff
; GFX1250-NEXT: s_lshl_b32 s22, s22, 16
; GFX1250-NEXT: s_add_co_i32 s2, s2, s2
; GFX1250-NEXT: s_add_co_i32 s23, s23, s23
; GFX1250-NEXT: s_and_b32 s1, s1, 0xffff
; GFX1250-NEXT: s_lshl_b32 s18, s18, 16
; GFX1250-NEXT: s_add_co_i32 s0, s0, s0
; GFX1250-NEXT: s_add_co_i32 s28, s28, s28
; GFX1250-NEXT: s_or_b32 s7, s7, s14
; GFX1250-NEXT: s_and_b32 s6, s6, 0xff
; GFX1250-NEXT: s_lshl_b32 s14, s26, 8
; GFX1250-NEXT: s_or_b32 s12, s12, s13
; GFX1250-NEXT: s_and_b32 s5, s5, 0xff
; GFX1250-NEXT: s_lshl_b32 s13, s25, 8
; GFX1250-NEXT: s_or_b32 s10, s10, s11
; GFX1250-NEXT: s_and_b32 s4, s4, 0xff
; GFX1250-NEXT: s_lshl_b32 s11, s24, 8
; GFX1250-NEXT: s_lshl_b32 s9, s9, 8
; GFX1250-NEXT: s_and_b32 s8, s8, 0xff
; GFX1250-NEXT: s_or_b32 s3, s3, s22
; GFX1250-NEXT: s_lshl_b32 s3, s3, 8
; GFX1250-NEXT: s_and_b32 s2, s2, 0xff
; GFX1250-NEXT: s_lshl_b32 s22, s23, 8
; GFX1250-NEXT: s_or_b32 s1, s1, s18
; GFX1250-NEXT: s_bfe_u32 s28, s8, 0x80008
; GFX1250-NEXT: s_or_b32 s11, s11, s30
; GFX1250-NEXT: s_or_b32 s22, s22, s23
; GFX1250-NEXT: s_bfe_u32 s23, s10, 0x80008
; GFX1250-NEXT: s_or_b32 s9, s9, s21
; GFX1250-NEXT: s_or_b32 s18, s18, s19
; GFX1250-NEXT: s_lshl_b32 s4, s4, 16
; GFX1250-NEXT: s_and_b32 s7, s7, 0xff
; GFX1250-NEXT: s_lshl_b32 s13, s25, 8
; GFX1250-NEXT: s_or_b32 s2, s2, s3
; GFX1250-NEXT: s_add_co_i32 s3, s12, s12
; GFX1250-NEXT: s_add_co_i32 s24, s24, s24
; GFX1250-NEXT: s_add_co_i32 s1, s1, s1
; GFX1250-NEXT: s_add_co_i32 s0, s0, s0
; GFX1250-NEXT: s_and_b32 s11, s11, 0xffff
; GFX1250-NEXT: s_lshl_b32 s22, s22, 16
; GFX1250-NEXT: s_add_co_i32 s10, s10, s10
; GFX1250-NEXT: s_add_co_i32 s23, s23, s23
; GFX1250-NEXT: s_and_b32 s9, s9, 0xffff
; GFX1250-NEXT: s_lshl_b32 s18, s18, 16
; GFX1250-NEXT: s_add_co_i32 s8, s8, s8
; GFX1250-NEXT: s_add_co_i32 s28, s28, s28
; GFX1250-NEXT: s_or_b32 s4, s5, s4
; GFX1250-NEXT: s_or_b32 s5, s7, s13
; GFX1250-NEXT: s_and_b32 s3, s3, 0xff
; GFX1250-NEXT: s_lshl_b32 s7, s24, 8
; GFX1250-NEXT: s_lshl_b32 s1, s1, 8
; GFX1250-NEXT: s_and_b32 s0, s0, 0xff
; GFX1250-NEXT: s_or_b32 s11, s11, s22
; GFX1250-NEXT: s_and_b32 s10, s10, 0xff
; GFX1250-NEXT: s_lshl_b32 s22, s23, 8
; GFX1250-NEXT: s_or_b32 s9, s9, s18
; GFX1250-NEXT: s_and_b32 s8, s8, 0xff
; GFX1250-NEXT: s_lshl_b32 s18, s28, 8
; GFX1250-NEXT: s_or_b32 s6, s6, s14
; GFX1250-NEXT: s_or_b32 s5, s5, s13
; GFX1250-NEXT: s_or_b32 s4, s4, s11
; GFX1250-NEXT: s_or_b32 s8, s8, s9
; GFX1250-NEXT: s_or_b32 s2, s2, s22
; GFX1250-NEXT: s_or_b32 s0, s0, s18
; GFX1250-NEXT: s_and_b32 s6, s6, 0xffff
; GFX1250-NEXT: s_lshl_b32 s12, s12, 16
; GFX1250-NEXT: s_or_b32 s3, s3, s7
; GFX1250-NEXT: s_or_b32 s0, s0, s1
; GFX1250-NEXT: s_or_b32 s10, s10, s22
; GFX1250-NEXT: s_or_b32 s8, s8, s18
; GFX1250-NEXT: s_and_b32 s5, s5, 0xffff
; GFX1250-NEXT: s_and_b32 s4, s4, 0xffff
; GFX1250-NEXT: s_lshl_b32 s8, s8, 16
; GFX1250-NEXT: s_lshl_b32 s9, s10, 16
; GFX1250-NEXT: s_and_b32 s2, s2, 0xffff
; GFX1250-NEXT: s_and_b32 s1, s3, 0xffff
; GFX1250-NEXT: s_lshl_b32 s0, s0, 16
; GFX1250-NEXT: s_lshl_b32 s2, s2, 16
; GFX1250-NEXT: s_and_b32 s10, s10, 0xffff
; GFX1250-NEXT: s_lshl_b32 s20, s20, 16
; GFX1250-NEXT: s_and_b32 s0, s0, 0xffff
; GFX1250-NEXT: s_and_b32 s8, s8, 0xffff
; GFX1250-NEXT: s_lshl_b32 s16, s16, 16
; GFX1250-NEXT: s_or_b32 s6, s6, s12
; GFX1250-NEXT: s_or_b32 s4, s4, s8
; GFX1250-NEXT: s_or_b32 s5, s5, s9
; GFX1250-NEXT: s_or_b32 s2, s2, s20
; GFX1250-NEXT: s_or_b32 s0, s0, s16
; GFX1250-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX1250-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX1250-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX1250-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX1250-NEXT: s_or_b32 s0, s1, s0
; GFX1250-NEXT: s_or_b32 s1, s5, s2
; GFX1250-NEXT: s_or_b32 s10, s10, s20
; GFX1250-NEXT: s_or_b32 s8, s8, s16
; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1250-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s6
; GFX1250-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9
; GFX1250-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_store_b128 v[8:9], v[0:3], off
; GFX1250-NEXT: global_store_b128 v[10:11], v[4:7], off

View File

@ -117,12 +117,12 @@ define amdgpu_kernel void @sadd64rr(ptr addrspace(1) %out, i64 %a, i64 %b) {
;
; GFX1250-LABEL: sadd64rr:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[4:5]
; GFX1250-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[6:7]
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
@ -818,17 +818,17 @@ define amdgpu_kernel void @suaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car
;
; GFX1250-LABEL: suaddo64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1250-NEXT: s_load_b256 s[8:15], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_add_nc_u64 s[6:7], s[4:5], s[6:7]
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_cmp_lt_u64_e64 s4, s[6:7], s[4:5]
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
; GFX1250-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[12:13], s[14:15]
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX1250-NEXT: v_cmp_lt_u64_e64 s0, s[0:1], s[12:13]
; GFX1250-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1250-NEXT: global_store_b8 v2, v3, s[2:3]
; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[8:9]
; GFX1250-NEXT: global_store_b8 v2, v3, s[10:11]
; GFX1250-NEXT: s_endpgm
%uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
%val = extractvalue { i64, i1 } %uadd, 0
@ -1096,12 +1096,12 @@ define amdgpu_kernel void @ssub64rr(ptr addrspace(1) %out, i64 %a, i64 %b) {
;
; GFX1250-LABEL: ssub64rr:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_sub_nc_u64 s[2:3], s[2:3], s[4:5]
; GFX1250-NEXT: s_sub_nc_u64 s[2:3], s[2:3], s[6:7]
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
@ -1798,17 +1798,17 @@ define amdgpu_kernel void @susubo64(ptr addrspace(1) %out, ptr addrspace(1) %car
;
; GFX1250-LABEL: susubo64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1250-NEXT: s_load_b256 s[8:15], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_sub_nc_u64 s[6:7], s[4:5], s[6:7]
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_cmp_gt_u64_e64 s4, s[6:7], s[4:5]
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
; GFX1250-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
; GFX1250-NEXT: s_sub_nc_u64 s[0:1], s[12:13], s[14:15]
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX1250-NEXT: v_cmp_gt_u64_e64 s0, s[0:1], s[12:13]
; GFX1250-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1250-NEXT: global_store_b8 v2, v3, s[2:3]
; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[8:9]
; GFX1250-NEXT: global_store_b8 v2, v3, s[10:11]
; GFX1250-NEXT: s_endpgm
%usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
%val = extractvalue { i64, i1 } %usub, 0
@ -3099,70 +3099,70 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
;
; GFX1250-LABEL: sudiv64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_or_b64 s[6:7], s[2:3], s[4:5]
; GFX1250-NEXT: s_or_b64 s[4:5], s[2:3], s[6:7]
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-NEXT: s_and_b64 s[6:7], s[6:7], 0xffffffff00000000
; GFX1250-NEXT: s_cmp_lg_u64 s[6:7], 0
; GFX1250-NEXT: s_and_b64 s[4:5], s[4:5], 0xffffffff00000000
; GFX1250-NEXT: s_cmp_lg_u64 s[4:5], 0
; GFX1250-NEXT: s_cbranch_scc0 .LBB16_4
; GFX1250-NEXT: ; %bb.1:
; GFX1250-NEXT: s_cvt_f32_u32 s6, s4
; GFX1250-NEXT: s_cvt_f32_u32 s7, s5
; GFX1250-NEXT: s_sub_nc_u64 s[10:11], 0, s[4:5]
; GFX1250-NEXT: s_cvt_f32_u32 s4, s6
; GFX1250-NEXT: s_cvt_f32_u32 s5, s7
; GFX1250-NEXT: s_sub_nc_u64 s[10:11], 0, s[6:7]
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_3)
; GFX1250-NEXT: s_fmac_f32 s6, s7, 0x4f800000
; GFX1250-NEXT: v_s_rcp_f32 s6, s6
; GFX1250-NEXT: s_fmac_f32 s4, s5, 0x4f800000
; GFX1250-NEXT: v_s_rcp_f32 s4, s4
; GFX1250-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
; GFX1250-NEXT: s_mul_f32 s6, s6, 0x5f7ffffc
; GFX1250-NEXT: s_mul_f32 s7, s6, 0x2f800000
; GFX1250-NEXT: s_mul_f32 s4, s4, 0x5f7ffffc
; GFX1250-NEXT: s_mul_f32 s5, s4, 0x2f800000
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3)
; GFX1250-NEXT: s_trunc_f32 s7, s7
; GFX1250-NEXT: s_fmac_f32 s6, s7, 0xcf800000
; GFX1250-NEXT: s_cvt_u32_f32 s9, s7
; GFX1250-NEXT: s_mov_b32 s7, 0
; GFX1250-NEXT: s_trunc_f32 s5, s5
; GFX1250-NEXT: s_fmac_f32 s4, s5, 0xcf800000
; GFX1250-NEXT: s_cvt_u32_f32 s9, s5
; GFX1250-NEXT: s_mov_b32 s5, 0
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
; GFX1250-NEXT: s_cvt_u32_f32 s8, s6
; GFX1250-NEXT: s_cvt_u32_f32 s8, s4
; GFX1250-NEXT: s_mul_u64 s[12:13], s[10:11], s[8:9]
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: s_mul_hi_u32 s15, s8, s13
; GFX1250-NEXT: s_mul_i32 s14, s8, s13
; GFX1250-NEXT: s_mul_hi_u32 s6, s8, s12
; GFX1250-NEXT: s_mul_hi_u32 s4, s8, s12
; GFX1250-NEXT: s_mul_i32 s17, s9, s12
; GFX1250-NEXT: s_add_nc_u64 s[14:15], s[6:7], s[14:15]
; GFX1250-NEXT: s_add_nc_u64 s[14:15], s[4:5], s[14:15]
; GFX1250-NEXT: s_mul_hi_u32 s16, s9, s12
; GFX1250-NEXT: s_mul_hi_u32 s18, s9, s13
; GFX1250-NEXT: s_add_co_u32 s6, s14, s17
; GFX1250-NEXT: s_add_co_ci_u32 s6, s15, s16
; GFX1250-NEXT: s_add_co_u32 s4, s14, s17
; GFX1250-NEXT: s_add_co_ci_u32 s4, s15, s16
; GFX1250-NEXT: s_mul_i32 s12, s9, s13
; GFX1250-NEXT: s_add_co_ci_u32 s13, s18, 0
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-NEXT: s_add_nc_u64 s[12:13], s[6:7], s[12:13]
; GFX1250-NEXT: s_add_nc_u64 s[12:13], s[4:5], s[12:13]
; GFX1250-NEXT: s_add_co_u32 s8, s8, s12
; GFX1250-NEXT: s_cselect_b32 s6, -1, 0
; GFX1250-NEXT: s_cselect_b32 s4, -1, 0
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1250-NEXT: s_cmp_lg_u32 s6, 0
; GFX1250-NEXT: s_cmp_lg_u32 s4, 0
; GFX1250-NEXT: s_add_co_ci_u32 s9, s9, s13
; GFX1250-NEXT: s_mul_u64 s[10:11], s[10:11], s[8:9]
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: s_mul_hi_u32 s13, s8, s11
; GFX1250-NEXT: s_mul_i32 s12, s8, s11
; GFX1250-NEXT: s_mul_hi_u32 s6, s8, s10
; GFX1250-NEXT: s_mul_hi_u32 s4, s8, s10
; GFX1250-NEXT: s_mul_i32 s15, s9, s10
; GFX1250-NEXT: s_add_nc_u64 s[12:13], s[6:7], s[12:13]
; GFX1250-NEXT: s_add_nc_u64 s[12:13], s[4:5], s[12:13]
; GFX1250-NEXT: s_mul_hi_u32 s14, s9, s10
; GFX1250-NEXT: s_mul_hi_u32 s16, s9, s11
; GFX1250-NEXT: s_add_co_u32 s6, s12, s15
; GFX1250-NEXT: s_add_co_ci_u32 s6, s13, s14
; GFX1250-NEXT: s_add_co_u32 s4, s12, s15
; GFX1250-NEXT: s_add_co_ci_u32 s4, s13, s14
; GFX1250-NEXT: s_mul_i32 s10, s9, s11
; GFX1250-NEXT: s_add_co_ci_u32 s11, s16, 0
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-NEXT: s_add_nc_u64 s[10:11], s[6:7], s[10:11]
; GFX1250-NEXT: s_add_nc_u64 s[10:11], s[4:5], s[10:11]
; GFX1250-NEXT: s_add_co_u32 s8, s8, s10
; GFX1250-NEXT: s_cselect_b32 s10, -1, 0
; GFX1250-NEXT: s_mul_hi_u32 s6, s2, s8
; GFX1250-NEXT: s_mul_hi_u32 s4, s2, s8
; GFX1250-NEXT: s_cmp_lg_u32 s10, 0
; GFX1250-NEXT: s_mul_hi_u32 s12, s3, s8
; GFX1250-NEXT: s_add_co_ci_u32 s10, s9, s11
@ -3170,33 +3170,33 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GFX1250-NEXT: s_mul_hi_u32 s9, s2, s10
; GFX1250-NEXT: s_mul_i32 s8, s2, s10
; GFX1250-NEXT: s_mul_hi_u32 s13, s3, s10
; GFX1250-NEXT: s_add_nc_u64 s[8:9], s[6:7], s[8:9]
; GFX1250-NEXT: s_add_nc_u64 s[8:9], s[4:5], s[8:9]
; GFX1250-NEXT: s_mul_i32 s10, s3, s10
; GFX1250-NEXT: s_add_co_u32 s6, s8, s11
; GFX1250-NEXT: s_add_co_ci_u32 s6, s9, s12
; GFX1250-NEXT: s_add_co_u32 s4, s8, s11
; GFX1250-NEXT: s_add_co_ci_u32 s4, s9, s12
; GFX1250-NEXT: s_add_co_ci_u32 s11, s13, 0
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-NEXT: s_add_nc_u64 s[8:9], s[6:7], s[10:11]
; GFX1250-NEXT: s_add_nc_u64 s[8:9], s[4:5], s[10:11]
; GFX1250-NEXT: s_and_b64 s[10:11], s[8:9], 0xffffffff00000000
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-NEXT: s_or_b32 s10, s10, s8
; GFX1250-NEXT: s_mul_u64 s[8:9], s[4:5], s[10:11]
; GFX1250-NEXT: s_mul_u64 s[8:9], s[6:7], s[10:11]
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: s_sub_co_u32 s6, s2, s8
; GFX1250-NEXT: s_sub_co_u32 s4, s2, s8
; GFX1250-NEXT: s_cselect_b32 s8, -1, 0
; GFX1250-NEXT: s_sub_co_i32 s12, s3, s9
; GFX1250-NEXT: s_cmp_lg_u32 s8, 0
; GFX1250-NEXT: s_sub_co_ci_u32 s12, s12, s5
; GFX1250-NEXT: s_sub_co_u32 s13, s6, s4
; GFX1250-NEXT: s_sub_co_ci_u32 s12, s12, s7
; GFX1250-NEXT: s_sub_co_u32 s13, s4, s6
; GFX1250-NEXT: s_cselect_b32 s14, -1, 0
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1250-NEXT: s_cmp_lg_u32 s14, 0
; GFX1250-NEXT: s_sub_co_ci_u32 s12, s12, 0
; GFX1250-NEXT: s_cmp_ge_u32 s12, s5
; GFX1250-NEXT: s_cmp_ge_u32 s12, s7
; GFX1250-NEXT: s_cselect_b32 s14, -1, 0
; GFX1250-NEXT: s_cmp_ge_u32 s13, s4
; GFX1250-NEXT: s_cmp_ge_u32 s13, s6
; GFX1250-NEXT: s_cselect_b32 s15, -1, 0
; GFX1250-NEXT: s_cmp_eq_u32 s12, s5
; GFX1250-NEXT: s_cmp_eq_u32 s12, s7
; GFX1250-NEXT: s_add_nc_u64 s[12:13], s[10:11], 1
; GFX1250-NEXT: s_cselect_b32 s16, s15, s14
; GFX1250-NEXT: s_add_nc_u64 s[14:15], s[10:11], 2
@ -3206,20 +3206,20 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GFX1250-NEXT: s_cmp_lg_u32 s8, 0
; GFX1250-NEXT: s_sub_co_ci_u32 s3, s3, s9
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: s_cmp_ge_u32 s3, s5
; GFX1250-NEXT: s_cmp_ge_u32 s3, s7
; GFX1250-NEXT: s_cselect_b32 s8, -1, 0
; GFX1250-NEXT: s_cmp_ge_u32 s6, s4
; GFX1250-NEXT: s_cselect_b32 s6, -1, 0
; GFX1250-NEXT: s_cmp_eq_u32 s3, s5
; GFX1250-NEXT: s_cselect_b32 s3, s6, s8
; GFX1250-NEXT: s_cmp_ge_u32 s4, s6
; GFX1250-NEXT: s_cselect_b32 s4, -1, 0
; GFX1250-NEXT: s_cmp_eq_u32 s3, s7
; GFX1250-NEXT: s_cselect_b32 s3, s4, s8
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: s_cmp_lg_u32 s3, 0
; GFX1250-NEXT: s_cselect_b32 s9, s13, s11
; GFX1250-NEXT: s_cselect_b32 s8, s12, s10
; GFX1250-NEXT: s_cbranch_execnz .LBB16_3
; GFX1250-NEXT: .LBB16_2:
; GFX1250-NEXT: v_cvt_f32_u32_e32 v0, s4
; GFX1250-NEXT: s_sub_co_i32 s5, 0, s4
; GFX1250-NEXT: v_cvt_f32_u32_e32 v0, s6
; GFX1250-NEXT: s_sub_co_i32 s4, 0, s6
; GFX1250-NEXT: s_mov_b32 s9, 0
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
; GFX1250-NEXT: v_rcp_iflag_f32_e32 v0, v0
@ -3228,23 +3228,23 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX1250-NEXT: v_readfirstlane_b32 s3, v0
; GFX1250-NEXT: s_mul_i32 s5, s5, s3
; GFX1250-NEXT: s_mul_i32 s4, s4, s3
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-NEXT: s_mul_hi_u32 s5, s3, s5
; GFX1250-NEXT: s_add_co_i32 s3, s3, s5
; GFX1250-NEXT: s_mul_hi_u32 s4, s3, s4
; GFX1250-NEXT: s_add_co_i32 s3, s3, s4
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-NEXT: s_mul_hi_u32 s3, s2, s3
; GFX1250-NEXT: s_mul_i32 s5, s3, s4
; GFX1250-NEXT: s_mul_i32 s4, s3, s6
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: s_sub_co_i32 s2, s2, s5
; GFX1250-NEXT: s_add_co_i32 s5, s3, 1
; GFX1250-NEXT: s_sub_co_i32 s6, s2, s4
; GFX1250-NEXT: s_cmp_ge_u32 s2, s4
; GFX1250-NEXT: s_cselect_b32 s3, s5, s3
; GFX1250-NEXT: s_cselect_b32 s2, s6, s2
; GFX1250-NEXT: s_add_co_i32 s5, s3, 1
; GFX1250-NEXT: s_cmp_ge_u32 s2, s4
; GFX1250-NEXT: s_cselect_b32 s8, s5, s3
; GFX1250-NEXT: s_sub_co_i32 s2, s2, s4
; GFX1250-NEXT: s_add_co_i32 s4, s3, 1
; GFX1250-NEXT: s_sub_co_i32 s5, s2, s6
; GFX1250-NEXT: s_cmp_ge_u32 s2, s6
; GFX1250-NEXT: s_cselect_b32 s3, s4, s3
; GFX1250-NEXT: s_cselect_b32 s2, s5, s2
; GFX1250-NEXT: s_add_co_i32 s4, s3, 1
; GFX1250-NEXT: s_cmp_ge_u32 s2, s6
; GFX1250-NEXT: s_cselect_b32 s8, s4, s3
; GFX1250-NEXT: .LBB16_3:
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]

View File

@ -705,12 +705,13 @@ define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(ptr addrspace(1) %C
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_load_b32 v1, v0, s[0:1] scale_offset
; GFX1250-NEXT: global_load_b32 v0, v0, s[2:3] scale_offset
; GFX1250-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
; GFX1250-NEXT: global_load_b32 v2, v0, s[2:3] scale_offset
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v3, s5
; GFX1250-NEXT: s_wait_loadcnt 0x1
; GFX1250-NEXT: ds_store_b32 v2, v1 offset:32
; GFX1250-NEXT: ds_store_b32 v0, v1 offset:32
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ds_store_b32 v3, v0 offset:32
; GFX1250-NEXT: ds_store_b32 v3, v2 offset:32
; GFX1250-NEXT: s_endpgm
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%in0.gep = getelementptr float, ptr addrspace(1) %in0, i32 %x.i
@ -1282,14 +1283,14 @@ define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(ptr addrspace(3)
;
; GFX1250-LABEL: simple_write2_v4f32_superreg_align4:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b32 s4, s[4:5], 0x0
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x8
; GFX1250-NEXT: s_load_b32 s8, s[4:5], 0x0
; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
; GFX1250-NEXT: s_load_b128 s[0:3], s[6:7], 0x0
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_lshl_add_u32 v0, v0, 4, s4
; GFX1250-NEXT: v_lshl_add_u32 v0, v0, 4, s8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s3
; GFX1250-NEXT: v_dual_mov_b32 v3, s0 :: v_dual_mov_b32 v4, s1

View File

@ -2179,6 +2179,7 @@ define amdgpu_ps void @flat_addr_64bit_lsr_iv_multiload(ptr inreg %arg, ptr inre
; GFX1250-SDAG-NEXT: flat_load_b32 v1, v0, s[4:5] scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s0, 0x400
; GFX1250-SDAG-NEXT: ; kill: killed $sgpr4_sgpr5
; GFX1250-SDAG-NEXT: s_cbranch_scc0 .LBB117_1
; GFX1250-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX1250-SDAG-NEXT: s_endpgm
@ -2190,15 +2191,16 @@ define amdgpu_ps void @flat_addr_64bit_lsr_iv_multiload(ptr inreg %arg, ptr inre
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX1250-GISEL-NEXT: .LBB117_1: ; %bb3
; GFX1250-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[2:3], 4, v[2:3]
; GFX1250-GISEL-NEXT: ; kill: killed $vgpr4 killed $vgpr5
; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
; GFX1250-GISEL-NEXT: flat_load_b32 v6, v[4:5] scope:SCOPE_SYS
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: flat_load_b32 v6, v[4:5] scope:SCOPE_SYS
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: flat_load_b32 v4, v[4:5] scope:SCOPE_SYS
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0x400, v2
; GFX1250-GISEL-NEXT: s_cbranch_vccz .LBB117_1

View File

@ -162,32 +162,32 @@ define amdgpu_kernel void @test_fmax3_olt_0_f32(ptr addrspace(1) %out, ptr addrs
;
; GFX1250-LABEL: test_fmax3_olt_0_f32:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1250-NEXT: s_mov_b32 s10, -1
; GFX1250-NEXT: s_mov_b32 s11, 0x31016000
; GFX1250-NEXT: s_mov_b32 s14, s10
; GFX1250-NEXT: s_mov_b32 s15, s11
; GFX1250-NEXT: s_mov_b32 s18, s10
; GFX1250-NEXT: s_mov_b32 s19, s11
; GFX1250-NEXT: s_mov_b32 s22, s10
; GFX1250-NEXT: s_mov_b32 s23, s11
; GFX1250-NEXT: s_load_b256 s[8:15], s[4:5], 0x24
; GFX1250-NEXT: s_mov_b32 s2, -1
; GFX1250-NEXT: s_mov_b32 s3, 0x31016000
; GFX1250-NEXT: s_mov_b32 s6, s2
; GFX1250-NEXT: s_mov_b32 s7, s3
; GFX1250-NEXT: s_mov_b32 s18, s2
; GFX1250-NEXT: s_mov_b32 s19, s3
; GFX1250-NEXT: s_mov_b32 s22, s2
; GFX1250-NEXT: s_mov_b32 s23, s3
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_mov_b32 s12, s2
; GFX1250-NEXT: s_mov_b32 s13, s3
; GFX1250-NEXT: s_mov_b32 s16, s4
; GFX1250-NEXT: s_mov_b32 s17, s5
; GFX1250-NEXT: s_mov_b32 s20, s6
; GFX1250-NEXT: s_mov_b32 s21, s7
; GFX1250-NEXT: buffer_load_b32 v0, off, s[12:15], null scope:SCOPE_SYS
; GFX1250-NEXT: s_mov_b32 s4, s10
; GFX1250-NEXT: s_mov_b32 s5, s11
; GFX1250-NEXT: s_mov_b32 s16, s12
; GFX1250-NEXT: s_mov_b32 s17, s13
; GFX1250-NEXT: s_mov_b32 s20, s14
; GFX1250-NEXT: s_mov_b32 s21, s15
; GFX1250-NEXT: buffer_load_b32 v0, off, s[4:7], null scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: buffer_load_b32 v1, off, s[16:19], null scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: buffer_load_b32 v2, off, s[20:23], null scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_mov_b32 s8, s0
; GFX1250-NEXT: s_mov_b32 s9, s1
; GFX1250-NEXT: s_mov_b32 s0, s8
; GFX1250-NEXT: s_mov_b32 s1, s9
; GFX1250-NEXT: v_max3_num_f32 v0, v0, v1, v2
; GFX1250-NEXT: buffer_store_b32 v0, off, s[8:11], null
; GFX1250-NEXT: buffer_store_b32 v0, off, s[0:3], null
; GFX1250-NEXT: s_endpgm
%a = load volatile float, ptr addrspace(1) %aptr, align 4
%b = load volatile float, ptr addrspace(1) %bptr, align 4
@ -352,32 +352,32 @@ define amdgpu_kernel void @test_fmax3_olt_1_f32(ptr addrspace(1) %out, ptr addrs
;
; GFX1250-LABEL: test_fmax3_olt_1_f32:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1250-NEXT: s_mov_b32 s10, -1
; GFX1250-NEXT: s_mov_b32 s11, 0x31016000
; GFX1250-NEXT: s_mov_b32 s14, s10
; GFX1250-NEXT: s_mov_b32 s15, s11
; GFX1250-NEXT: s_mov_b32 s18, s10
; GFX1250-NEXT: s_mov_b32 s19, s11
; GFX1250-NEXT: s_mov_b32 s22, s10
; GFX1250-NEXT: s_mov_b32 s23, s11
; GFX1250-NEXT: s_load_b256 s[8:15], s[4:5], 0x24
; GFX1250-NEXT: s_mov_b32 s2, -1
; GFX1250-NEXT: s_mov_b32 s3, 0x31016000
; GFX1250-NEXT: s_mov_b32 s6, s2
; GFX1250-NEXT: s_mov_b32 s7, s3
; GFX1250-NEXT: s_mov_b32 s18, s2
; GFX1250-NEXT: s_mov_b32 s19, s3
; GFX1250-NEXT: s_mov_b32 s22, s2
; GFX1250-NEXT: s_mov_b32 s23, s3
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_mov_b32 s12, s2
; GFX1250-NEXT: s_mov_b32 s13, s3
; GFX1250-NEXT: s_mov_b32 s16, s4
; GFX1250-NEXT: s_mov_b32 s17, s5
; GFX1250-NEXT: s_mov_b32 s20, s6
; GFX1250-NEXT: s_mov_b32 s21, s7
; GFX1250-NEXT: buffer_load_b32 v0, off, s[12:15], null scope:SCOPE_SYS
; GFX1250-NEXT: s_mov_b32 s4, s10
; GFX1250-NEXT: s_mov_b32 s5, s11
; GFX1250-NEXT: s_mov_b32 s16, s12
; GFX1250-NEXT: s_mov_b32 s17, s13
; GFX1250-NEXT: s_mov_b32 s20, s14
; GFX1250-NEXT: s_mov_b32 s21, s15
; GFX1250-NEXT: buffer_load_b32 v0, off, s[4:7], null scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: buffer_load_b32 v1, off, s[16:19], null scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: buffer_load_b32 v2, off, s[20:23], null scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_mov_b32 s8, s0
; GFX1250-NEXT: s_mov_b32 s9, s1
; GFX1250-NEXT: s_mov_b32 s0, s8
; GFX1250-NEXT: s_mov_b32 s1, s9
; GFX1250-NEXT: v_max3_num_f32 v0, v2, v0, v1
; GFX1250-NEXT: buffer_store_b32 v0, off, s[8:11], null
; GFX1250-NEXT: buffer_store_b32 v0, off, s[0:3], null
; GFX1250-NEXT: s_endpgm
%a = load volatile float, ptr addrspace(1) %aptr, align 4
%b = load volatile float, ptr addrspace(1) %bptr, align 4
@ -609,62 +609,62 @@ define amdgpu_kernel void @test_fmax3_olt_0_f16(ptr addrspace(1) %out, ptr addrs
;
; GFX1250-TRUE16-LABEL: test_fmax3_olt_0_f16:
; GFX1250-TRUE16: ; %bb.0:
; GFX1250-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1250-TRUE16-NEXT: s_mov_b32 s10, -1
; GFX1250-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
; GFX1250-TRUE16-NEXT: s_mov_b32 s14, s10
; GFX1250-TRUE16-NEXT: s_mov_b32 s15, s11
; GFX1250-TRUE16-NEXT: s_mov_b32 s18, s10
; GFX1250-TRUE16-NEXT: s_mov_b32 s19, s11
; GFX1250-TRUE16-NEXT: s_mov_b32 s22, s10
; GFX1250-TRUE16-NEXT: s_mov_b32 s23, s11
; GFX1250-TRUE16-NEXT: s_load_b256 s[8:15], s[4:5], 0x24
; GFX1250-TRUE16-NEXT: s_mov_b32 s2, -1
; GFX1250-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
; GFX1250-TRUE16-NEXT: s_mov_b32 s6, s2
; GFX1250-TRUE16-NEXT: s_mov_b32 s7, s3
; GFX1250-TRUE16-NEXT: s_mov_b32 s18, s2
; GFX1250-TRUE16-NEXT: s_mov_b32 s19, s3
; GFX1250-TRUE16-NEXT: s_mov_b32 s22, s2
; GFX1250-TRUE16-NEXT: s_mov_b32 s23, s3
; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-TRUE16-NEXT: s_mov_b32 s12, s2
; GFX1250-TRUE16-NEXT: s_mov_b32 s13, s3
; GFX1250-TRUE16-NEXT: s_mov_b32 s16, s4
; GFX1250-TRUE16-NEXT: s_mov_b32 s17, s5
; GFX1250-TRUE16-NEXT: s_mov_b32 s20, s6
; GFX1250-TRUE16-NEXT: s_mov_b32 s21, s7
; GFX1250-TRUE16-NEXT: buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
; GFX1250-TRUE16-NEXT: s_mov_b32 s4, s10
; GFX1250-TRUE16-NEXT: s_mov_b32 s5, s11
; GFX1250-TRUE16-NEXT: s_mov_b32 s16, s12
; GFX1250-TRUE16-NEXT: s_mov_b32 s17, s13
; GFX1250-TRUE16-NEXT: s_mov_b32 s20, s14
; GFX1250-TRUE16-NEXT: s_mov_b32 s21, s15
; GFX1250-TRUE16-NEXT: buffer_load_u16 v0, off, s[4:7], null scope:SCOPE_SYS
; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-TRUE16-NEXT: buffer_load_u16 v1, off, s[16:19], null scope:SCOPE_SYS
; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-TRUE16-NEXT: buffer_load_u16 v2, off, s[20:23], null scope:SCOPE_SYS
; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-TRUE16-NEXT: s_mov_b32 s8, s0
; GFX1250-TRUE16-NEXT: s_mov_b32 s9, s1
; GFX1250-TRUE16-NEXT: s_mov_b32 s0, s8
; GFX1250-TRUE16-NEXT: s_mov_b32 s1, s9
; GFX1250-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v1.l, v2.l
; GFX1250-TRUE16-NEXT: buffer_store_b16 v0, off, s[8:11], null
; GFX1250-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
; GFX1250-TRUE16-NEXT: s_endpgm
;
; GFX1250-FAKE16-LABEL: test_fmax3_olt_0_f16:
; GFX1250-FAKE16: ; %bb.0:
; GFX1250-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1250-FAKE16-NEXT: s_mov_b32 s10, -1
; GFX1250-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
; GFX1250-FAKE16-NEXT: s_mov_b32 s14, s10
; GFX1250-FAKE16-NEXT: s_mov_b32 s15, s11
; GFX1250-FAKE16-NEXT: s_mov_b32 s18, s10
; GFX1250-FAKE16-NEXT: s_mov_b32 s19, s11
; GFX1250-FAKE16-NEXT: s_mov_b32 s22, s10
; GFX1250-FAKE16-NEXT: s_mov_b32 s23, s11
; GFX1250-FAKE16-NEXT: s_load_b256 s[8:15], s[4:5], 0x24
; GFX1250-FAKE16-NEXT: s_mov_b32 s2, -1
; GFX1250-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
; GFX1250-FAKE16-NEXT: s_mov_b32 s6, s2
; GFX1250-FAKE16-NEXT: s_mov_b32 s7, s3
; GFX1250-FAKE16-NEXT: s_mov_b32 s18, s2
; GFX1250-FAKE16-NEXT: s_mov_b32 s19, s3
; GFX1250-FAKE16-NEXT: s_mov_b32 s22, s2
; GFX1250-FAKE16-NEXT: s_mov_b32 s23, s3
; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-FAKE16-NEXT: s_mov_b32 s12, s2
; GFX1250-FAKE16-NEXT: s_mov_b32 s13, s3
; GFX1250-FAKE16-NEXT: s_mov_b32 s16, s4
; GFX1250-FAKE16-NEXT: s_mov_b32 s17, s5
; GFX1250-FAKE16-NEXT: s_mov_b32 s20, s6
; GFX1250-FAKE16-NEXT: s_mov_b32 s21, s7
; GFX1250-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_mov_b32 s4, s10
; GFX1250-FAKE16-NEXT: s_mov_b32 s5, s11
; GFX1250-FAKE16-NEXT: s_mov_b32 s16, s12
; GFX1250-FAKE16-NEXT: s_mov_b32 s17, s13
; GFX1250-FAKE16-NEXT: s_mov_b32 s20, s14
; GFX1250-FAKE16-NEXT: s_mov_b32 s21, s15
; GFX1250-FAKE16-NEXT: buffer_load_u16 v0, off, s[4:7], null scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-FAKE16-NEXT: buffer_load_u16 v1, off, s[16:19], null scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-FAKE16-NEXT: buffer_load_u16 v2, off, s[20:23], null scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-FAKE16-NEXT: s_mov_b32 s8, s0
; GFX1250-FAKE16-NEXT: s_mov_b32 s9, s1
; GFX1250-FAKE16-NEXT: s_mov_b32 s0, s8
; GFX1250-FAKE16-NEXT: s_mov_b32 s1, s9
; GFX1250-FAKE16-NEXT: v_max3_num_f16 v0, v0, v1, v2
; GFX1250-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], null
; GFX1250-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
; GFX1250-FAKE16-NEXT: s_endpgm
%a = load volatile half, ptr addrspace(1) %aptr, align 2
%b = load volatile half, ptr addrspace(1) %bptr, align 2
@ -897,62 +897,62 @@ define amdgpu_kernel void @test_fmax3_olt_1_f16(ptr addrspace(1) %out, ptr addrs
;
; GFX1250-TRUE16-LABEL: test_fmax3_olt_1_f16:
; GFX1250-TRUE16: ; %bb.0:
; GFX1250-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1250-TRUE16-NEXT: s_mov_b32 s10, -1
; GFX1250-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
; GFX1250-TRUE16-NEXT: s_mov_b32 s14, s10
; GFX1250-TRUE16-NEXT: s_mov_b32 s15, s11
; GFX1250-TRUE16-NEXT: s_mov_b32 s18, s10
; GFX1250-TRUE16-NEXT: s_mov_b32 s19, s11
; GFX1250-TRUE16-NEXT: s_mov_b32 s22, s10
; GFX1250-TRUE16-NEXT: s_mov_b32 s23, s11
; GFX1250-TRUE16-NEXT: s_load_b256 s[8:15], s[4:5], 0x24
; GFX1250-TRUE16-NEXT: s_mov_b32 s2, -1
; GFX1250-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
; GFX1250-TRUE16-NEXT: s_mov_b32 s6, s2
; GFX1250-TRUE16-NEXT: s_mov_b32 s7, s3
; GFX1250-TRUE16-NEXT: s_mov_b32 s18, s2
; GFX1250-TRUE16-NEXT: s_mov_b32 s19, s3
; GFX1250-TRUE16-NEXT: s_mov_b32 s22, s2
; GFX1250-TRUE16-NEXT: s_mov_b32 s23, s3
; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-TRUE16-NEXT: s_mov_b32 s12, s2
; GFX1250-TRUE16-NEXT: s_mov_b32 s13, s3
; GFX1250-TRUE16-NEXT: s_mov_b32 s16, s4
; GFX1250-TRUE16-NEXT: s_mov_b32 s17, s5
; GFX1250-TRUE16-NEXT: s_mov_b32 s20, s6
; GFX1250-TRUE16-NEXT: s_mov_b32 s21, s7
; GFX1250-TRUE16-NEXT: buffer_load_u16 v1, off, s[12:15], null scope:SCOPE_SYS
; GFX1250-TRUE16-NEXT: s_mov_b32 s4, s10
; GFX1250-TRUE16-NEXT: s_mov_b32 s5, s11
; GFX1250-TRUE16-NEXT: s_mov_b32 s16, s12
; GFX1250-TRUE16-NEXT: s_mov_b32 s17, s13
; GFX1250-TRUE16-NEXT: s_mov_b32 s20, s14
; GFX1250-TRUE16-NEXT: s_mov_b32 s21, s15
; GFX1250-TRUE16-NEXT: buffer_load_u16 v1, off, s[4:7], null scope:SCOPE_SYS
; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-TRUE16-NEXT: buffer_load_u16 v2, off, s[16:19], null scope:SCOPE_SYS
; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-TRUE16-NEXT: buffer_load_u16 v0, off, s[20:23], null scope:SCOPE_SYS
; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-TRUE16-NEXT: s_mov_b32 s8, s0
; GFX1250-TRUE16-NEXT: s_mov_b32 s9, s1
; GFX1250-TRUE16-NEXT: s_mov_b32 s0, s8
; GFX1250-TRUE16-NEXT: s_mov_b32 s1, s9
; GFX1250-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v1.l, v2.l
; GFX1250-TRUE16-NEXT: buffer_store_b16 v0, off, s[8:11], null
; GFX1250-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
; GFX1250-TRUE16-NEXT: s_endpgm
;
; GFX1250-FAKE16-LABEL: test_fmax3_olt_1_f16:
; GFX1250-FAKE16: ; %bb.0:
; GFX1250-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1250-FAKE16-NEXT: s_mov_b32 s10, -1
; GFX1250-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
; GFX1250-FAKE16-NEXT: s_mov_b32 s14, s10
; GFX1250-FAKE16-NEXT: s_mov_b32 s15, s11
; GFX1250-FAKE16-NEXT: s_mov_b32 s18, s10
; GFX1250-FAKE16-NEXT: s_mov_b32 s19, s11
; GFX1250-FAKE16-NEXT: s_mov_b32 s22, s10
; GFX1250-FAKE16-NEXT: s_mov_b32 s23, s11
; GFX1250-FAKE16-NEXT: s_load_b256 s[8:15], s[4:5], 0x24
; GFX1250-FAKE16-NEXT: s_mov_b32 s2, -1
; GFX1250-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
; GFX1250-FAKE16-NEXT: s_mov_b32 s6, s2
; GFX1250-FAKE16-NEXT: s_mov_b32 s7, s3
; GFX1250-FAKE16-NEXT: s_mov_b32 s18, s2
; GFX1250-FAKE16-NEXT: s_mov_b32 s19, s3
; GFX1250-FAKE16-NEXT: s_mov_b32 s22, s2
; GFX1250-FAKE16-NEXT: s_mov_b32 s23, s3
; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-FAKE16-NEXT: s_mov_b32 s12, s2
; GFX1250-FAKE16-NEXT: s_mov_b32 s13, s3
; GFX1250-FAKE16-NEXT: s_mov_b32 s16, s4
; GFX1250-FAKE16-NEXT: s_mov_b32 s17, s5
; GFX1250-FAKE16-NEXT: s_mov_b32 s20, s6
; GFX1250-FAKE16-NEXT: s_mov_b32 s21, s7
; GFX1250-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_mov_b32 s4, s10
; GFX1250-FAKE16-NEXT: s_mov_b32 s5, s11
; GFX1250-FAKE16-NEXT: s_mov_b32 s16, s12
; GFX1250-FAKE16-NEXT: s_mov_b32 s17, s13
; GFX1250-FAKE16-NEXT: s_mov_b32 s20, s14
; GFX1250-FAKE16-NEXT: s_mov_b32 s21, s15
; GFX1250-FAKE16-NEXT: buffer_load_u16 v0, off, s[4:7], null scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-FAKE16-NEXT: buffer_load_u16 v1, off, s[16:19], null scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-FAKE16-NEXT: buffer_load_u16 v2, off, s[20:23], null scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-FAKE16-NEXT: s_mov_b32 s8, s0
; GFX1250-FAKE16-NEXT: s_mov_b32 s9, s1
; GFX1250-FAKE16-NEXT: s_mov_b32 s0, s8
; GFX1250-FAKE16-NEXT: s_mov_b32 s1, s9
; GFX1250-FAKE16-NEXT: v_max3_num_f16 v0, v2, v0, v1
; GFX1250-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], null
; GFX1250-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
; GFX1250-FAKE16-NEXT: s_endpgm
%a = load volatile half, ptr addrspace(1) %aptr, align 2
%b = load volatile half, ptr addrspace(1) %bptr, align 2

View File

@ -162,32 +162,32 @@ define amdgpu_kernel void @test_fmin3_olt_0_f32(ptr addrspace(1) %out, ptr addrs
;
; GFX1250-LABEL: test_fmin3_olt_0_f32:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1250-NEXT: s_mov_b32 s10, -1
; GFX1250-NEXT: s_mov_b32 s11, 0x31016000
; GFX1250-NEXT: s_mov_b32 s14, s10
; GFX1250-NEXT: s_mov_b32 s15, s11
; GFX1250-NEXT: s_mov_b32 s18, s10
; GFX1250-NEXT: s_mov_b32 s19, s11
; GFX1250-NEXT: s_mov_b32 s22, s10
; GFX1250-NEXT: s_mov_b32 s23, s11
; GFX1250-NEXT: s_load_b256 s[8:15], s[4:5], 0x24
; GFX1250-NEXT: s_mov_b32 s2, -1
; GFX1250-NEXT: s_mov_b32 s3, 0x31016000
; GFX1250-NEXT: s_mov_b32 s6, s2
; GFX1250-NEXT: s_mov_b32 s7, s3
; GFX1250-NEXT: s_mov_b32 s18, s2
; GFX1250-NEXT: s_mov_b32 s19, s3
; GFX1250-NEXT: s_mov_b32 s22, s2
; GFX1250-NEXT: s_mov_b32 s23, s3
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_mov_b32 s12, s2
; GFX1250-NEXT: s_mov_b32 s13, s3
; GFX1250-NEXT: s_mov_b32 s16, s4
; GFX1250-NEXT: s_mov_b32 s17, s5
; GFX1250-NEXT: s_mov_b32 s20, s6
; GFX1250-NEXT: s_mov_b32 s21, s7
; GFX1250-NEXT: buffer_load_b32 v0, off, s[12:15], null scope:SCOPE_SYS
; GFX1250-NEXT: s_mov_b32 s4, s10
; GFX1250-NEXT: s_mov_b32 s5, s11
; GFX1250-NEXT: s_mov_b32 s16, s12
; GFX1250-NEXT: s_mov_b32 s17, s13
; GFX1250-NEXT: s_mov_b32 s20, s14
; GFX1250-NEXT: s_mov_b32 s21, s15
; GFX1250-NEXT: buffer_load_b32 v0, off, s[4:7], null scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: buffer_load_b32 v1, off, s[16:19], null scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: buffer_load_b32 v2, off, s[20:23], null scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_mov_b32 s8, s0
; GFX1250-NEXT: s_mov_b32 s9, s1
; GFX1250-NEXT: s_mov_b32 s0, s8
; GFX1250-NEXT: s_mov_b32 s1, s9
; GFX1250-NEXT: v_min3_num_f32 v0, v0, v1, v2
; GFX1250-NEXT: buffer_store_b32 v0, off, s[8:11], null
; GFX1250-NEXT: buffer_store_b32 v0, off, s[0:3], null
; GFX1250-NEXT: s_endpgm
%a = load volatile float, ptr addrspace(1) %aptr, align 4
%b = load volatile float, ptr addrspace(1) %bptr, align 4
@ -352,32 +352,32 @@ define amdgpu_kernel void @test_fmin3_olt_1_f32(ptr addrspace(1) %out, ptr addrs
;
; GFX1250-LABEL: test_fmin3_olt_1_f32:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1250-NEXT: s_mov_b32 s10, -1
; GFX1250-NEXT: s_mov_b32 s11, 0x31016000
; GFX1250-NEXT: s_mov_b32 s14, s10
; GFX1250-NEXT: s_mov_b32 s15, s11
; GFX1250-NEXT: s_mov_b32 s18, s10
; GFX1250-NEXT: s_mov_b32 s19, s11
; GFX1250-NEXT: s_mov_b32 s22, s10
; GFX1250-NEXT: s_mov_b32 s23, s11
; GFX1250-NEXT: s_load_b256 s[8:15], s[4:5], 0x24
; GFX1250-NEXT: s_mov_b32 s2, -1
; GFX1250-NEXT: s_mov_b32 s3, 0x31016000
; GFX1250-NEXT: s_mov_b32 s6, s2
; GFX1250-NEXT: s_mov_b32 s7, s3
; GFX1250-NEXT: s_mov_b32 s18, s2
; GFX1250-NEXT: s_mov_b32 s19, s3
; GFX1250-NEXT: s_mov_b32 s22, s2
; GFX1250-NEXT: s_mov_b32 s23, s3
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_mov_b32 s12, s2
; GFX1250-NEXT: s_mov_b32 s13, s3
; GFX1250-NEXT: s_mov_b32 s16, s4
; GFX1250-NEXT: s_mov_b32 s17, s5
; GFX1250-NEXT: s_mov_b32 s20, s6
; GFX1250-NEXT: s_mov_b32 s21, s7
; GFX1250-NEXT: buffer_load_b32 v0, off, s[12:15], null scope:SCOPE_SYS
; GFX1250-NEXT: s_mov_b32 s4, s10
; GFX1250-NEXT: s_mov_b32 s5, s11
; GFX1250-NEXT: s_mov_b32 s16, s12
; GFX1250-NEXT: s_mov_b32 s17, s13
; GFX1250-NEXT: s_mov_b32 s20, s14
; GFX1250-NEXT: s_mov_b32 s21, s15
; GFX1250-NEXT: buffer_load_b32 v0, off, s[4:7], null scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: buffer_load_b32 v1, off, s[16:19], null scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: buffer_load_b32 v2, off, s[20:23], null scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_mov_b32 s8, s0
; GFX1250-NEXT: s_mov_b32 s9, s1
; GFX1250-NEXT: s_mov_b32 s0, s8
; GFX1250-NEXT: s_mov_b32 s1, s9
; GFX1250-NEXT: v_min3_num_f32 v0, v2, v0, v1
; GFX1250-NEXT: buffer_store_b32 v0, off, s[8:11], null
; GFX1250-NEXT: buffer_store_b32 v0, off, s[0:3], null
; GFX1250-NEXT: s_endpgm
%a = load volatile float, ptr addrspace(1) %aptr, align 4
%b = load volatile float, ptr addrspace(1) %bptr, align 4
@ -609,62 +609,62 @@ define amdgpu_kernel void @test_fmin3_olt_0_f16(ptr addrspace(1) %out, ptr addrs
;
; GFX1250-TRUE16-LABEL: test_fmin3_olt_0_f16:
; GFX1250-TRUE16: ; %bb.0:
; GFX1250-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1250-TRUE16-NEXT: s_mov_b32 s10, -1
; GFX1250-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
; GFX1250-TRUE16-NEXT: s_mov_b32 s14, s10
; GFX1250-TRUE16-NEXT: s_mov_b32 s15, s11
; GFX1250-TRUE16-NEXT: s_mov_b32 s18, s10
; GFX1250-TRUE16-NEXT: s_mov_b32 s19, s11
; GFX1250-TRUE16-NEXT: s_mov_b32 s22, s10
; GFX1250-TRUE16-NEXT: s_mov_b32 s23, s11
; GFX1250-TRUE16-NEXT: s_load_b256 s[8:15], s[4:5], 0x24
; GFX1250-TRUE16-NEXT: s_mov_b32 s2, -1
; GFX1250-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
; GFX1250-TRUE16-NEXT: s_mov_b32 s6, s2
; GFX1250-TRUE16-NEXT: s_mov_b32 s7, s3
; GFX1250-TRUE16-NEXT: s_mov_b32 s18, s2
; GFX1250-TRUE16-NEXT: s_mov_b32 s19, s3
; GFX1250-TRUE16-NEXT: s_mov_b32 s22, s2
; GFX1250-TRUE16-NEXT: s_mov_b32 s23, s3
; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-TRUE16-NEXT: s_mov_b32 s12, s2
; GFX1250-TRUE16-NEXT: s_mov_b32 s13, s3
; GFX1250-TRUE16-NEXT: s_mov_b32 s16, s4
; GFX1250-TRUE16-NEXT: s_mov_b32 s17, s5
; GFX1250-TRUE16-NEXT: s_mov_b32 s20, s6
; GFX1250-TRUE16-NEXT: s_mov_b32 s21, s7
; GFX1250-TRUE16-NEXT: buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
; GFX1250-TRUE16-NEXT: s_mov_b32 s4, s10
; GFX1250-TRUE16-NEXT: s_mov_b32 s5, s11
; GFX1250-TRUE16-NEXT: s_mov_b32 s16, s12
; GFX1250-TRUE16-NEXT: s_mov_b32 s17, s13
; GFX1250-TRUE16-NEXT: s_mov_b32 s20, s14
; GFX1250-TRUE16-NEXT: s_mov_b32 s21, s15
; GFX1250-TRUE16-NEXT: buffer_load_u16 v0, off, s[4:7], null scope:SCOPE_SYS
; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-TRUE16-NEXT: buffer_load_u16 v1, off, s[16:19], null scope:SCOPE_SYS
; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-TRUE16-NEXT: buffer_load_u16 v2, off, s[20:23], null scope:SCOPE_SYS
; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-TRUE16-NEXT: s_mov_b32 s8, s0
; GFX1250-TRUE16-NEXT: s_mov_b32 s9, s1
; GFX1250-TRUE16-NEXT: s_mov_b32 s0, s8
; GFX1250-TRUE16-NEXT: s_mov_b32 s1, s9
; GFX1250-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v1.l, v2.l
; GFX1250-TRUE16-NEXT: buffer_store_b16 v0, off, s[8:11], null
; GFX1250-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
; GFX1250-TRUE16-NEXT: s_endpgm
;
; GFX1250-FAKE16-LABEL: test_fmin3_olt_0_f16:
; GFX1250-FAKE16: ; %bb.0:
; GFX1250-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1250-FAKE16-NEXT: s_mov_b32 s10, -1
; GFX1250-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
; GFX1250-FAKE16-NEXT: s_mov_b32 s14, s10
; GFX1250-FAKE16-NEXT: s_mov_b32 s15, s11
; GFX1250-FAKE16-NEXT: s_mov_b32 s18, s10
; GFX1250-FAKE16-NEXT: s_mov_b32 s19, s11
; GFX1250-FAKE16-NEXT: s_mov_b32 s22, s10
; GFX1250-FAKE16-NEXT: s_mov_b32 s23, s11
; GFX1250-FAKE16-NEXT: s_load_b256 s[8:15], s[4:5], 0x24
; GFX1250-FAKE16-NEXT: s_mov_b32 s2, -1
; GFX1250-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
; GFX1250-FAKE16-NEXT: s_mov_b32 s6, s2
; GFX1250-FAKE16-NEXT: s_mov_b32 s7, s3
; GFX1250-FAKE16-NEXT: s_mov_b32 s18, s2
; GFX1250-FAKE16-NEXT: s_mov_b32 s19, s3
; GFX1250-FAKE16-NEXT: s_mov_b32 s22, s2
; GFX1250-FAKE16-NEXT: s_mov_b32 s23, s3
; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-FAKE16-NEXT: s_mov_b32 s12, s2
; GFX1250-FAKE16-NEXT: s_mov_b32 s13, s3
; GFX1250-FAKE16-NEXT: s_mov_b32 s16, s4
; GFX1250-FAKE16-NEXT: s_mov_b32 s17, s5
; GFX1250-FAKE16-NEXT: s_mov_b32 s20, s6
; GFX1250-FAKE16-NEXT: s_mov_b32 s21, s7
; GFX1250-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_mov_b32 s4, s10
; GFX1250-FAKE16-NEXT: s_mov_b32 s5, s11
; GFX1250-FAKE16-NEXT: s_mov_b32 s16, s12
; GFX1250-FAKE16-NEXT: s_mov_b32 s17, s13
; GFX1250-FAKE16-NEXT: s_mov_b32 s20, s14
; GFX1250-FAKE16-NEXT: s_mov_b32 s21, s15
; GFX1250-FAKE16-NEXT: buffer_load_u16 v0, off, s[4:7], null scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-FAKE16-NEXT: buffer_load_u16 v1, off, s[16:19], null scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-FAKE16-NEXT: buffer_load_u16 v2, off, s[20:23], null scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-FAKE16-NEXT: s_mov_b32 s8, s0
; GFX1250-FAKE16-NEXT: s_mov_b32 s9, s1
; GFX1250-FAKE16-NEXT: s_mov_b32 s0, s8
; GFX1250-FAKE16-NEXT: s_mov_b32 s1, s9
; GFX1250-FAKE16-NEXT: v_min3_num_f16 v0, v0, v1, v2
; GFX1250-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], null
; GFX1250-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
; GFX1250-FAKE16-NEXT: s_endpgm
%a = load volatile half, ptr addrspace(1) %aptr, align 2
%b = load volatile half, ptr addrspace(1) %bptr, align 2
@ -897,62 +897,62 @@ define amdgpu_kernel void @test_fmin3_olt_1_f16(ptr addrspace(1) %out, ptr addrs
;
; GFX1250-TRUE16-LABEL: test_fmin3_olt_1_f16:
; GFX1250-TRUE16: ; %bb.0:
; GFX1250-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1250-TRUE16-NEXT: s_mov_b32 s10, -1
; GFX1250-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
; GFX1250-TRUE16-NEXT: s_mov_b32 s14, s10
; GFX1250-TRUE16-NEXT: s_mov_b32 s15, s11
; GFX1250-TRUE16-NEXT: s_mov_b32 s18, s10
; GFX1250-TRUE16-NEXT: s_mov_b32 s19, s11
; GFX1250-TRUE16-NEXT: s_mov_b32 s22, s10
; GFX1250-TRUE16-NEXT: s_mov_b32 s23, s11
; GFX1250-TRUE16-NEXT: s_load_b256 s[8:15], s[4:5], 0x24
; GFX1250-TRUE16-NEXT: s_mov_b32 s2, -1
; GFX1250-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
; GFX1250-TRUE16-NEXT: s_mov_b32 s6, s2
; GFX1250-TRUE16-NEXT: s_mov_b32 s7, s3
; GFX1250-TRUE16-NEXT: s_mov_b32 s18, s2
; GFX1250-TRUE16-NEXT: s_mov_b32 s19, s3
; GFX1250-TRUE16-NEXT: s_mov_b32 s22, s2
; GFX1250-TRUE16-NEXT: s_mov_b32 s23, s3
; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-TRUE16-NEXT: s_mov_b32 s12, s2
; GFX1250-TRUE16-NEXT: s_mov_b32 s13, s3
; GFX1250-TRUE16-NEXT: s_mov_b32 s16, s4
; GFX1250-TRUE16-NEXT: s_mov_b32 s17, s5
; GFX1250-TRUE16-NEXT: s_mov_b32 s20, s6
; GFX1250-TRUE16-NEXT: s_mov_b32 s21, s7
; GFX1250-TRUE16-NEXT: buffer_load_u16 v1, off, s[12:15], null scope:SCOPE_SYS
; GFX1250-TRUE16-NEXT: s_mov_b32 s4, s10
; GFX1250-TRUE16-NEXT: s_mov_b32 s5, s11
; GFX1250-TRUE16-NEXT: s_mov_b32 s16, s12
; GFX1250-TRUE16-NEXT: s_mov_b32 s17, s13
; GFX1250-TRUE16-NEXT: s_mov_b32 s20, s14
; GFX1250-TRUE16-NEXT: s_mov_b32 s21, s15
; GFX1250-TRUE16-NEXT: buffer_load_u16 v1, off, s[4:7], null scope:SCOPE_SYS
; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-TRUE16-NEXT: buffer_load_u16 v2, off, s[16:19], null scope:SCOPE_SYS
; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-TRUE16-NEXT: buffer_load_u16 v0, off, s[20:23], null scope:SCOPE_SYS
; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-TRUE16-NEXT: s_mov_b32 s8, s0
; GFX1250-TRUE16-NEXT: s_mov_b32 s9, s1
; GFX1250-TRUE16-NEXT: s_mov_b32 s0, s8
; GFX1250-TRUE16-NEXT: s_mov_b32 s1, s9
; GFX1250-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v1.l, v2.l
; GFX1250-TRUE16-NEXT: buffer_store_b16 v0, off, s[8:11], null
; GFX1250-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
; GFX1250-TRUE16-NEXT: s_endpgm
;
; GFX1250-FAKE16-LABEL: test_fmin3_olt_1_f16:
; GFX1250-FAKE16: ; %bb.0:
; GFX1250-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1250-FAKE16-NEXT: s_mov_b32 s10, -1
; GFX1250-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
; GFX1250-FAKE16-NEXT: s_mov_b32 s14, s10
; GFX1250-FAKE16-NEXT: s_mov_b32 s15, s11
; GFX1250-FAKE16-NEXT: s_mov_b32 s18, s10
; GFX1250-FAKE16-NEXT: s_mov_b32 s19, s11
; GFX1250-FAKE16-NEXT: s_mov_b32 s22, s10
; GFX1250-FAKE16-NEXT: s_mov_b32 s23, s11
; GFX1250-FAKE16-NEXT: s_load_b256 s[8:15], s[4:5], 0x24
; GFX1250-FAKE16-NEXT: s_mov_b32 s2, -1
; GFX1250-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
; GFX1250-FAKE16-NEXT: s_mov_b32 s6, s2
; GFX1250-FAKE16-NEXT: s_mov_b32 s7, s3
; GFX1250-FAKE16-NEXT: s_mov_b32 s18, s2
; GFX1250-FAKE16-NEXT: s_mov_b32 s19, s3
; GFX1250-FAKE16-NEXT: s_mov_b32 s22, s2
; GFX1250-FAKE16-NEXT: s_mov_b32 s23, s3
; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-FAKE16-NEXT: s_mov_b32 s12, s2
; GFX1250-FAKE16-NEXT: s_mov_b32 s13, s3
; GFX1250-FAKE16-NEXT: s_mov_b32 s16, s4
; GFX1250-FAKE16-NEXT: s_mov_b32 s17, s5
; GFX1250-FAKE16-NEXT: s_mov_b32 s20, s6
; GFX1250-FAKE16-NEXT: s_mov_b32 s21, s7
; GFX1250-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_mov_b32 s4, s10
; GFX1250-FAKE16-NEXT: s_mov_b32 s5, s11
; GFX1250-FAKE16-NEXT: s_mov_b32 s16, s12
; GFX1250-FAKE16-NEXT: s_mov_b32 s17, s13
; GFX1250-FAKE16-NEXT: s_mov_b32 s20, s14
; GFX1250-FAKE16-NEXT: s_mov_b32 s21, s15
; GFX1250-FAKE16-NEXT: buffer_load_u16 v0, off, s[4:7], null scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-FAKE16-NEXT: buffer_load_u16 v1, off, s[16:19], null scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-FAKE16-NEXT: buffer_load_u16 v2, off, s[20:23], null scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX1250-FAKE16-NEXT: s_mov_b32 s8, s0
; GFX1250-FAKE16-NEXT: s_mov_b32 s9, s1
; GFX1250-FAKE16-NEXT: s_mov_b32 s0, s8
; GFX1250-FAKE16-NEXT: s_mov_b32 s1, s9
; GFX1250-FAKE16-NEXT: v_min3_num_f16 v0, v2, v0, v1
; GFX1250-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], null
; GFX1250-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
; GFX1250-FAKE16-NEXT: s_endpgm
%a = load volatile half, ptr addrspace(1) %aptr, align 2
%b = load volatile half, ptr addrspace(1) %bptr, align 2
@ -1217,36 +1217,36 @@ define amdgpu_kernel void @test_fmin3_olt_0_f64(ptr addrspace(1) %out, ptr addrs
;
; GFX1250-LABEL: test_fmin3_olt_0_f64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1250-NEXT: s_mov_b32 s10, -1
; GFX1250-NEXT: s_mov_b32 s11, 0x31016000
; GFX1250-NEXT: s_mov_b32 s14, s10
; GFX1250-NEXT: s_mov_b32 s15, s11
; GFX1250-NEXT: s_mov_b32 s18, s10
; GFX1250-NEXT: s_mov_b32 s19, s11
; GFX1250-NEXT: s_load_b256 s[8:15], s[4:5], 0x24
; GFX1250-NEXT: s_mov_b32 s2, -1
; GFX1250-NEXT: s_mov_b32 s3, 0x31016000
; GFX1250-NEXT: s_mov_b32 s6, s2
; GFX1250-NEXT: s_mov_b32 s7, s3
; GFX1250-NEXT: s_mov_b32 s18, s2
; GFX1250-NEXT: s_mov_b32 s19, s3
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_mov_b32 s12, s2
; GFX1250-NEXT: s_mov_b32 s13, s3
; GFX1250-NEXT: s_mov_b32 s16, s4
; GFX1250-NEXT: s_mov_b32 s17, s5
; GFX1250-NEXT: buffer_load_b64 v[0:1], off, s[12:15], null scope:SCOPE_SYS
; GFX1250-NEXT: s_mov_b32 s4, s10
; GFX1250-NEXT: s_mov_b32 s5, s11
; GFX1250-NEXT: s_mov_b32 s16, s12
; GFX1250-NEXT: s_mov_b32 s17, s13
; GFX1250-NEXT: buffer_load_b64 v[0:1], off, s[4:7], null scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: buffer_load_b64 v[2:3], off, s[16:19], null scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x1
; GFX1250-NEXT: s_mov_b32 s12, s6
; GFX1250-NEXT: s_mov_b32 s13, s7
; GFX1250-NEXT: s_mov_b32 s8, s0
; GFX1250-NEXT: buffer_load_b64 v[4:5], off, s[12:15], null scope:SCOPE_SYS
; GFX1250-NEXT: s_mov_b32 s4, s14
; GFX1250-NEXT: s_mov_b32 s5, s15
; GFX1250-NEXT: s_mov_b32 s0, s8
; GFX1250-NEXT: buffer_load_b64 v[4:5], off, s[4:7], null scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_mov_b32 s9, s1
; GFX1250-NEXT: s_mov_b32 s1, s9
; GFX1250-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
; GFX1250-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-NEXT: v_max_num_f64_e32 v[2:3], v[4:5], v[4:5]
; GFX1250-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-NEXT: buffer_store_b64 v[0:1], off, s[8:11], null
; GFX1250-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null
; GFX1250-NEXT: s_endpgm
%a = load volatile double, ptr addrspace(1) %aptr, align 4
%b = load volatile double, ptr addrspace(1) %bptr, align 4
@ -1427,36 +1427,36 @@ define amdgpu_kernel void @test_fmin3_olt_1_f64(ptr addrspace(1) %out, ptr addrs
;
; GFX1250-LABEL: test_fmin3_olt_1_f64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1250-NEXT: s_mov_b32 s10, -1
; GFX1250-NEXT: s_mov_b32 s11, 0x31016000
; GFX1250-NEXT: s_mov_b32 s14, s10
; GFX1250-NEXT: s_mov_b32 s15, s11
; GFX1250-NEXT: s_mov_b32 s18, s10
; GFX1250-NEXT: s_mov_b32 s19, s11
; GFX1250-NEXT: s_load_b256 s[8:15], s[4:5], 0x24
; GFX1250-NEXT: s_mov_b32 s2, -1
; GFX1250-NEXT: s_mov_b32 s3, 0x31016000
; GFX1250-NEXT: s_mov_b32 s6, s2
; GFX1250-NEXT: s_mov_b32 s7, s3
; GFX1250-NEXT: s_mov_b32 s18, s2
; GFX1250-NEXT: s_mov_b32 s19, s3
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_mov_b32 s12, s2
; GFX1250-NEXT: s_mov_b32 s13, s3
; GFX1250-NEXT: s_mov_b32 s16, s4
; GFX1250-NEXT: s_mov_b32 s17, s5
; GFX1250-NEXT: buffer_load_b64 v[0:1], off, s[12:15], null scope:SCOPE_SYS
; GFX1250-NEXT: s_mov_b32 s4, s10
; GFX1250-NEXT: s_mov_b32 s5, s11
; GFX1250-NEXT: s_mov_b32 s16, s12
; GFX1250-NEXT: s_mov_b32 s17, s13
; GFX1250-NEXT: buffer_load_b64 v[0:1], off, s[4:7], null scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: buffer_load_b64 v[2:3], off, s[16:19], null scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x1
; GFX1250-NEXT: s_mov_b32 s12, s6
; GFX1250-NEXT: s_mov_b32 s13, s7
; GFX1250-NEXT: s_mov_b32 s8, s0
; GFX1250-NEXT: buffer_load_b64 v[4:5], off, s[12:15], null scope:SCOPE_SYS
; GFX1250-NEXT: s_mov_b32 s4, s14
; GFX1250-NEXT: s_mov_b32 s5, s15
; GFX1250-NEXT: s_mov_b32 s0, s8
; GFX1250-NEXT: buffer_load_b64 v[4:5], off, s[4:7], null scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_mov_b32 s9, s1
; GFX1250-NEXT: s_mov_b32 s1, s9
; GFX1250-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
; GFX1250-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-NEXT: v_max_num_f64_e32 v[2:3], v[4:5], v[4:5]
; GFX1250-NEXT: v_min_num_f64_e32 v[0:1], v[2:3], v[0:1]
; GFX1250-NEXT: buffer_store_b64 v[0:1], off, s[8:11], null
; GFX1250-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null
; GFX1250-NEXT: s_endpgm
%a = load volatile double, ptr addrspace(1) %aptr, align 4
%b = load volatile double, ptr addrspace(1) %bptr, align 4

View File

@ -11,22 +11,20 @@ define void @test_i8load_v4i8store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GCN-SDAG-NEXT: s_wait_kmcnt 0x0
; GCN-SDAG-NEXT: global_load_u8 v2, v[2:3], off
; GCN-SDAG-NEXT: global_load_u8 v3, v[4:5], off
; GCN-SDAG-NEXT: global_load_u8 v0, v[0:1], off
; GCN-SDAG-NEXT: global_load_u8 v6, v[2:3], off
; GCN-SDAG-NEXT: global_load_u8 v7, v[4:5], off
; GCN-SDAG-NEXT: global_load_u8 v10, v[0:1], off
; GCN-SDAG-NEXT: s_wait_loadcnt 0x2
; GCN-SDAG-NEXT: s_wait_xcnt 0x0
; GCN-SDAG-NEXT: v_lshlrev_b16 v1, 8, v2
; GCN-SDAG-NEXT: v_lshlrev_b16 v0, 8, v6
; GCN-SDAG-NEXT: s_wait_loadcnt 0x1
; GCN-SDAG-NEXT: v_lshlrev_b16 v2, 8, v3
; GCN-SDAG-NEXT: v_lshlrev_b16 v1, 8, v7
; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GCN-SDAG-NEXT: v_or_b32_e32 v1, v7, v1
; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GCN-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; GCN-SDAG-NEXT: v_or_b32_e32 v1, v3, v2
; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GCN-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GCN-SDAG-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_bitop2_b32 v0, v10, v0 bitop3:0x54
; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GCN-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GCN-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; GCN-SDAG-NEXT: global_store_b32 v[8:9], v0, off
; GCN-SDAG-NEXT: s_set_pc_i64 s[30:31]
@ -35,13 +33,15 @@ define void @test_i8load_v4i8store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
; GCN-GISEL: ; %bb.0:
; GCN-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GCN-GISEL-NEXT: s_wait_kmcnt 0x0
; GCN-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GCN-GISEL-NEXT: global_load_u8 v1, v[2:3], off
; GCN-GISEL-NEXT: global_load_u8 v2, v[4:5], off
; GCN-GISEL-NEXT: global_load_u8 v6, v[0:1], off
; GCN-GISEL-NEXT: global_load_u8 v7, v[2:3], off
; GCN-GISEL-NEXT: global_load_u8 v10, v[4:5], off
; GCN-GISEL-NEXT: s_wait_loadcnt 0x1
; GCN-GISEL-NEXT: v_lshl_or_b32 v0, v1, 8, v0
; GCN-GISEL-NEXT: s_wait_xcnt 0x2
; GCN-GISEL-NEXT: v_lshl_or_b32 v0, v7, 8, v6
; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
; GCN-GISEL-NEXT: v_dual_lshlrev_b32 v1, 16, v2 :: v_dual_lshlrev_b32 v2, 24, v2
; GCN-GISEL-NEXT: s_wait_xcnt 0x1
; GCN-GISEL-NEXT: v_dual_lshlrev_b32 v1, 16, v10 :: v_dual_lshlrev_b32 v2, 24, v10
; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GCN-GISEL-NEXT: v_or3_b32 v0, v0, v1, v2
; GCN-GISEL-NEXT: global_store_b32 v[8:9], v0, off
@ -64,21 +64,21 @@ define i16 @test_v7i16_load_store(ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2
; GCN-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GCN-SDAG-NEXT: s_wait_kmcnt 0x0
; GCN-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off
; GCN-SDAG-NEXT: global_load_b128 v[0:3], v[2:3], off
; GCN-SDAG-NEXT: v_mov_b64_e32 v[8:9], 0
; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
; GCN-SDAG-NEXT: v_pk_add_u16 v10, v6, v2
; GCN-SDAG-NEXT: v_pk_add_u16 v11, v7, v3
; GCN-SDAG-NEXT: global_load_b128 v[8:11], v[2:3], off
; GCN-SDAG-NEXT: s_wait_xcnt 0x0
; GCN-SDAG-NEXT: v_mov_b64_e32 v[2:3], 12
; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
; GCN-SDAG-NEXT: v_pk_add_u16 v1, v6, v10
; GCN-SDAG-NEXT: v_pk_add_u16 v12, v7, v11
; GCN-SDAG-NEXT: v_mov_b64_e32 v[6:7], 8
; GCN-SDAG-NEXT: v_pk_add_u16 v4, v4, v0
; GCN-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v10
; GCN-SDAG-NEXT: v_pk_add_u16 v5, v5, v1
; GCN-SDAG-NEXT: v_mov_b64_e32 v[10:11], 0
; GCN-SDAG-NEXT: v_pk_add_u16 v5, v5, v9
; GCN-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GCN-SDAG-NEXT: v_pk_add_u16 v4, v4, v8
; GCN-SDAG-NEXT: s_clause 0x2
; GCN-SDAG-NEXT: global_store_b16 v[2:3], v11, off
; GCN-SDAG-NEXT: global_store_b32 v[6:7], v10, off
; GCN-SDAG-NEXT: global_store_b64 v[8:9], v[4:5], off
; GCN-SDAG-NEXT: global_store_b16 v[2:3], v12, off
; GCN-SDAG-NEXT: global_store_b32 v[6:7], v1, off
; GCN-SDAG-NEXT: global_store_b64 v[10:11], v[4:5], off
; GCN-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GCN-GISEL-LABEL: test_v7i16_load_store:
@ -86,28 +86,29 @@ define i16 @test_v7i16_load_store(ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2
; GCN-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GCN-GISEL-NEXT: s_wait_kmcnt 0x0
; GCN-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
; GCN-GISEL-NEXT: global_load_b128 v[0:3], v[2:3], off
; GCN-GISEL-NEXT: v_mov_b64_e32 v[8:9], 0
; GCN-GISEL-NEXT: v_mov_b64_e32 v[10:11], 2
; GCN-GISEL-NEXT: v_mov_b64_e32 v[12:13], 4
; GCN-GISEL-NEXT: v_mov_b64_e32 v[14:15], 6
; GCN-GISEL-NEXT: v_mov_b64_e32 v[16:17], 8
; GCN-GISEL-NEXT: v_mov_b64_e32 v[18:19], 10
; GCN-GISEL-NEXT: v_mov_b64_e32 v[20:21], 12
; GCN-GISEL-NEXT: global_load_b128 v[8:11], v[2:3], off
; GCN-GISEL-NEXT: s_wait_xcnt 0x0
; GCN-GISEL-NEXT: v_mov_b64_e32 v[2:3], 0
; GCN-GISEL-NEXT: v_mov_b64_e32 v[12:13], 2
; GCN-GISEL-NEXT: v_mov_b64_e32 v[14:15], 4
; GCN-GISEL-NEXT: v_mov_b64_e32 v[16:17], 6
; GCN-GISEL-NEXT: v_mov_b64_e32 v[18:19], 8
; GCN-GISEL-NEXT: v_mov_b64_e32 v[20:21], 10
; GCN-GISEL-NEXT: v_mov_b64_e32 v[22:23], 12
; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
; GCN-GISEL-NEXT: v_pk_add_u16 v2, v6, v2
; GCN-GISEL-NEXT: v_pk_add_u16 v4, v4, v0
; GCN-GISEL-NEXT: v_pk_add_u16 v1, v5, v1
; GCN-GISEL-NEXT: v_pk_add_u16 v3, v7, v3
; GCN-GISEL-NEXT: v_pk_add_u16 v1, v6, v10
; GCN-GISEL-NEXT: v_pk_add_u16 v4, v4, v8
; GCN-GISEL-NEXT: v_pk_add_u16 v5, v5, v9
; GCN-GISEL-NEXT: v_pk_add_u16 v6, v7, v11
; GCN-GISEL-NEXT: s_clause 0x6
; GCN-GISEL-NEXT: global_store_b16 v[8:9], v4, off
; GCN-GISEL-NEXT: global_store_d16_hi_b16 v[10:11], v4, off
; GCN-GISEL-NEXT: global_store_b16 v[12:13], v1, off
; GCN-GISEL-NEXT: global_store_d16_hi_b16 v[14:15], v1, off
; GCN-GISEL-NEXT: global_store_b16 v[16:17], v2, off
; GCN-GISEL-NEXT: global_store_d16_hi_b16 v[18:19], v2, off
; GCN-GISEL-NEXT: global_store_b16 v[20:21], v3, off
; GCN-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v2
; GCN-GISEL-NEXT: global_store_b16 v[2:3], v4, off
; GCN-GISEL-NEXT: global_store_d16_hi_b16 v[12:13], v4, off
; GCN-GISEL-NEXT: global_store_b16 v[14:15], v5, off
; GCN-GISEL-NEXT: global_store_d16_hi_b16 v[16:17], v5, off
; GCN-GISEL-NEXT: global_store_b16 v[18:19], v1, off
; GCN-GISEL-NEXT: global_store_d16_hi_b16 v[20:21], v1, off
; GCN-GISEL-NEXT: global_store_b16 v[22:23], v6, off
; GCN-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GCN-GISEL-NEXT: s_set_pc_i64 s[30:31]
%vec1 = load <7 x i16>, ptr addrspace(1) %ptr1
%insert = insertelement <7 x i16> %vec1, i16 20, i32 4
@ -253,8 +254,8 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
; GCN-SDAG-NEXT: global_load_b128 v[22:25], v[0:1], off offset:32
; GCN-SDAG-NEXT: global_load_b128 v[26:29], v[0:1], off offset:16
; GCN-SDAG-NEXT: global_load_b128 v[30:33], v[0:1], off
; GCN-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:64
; GCN-SDAG-NEXT: v_mov_b64_e32 v[36:37], 0x70
; GCN-SDAG-NEXT: global_load_b128 v[34:37], v[0:1], off offset:64
; GCN-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0x70
; GCN-SDAG-NEXT: v_mov_b64_e32 v[48:49], 48
; GCN-SDAG-NEXT: v_mov_b64_e32 v[38:39], 0x60
; GCN-SDAG-NEXT: v_mov_b64_e32 v[50:51], 32
@ -262,14 +263,15 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
; GCN-SDAG-NEXT: v_mov_b64_e32 v[66:67], 0
; GCN-SDAG-NEXT: v_mov_b64_e32 v[52:53], 0x50
; GCN-SDAG-NEXT: v_mov_b64_e32 v[54:55], 64
; GCN-SDAG-NEXT: v_dual_mov_b32 v34, 0xc8 :: v_dual_mov_b32 v35, 0
; GCN-SDAG-NEXT: s_wait_xcnt 0x0
; GCN-SDAG-NEXT: v_dual_mov_b32 v0, 0xc8 :: v_dual_mov_b32 v1, 0
; GCN-SDAG-NEXT: s_wait_loadcnt 0x7
; GCN-SDAG-NEXT: global_store_b128 v[36:37], v[6:9], off
; GCN-SDAG-NEXT: global_store_b128 v[2:3], v[6:9], off
; GCN-SDAG-NEXT: s_wait_loadcnt 0x6
; GCN-SDAG-NEXT: global_store_b128 v[38:39], v[10:13], off
; GCN-SDAG-NEXT: s_wait_loadcnt 0x5
; GCN-SDAG-NEXT: s_wait_xcnt 0x1
; GCN-SDAG-NEXT: v_dual_mov_b32 v36, v16 :: v_dual_mov_b32 v37, v17
; GCN-SDAG-NEXT: v_dual_mov_b32 v2, v16 :: v_dual_mov_b32 v3, v17
; GCN-SDAG-NEXT: s_wait_xcnt 0x0
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[12:13], v[12:13], v[12:13]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[10:11], v[10:11], v[10:11]
@ -286,8 +288,8 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[8:9], v[8:9], v[8:9]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[6:7], v[6:7], v[6:7]
; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[50:51], v[2:3], v[2:3]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[48:49], v[0:1], v[0:1]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[50:51], v[36:37], v[36:37]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[48:49], v[34:35], v[34:35]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[16:17], v[16:17], v[16:17]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[14:15], 0xc8, v[14:15]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[24:25], 0x64, v[24:25]
@ -298,8 +300,8 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[20:21], v[20:21], v[20:21]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[18:19], v[18:19], v[18:19]
; GCN-SDAG-NEXT: s_clause 0x1
; GCN-SDAG-NEXT: global_store_b128 v[52:53], v[34:37], off
; GCN-SDAG-NEXT: global_store_b128 v[54:55], v[0:3], off
; GCN-SDAG-NEXT: global_store_b128 v[52:53], v[0:3], off
; GCN-SDAG-NEXT: global_store_b128 v[54:55], v[34:37], off
; GCN-SDAG-NEXT: s_clause 0x7
; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[10:13], off offset:96
; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[6:9], off offset:112
@ -309,7 +311,7 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[18:21], off offset:48
; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[30:33], off
; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[26:29], off offset:16
; GCN-SDAG-NEXT: s_wait_xcnt 0x8
; GCN-SDAG-NEXT: s_wait_xcnt 0x9
; GCN-SDAG-NEXT: v_dual_mov_b32 v0, v32 :: v_dual_mov_b32 v1, v33
; GCN-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
@ -325,7 +327,7 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
; GCN-GISEL-NEXT: global_load_b128 v[22:25], v[0:1], off offset:48
; GCN-GISEL-NEXT: global_load_b128 v[26:29], v[0:1], off offset:96
; GCN-GISEL-NEXT: global_load_b128 v[30:33], v[0:1], off offset:112
; GCN-GISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:64
; GCN-GISEL-NEXT: global_load_b128 v[34:37], v[0:1], off offset:64
; GCN-GISEL-NEXT: v_mov_b64_e32 v[38:39], 0
; GCN-GISEL-NEXT: v_mov_b64_e32 v[48:49], 16
; GCN-GISEL-NEXT: v_mov_b64_e32 v[50:51], 32
@ -333,7 +335,8 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
; GCN-GISEL-NEXT: v_mov_b64_e32 v[66:67], 0x60
; GCN-GISEL-NEXT: v_mov_b64_e32 v[68:69], 0x70
; GCN-GISEL-NEXT: v_mov_b64_e32 v[54:55], 64
; GCN-GISEL-NEXT: v_mov_b64_e32 v[34:35], 0xc8
; GCN-GISEL-NEXT: s_wait_xcnt 0x0
; GCN-GISEL-NEXT: v_mov_b64_e32 v[0:1], 0xc8
; GCN-GISEL-NEXT: v_mov_b64_e32 v[64:65], 0x50
; GCN-GISEL-NEXT: s_wait_loadcnt 0x6
; GCN-GISEL-NEXT: global_store_b128 v[38:39], v[10:13], off
@ -349,7 +352,7 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
; GCN-GISEL-NEXT: global_store_b128 v[68:69], v[30:33], off
; GCN-GISEL-NEXT: s_wait_xcnt 0x5
; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[12:13], v[12:13], v[12:13]
; GCN-GISEL-NEXT: v_mov_b64_e32 v[36:37], v[8:9]
; GCN-GISEL-NEXT: v_mov_b64_e32 v[2:3], v[8:9]
; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[10:11], v[10:11], v[10:11]
; GCN-GISEL-NEXT: s_wait_xcnt 0x4
; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[14:15], v[14:15], v[14:15]
@ -361,8 +364,8 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[22:23], v[22:23], v[22:23]
; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[24:25], v[24:25], v[24:25]
; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[48:49], v[0:1], v[0:1]
; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[50:51], v[2:3], v[2:3]
; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[48:49], v[34:35], v[34:35]
; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[50:51], v[36:37], v[36:37]
; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[6:7], 0xc8, v[6:7]
; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[8:9], v[8:9], v[8:9]
; GCN-GISEL-NEXT: s_wait_xcnt 0x1
@ -372,8 +375,8 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[30:31], v[30:31], v[30:31]
; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[32:33], v[32:33], v[32:33]
; GCN-GISEL-NEXT: s_clause 0x1
; GCN-GISEL-NEXT: global_store_b128 v[54:55], v[0:3], off
; GCN-GISEL-NEXT: global_store_b128 v[64:65], v[34:37], off
; GCN-GISEL-NEXT: global_store_b128 v[54:55], v[34:37], off
; GCN-GISEL-NEXT: global_store_b128 v[64:65], v[0:3], off
; GCN-GISEL-NEXT: s_clause 0x7
; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[10:13], off
; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[14:17], off offset:16
@ -383,7 +386,7 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[6:9], off offset:80
; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[26:29], off offset:96
; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[30:33], off offset:112
; GCN-GISEL-NEXT: s_wait_xcnt 0x9
; GCN-GISEL-NEXT: s_wait_xcnt 0x8
; GCN-GISEL-NEXT: v_dual_mov_b32 v0, v12 :: v_dual_mov_b32 v1, v13
; GCN-GISEL-NEXT: s_set_pc_i64 s[30:31]
%a = load <16 x i64>, ptr addrspace(1) %ptr_a, align 4
@ -402,16 +405,17 @@ define amdgpu_kernel void @test_v7i16_load_store_kernel(ptr addrspace(1) %ptr1,
; GCN-SDAG-LABEL: test_v7i16_load_store_kernel:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GCN-SDAG-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GCN-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0
; GCN-SDAG-NEXT: s_wait_xcnt 0x0
; GCN-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
; GCN-SDAG-NEXT: v_mov_b64_e32 v[8:9], 12
; GCN-SDAG-NEXT: v_mov_b64_e32 v[10:11], 8
; GCN-SDAG-NEXT: v_mov_b64_e32 v[12:13], 0
; GCN-SDAG-NEXT: s_wait_kmcnt 0x0
; GCN-SDAG-NEXT: s_clause 0x1
; GCN-SDAG-NEXT: global_load_b128 v[0:3], v4, s[0:1] scale_offset
; GCN-SDAG-NEXT: global_load_b128 v[4:7], v4, s[2:3] scale_offset
; GCN-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset
; GCN-SDAG-NEXT: global_load_b128 v[4:7], v8, s[2:3] scale_offset
; GCN-SDAG-NEXT: s_wait_xcnt 0x0
; GCN-SDAG-NEXT: v_mov_b64_e32 v[8:9], 12
; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
; GCN-SDAG-NEXT: v_pk_add_u16 v3, v3, v7
; GCN-SDAG-NEXT: v_pk_add_u16 v2, v2, v6
@ -428,10 +432,9 @@ define amdgpu_kernel void @test_v7i16_load_store_kernel(ptr addrspace(1) %ptr1,
; GCN-GISEL-LABEL: test_v7i16_load_store_kernel:
; GCN-GISEL: ; %bb.0:
; GCN-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GCN-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GCN-GISEL-NEXT: v_and_b32_e32 v8, 0x3ff, v0
; GCN-GISEL-NEXT: s_wait_xcnt 0x0
; GCN-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
; GCN-GISEL-NEXT: v_mov_b64_e32 v[8:9], 0
; GCN-GISEL-NEXT: v_mov_b64_e32 v[10:11], 2
; GCN-GISEL-NEXT: v_mov_b64_e32 v[12:13], 4
; GCN-GISEL-NEXT: v_mov_b64_e32 v[14:15], 6
@ -440,8 +443,10 @@ define amdgpu_kernel void @test_v7i16_load_store_kernel(ptr addrspace(1) %ptr1,
; GCN-GISEL-NEXT: v_mov_b64_e32 v[20:21], 12
; GCN-GISEL-NEXT: s_wait_kmcnt 0x0
; GCN-GISEL-NEXT: s_clause 0x1
; GCN-GISEL-NEXT: global_load_b128 v[0:3], v4, s[0:1] scale_offset
; GCN-GISEL-NEXT: global_load_b128 v[4:7], v4, s[2:3] scale_offset
; GCN-GISEL-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset
; GCN-GISEL-NEXT: global_load_b128 v[4:7], v8, s[2:3] scale_offset
; GCN-GISEL-NEXT: s_wait_xcnt 0x0
; GCN-GISEL-NEXT: v_mov_b64_e32 v[8:9], 0
; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
; GCN-GISEL-NEXT: v_pk_add_u16 v0, v0, v4
; GCN-GISEL-NEXT: v_pk_add_u16 v1, v1, v5

View File

@ -11,14 +11,11 @@ declare i32 @llvm.amdgcn.cluster.id.z() #0
define amdgpu_kernel void @test_cluster_id_x(ptr addrspace(1) %out) {
; CHECK-UNKNOWN-LABEL: test_cluster_id_x:
; CHECK-UNKNOWN: ; %bb.0:
; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; CHECK-UNKNOWN-NEXT: s_load_b64 s[2:3], s[0:1], 0x24
; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
; CHECK-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
; CHECK-UNKNOWN-NEXT: global_store_b32 v1, v0, s[2:3]
; CHECK-UNKNOWN-NEXT: s_endpgm
; CHECK-UNKNOWN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; CHECK-UNKNOWN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; CHECK-UNKNOWN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
;
; CHECK-MESA3D-LABEL: test_cluster_id_x:
; CHECK-MESA3D: .amd_kernel_code_t
@ -68,7 +65,7 @@ define amdgpu_kernel void @test_cluster_id_x(ptr addrspace(1) %out) {
; CHECK-MESA3D-NEXT: is_ptr64 = 1
; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
@ -98,14 +95,11 @@ define amdgpu_kernel void @test_cluster_id_x(ptr addrspace(1) %out) {
;
; CHECK-G-UNKNOWN-LABEL: test_cluster_id_x:
; CHECK-G-UNKNOWN: ; %bb.0:
; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[2:3], s[0:1], 0x24
; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[2:3]
; CHECK-G-UNKNOWN-NEXT: s_endpgm
; CHECK-G-UNKNOWN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; CHECK-G-UNKNOWN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; CHECK-G-UNKNOWN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
;
; CHECK-G-MESA3D-LABEL: test_cluster_id_x:
; CHECK-G-MESA3D: .amd_kernel_code_t
@ -155,7 +149,7 @@ define amdgpu_kernel void @test_cluster_id_x(ptr addrspace(1) %out) {
; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
@ -190,14 +184,11 @@ define amdgpu_kernel void @test_cluster_id_x(ptr addrspace(1) %out) {
define amdgpu_kernel void @test_cluster_id_y(ptr addrspace(1) %out) #1 {
; CHECK-UNKNOWN-LABEL: test_cluster_id_y:
; CHECK-UNKNOWN: ; %bb.0:
; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; CHECK-UNKNOWN-NEXT: s_load_b64 s[2:3], s[0:1], 0x24
; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, ttmp7 :: v_dual_mov_b32 v1, 0
; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
; CHECK-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
; CHECK-UNKNOWN-NEXT: global_store_b32 v1, v0, s[2:3]
; CHECK-UNKNOWN-NEXT: s_endpgm
; CHECK-UNKNOWN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; CHECK-UNKNOWN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1
; CHECK-UNKNOWN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
;
; CHECK-MESA3D-LABEL: test_cluster_id_y:
; CHECK-MESA3D: .amd_kernel_code_t
@ -247,7 +238,7 @@ define amdgpu_kernel void @test_cluster_id_y(ptr addrspace(1) %out) #1 {
; CHECK-MESA3D-NEXT: is_ptr64 = 1
; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
@ -277,14 +268,11 @@ define amdgpu_kernel void @test_cluster_id_y(ptr addrspace(1) %out) #1 {
;
; CHECK-G-UNKNOWN-LABEL: test_cluster_id_y:
; CHECK-G-UNKNOWN: ; %bb.0:
; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[2:3], s[0:1], 0x24
; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v0, ttmp7 :: v_dual_mov_b32 v1, 0
; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[2:3]
; CHECK-G-UNKNOWN-NEXT: s_endpgm
; CHECK-G-UNKNOWN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; CHECK-G-UNKNOWN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1
; CHECK-G-UNKNOWN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
;
; CHECK-G-MESA3D-LABEL: test_cluster_id_y:
; CHECK-G-MESA3D: .amd_kernel_code_t
@ -334,7 +322,7 @@ define amdgpu_kernel void @test_cluster_id_y(ptr addrspace(1) %out) #1 {
; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
@ -369,16 +357,14 @@ define amdgpu_kernel void @test_cluster_id_y(ptr addrspace(1) %out) #1 {
define amdgpu_kernel void @test_cluster_id_z(ptr addrspace(1) %out) #1 {
; CHECK-UNKNOWN-LABEL: test_cluster_id_z:
; CHECK-UNKNOWN: ; %bb.0:
; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; CHECK-UNKNOWN-NEXT: s_lshr_b32 s2, ttmp7, 16
; CHECK-UNKNOWN-NEXT: s_load_b64 s[2:3], s[0:1], 0x24
; CHECK-UNKNOWN-NEXT: s_wait_xcnt 0x0
; CHECK-UNKNOWN-NEXT: s_lshr_b32 s0, ttmp7, 16
; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[2:3]
; CHECK-UNKNOWN-NEXT: s_endpgm
; CHECK-UNKNOWN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; CHECK-UNKNOWN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; CHECK-UNKNOWN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
;
; CHECK-MESA3D-LABEL: test_cluster_id_z:
; CHECK-MESA3D: .amd_kernel_code_t
@ -428,7 +414,7 @@ define amdgpu_kernel void @test_cluster_id_z(ptr addrspace(1) %out) #1 {
; CHECK-MESA3D-NEXT: is_ptr64 = 1
; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
@ -460,16 +446,14 @@ define amdgpu_kernel void @test_cluster_id_z(ptr addrspace(1) %out) #1 {
;
; CHECK-G-UNKNOWN-LABEL: test_cluster_id_z:
; CHECK-G-UNKNOWN: ; %bb.0:
; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; CHECK-G-UNKNOWN-NEXT: s_lshr_b32 s2, ttmp7, 16
; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[2:3], s[0:1], 0x24
; CHECK-G-UNKNOWN-NEXT: s_wait_xcnt 0x0
; CHECK-G-UNKNOWN-NEXT: s_lshr_b32 s0, ttmp7, 16
; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s0
; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[2:3]
; CHECK-G-UNKNOWN-NEXT: s_endpgm
; CHECK-G-UNKNOWN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; CHECK-G-UNKNOWN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; CHECK-G-UNKNOWN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
;
; CHECK-G-MESA3D-LABEL: test_cluster_id_z:
; CHECK-G-MESA3D: .amd_kernel_code_t
@ -519,7 +503,7 @@ define amdgpu_kernel void @test_cluster_id_z(ptr addrspace(1) %out) #1 {
; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0

View File

@ -67,7 +67,7 @@ define amdgpu_kernel void @test_workgroup_id_x(ptr addrspace(1) %out) #1 {
; CHECK-MESA3D-NEXT: is_ptr64 = 1
; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
@ -155,7 +155,7 @@ define amdgpu_kernel void @test_workgroup_id_x(ptr addrspace(1) %out) #1 {
; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
@ -246,7 +246,7 @@ define amdgpu_kernel void @test_workgroup_id_x_optimized(ptr addrspace(1) %out)
; CHECK-MESA3D-NEXT: is_ptr64 = 1
; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
@ -330,7 +330,7 @@ define amdgpu_kernel void @test_workgroup_id_x_optimized(ptr addrspace(1) %out)
; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
@ -421,7 +421,7 @@ define amdgpu_kernel void @test_workgroup_id_y(ptr addrspace(1) %out) #1 {
; CHECK-MESA3D-NEXT: is_ptr64 = 1
; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
@ -509,7 +509,7 @@ define amdgpu_kernel void @test_workgroup_id_y(ptr addrspace(1) %out) #1 {
; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
@ -600,7 +600,7 @@ define amdgpu_kernel void @test_workgroup_id_y_optimized(ptr addrspace(1) %out)
; CHECK-MESA3D-NEXT: is_ptr64 = 1
; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
@ -684,7 +684,7 @@ define amdgpu_kernel void @test_workgroup_id_y_optimized(ptr addrspace(1) %out)
; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
@ -775,7 +775,7 @@ define amdgpu_kernel void @test_workgroup_id_z(ptr addrspace(1) %out) #1 {
; CHECK-MESA3D-NEXT: is_ptr64 = 1
; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
@ -863,7 +863,7 @@ define amdgpu_kernel void @test_workgroup_id_z(ptr addrspace(1) %out) #1 {
; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
@ -956,7 +956,7 @@ define amdgpu_kernel void @test_workgroup_flat_id(ptr addrspace(1) %out) {
; CHECK-MESA3D-NEXT: is_ptr64 = 1
; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
@ -1044,7 +1044,7 @@ define amdgpu_kernel void @test_workgroup_flat_id(ptr addrspace(1) %out) {
; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
@ -1135,7 +1135,7 @@ define amdgpu_kernel void @test_workgroup_id_z_optimized(ptr addrspace(1) %out)
; CHECK-MESA3D-NEXT: is_ptr64 = 1
; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
@ -1219,7 +1219,7 @@ define amdgpu_kernel void @test_workgroup_id_z_optimized(ptr addrspace(1) %out)
; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0

View File

@ -65,7 +65,7 @@ define amdgpu_kernel void @test_workgroup_max_flat_id(ptr addrspace(1) %out) #1
; CHECK-MESA3D-NEXT: is_ptr64 = 1
; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
@ -153,7 +153,7 @@ define amdgpu_kernel void @test_workgroup_max_flat_id(ptr addrspace(1) %out) #1
; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0

View File

@ -67,7 +67,7 @@ define amdgpu_kernel void @test_workgroup_max_id_x(ptr addrspace(1) %out) #1 {
; CHECK-MESA3D-NEXT: is_ptr64 = 1
; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
@ -155,7 +155,7 @@ define amdgpu_kernel void @test_workgroup_max_id_x(ptr addrspace(1) %out) #1 {
; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
@ -246,7 +246,7 @@ define amdgpu_kernel void @test_workgroup_max_id_x_optimized(ptr addrspace(1) %o
; CHECK-MESA3D-NEXT: is_ptr64 = 1
; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
@ -330,7 +330,7 @@ define amdgpu_kernel void @test_workgroup_max_id_x_optimized(ptr addrspace(1) %o
; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
@ -421,7 +421,7 @@ define amdgpu_kernel void @test_workgroup_max_id_y(ptr addrspace(1) %out) #1 {
; CHECK-MESA3D-NEXT: is_ptr64 = 1
; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
@ -509,7 +509,7 @@ define amdgpu_kernel void @test_workgroup_max_id_y(ptr addrspace(1) %out) #1 {
; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
@ -600,7 +600,7 @@ define amdgpu_kernel void @test_workgroup_max_id_y_optimized(ptr addrspace(1) %o
; CHECK-MESA3D-NEXT: is_ptr64 = 1
; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
@ -684,7 +684,7 @@ define amdgpu_kernel void @test_workgroup_max_id_y_optimized(ptr addrspace(1) %o
; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
@ -775,7 +775,7 @@ define amdgpu_kernel void @test_workgroup_max_id_z(ptr addrspace(1) %out) #1 {
; CHECK-MESA3D-NEXT: is_ptr64 = 1
; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
@ -863,7 +863,7 @@ define amdgpu_kernel void @test_workgroup_max_id_z(ptr addrspace(1) %out) #1 {
; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
@ -954,7 +954,7 @@ define amdgpu_kernel void @test_workgroup_max_id_z_optimized(ptr addrspace(1) %o
; CHECK-MESA3D-NEXT: is_ptr64 = 1
; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
@ -1038,7 +1038,7 @@ define amdgpu_kernel void @test_workgroup_max_id_z_optimized(ptr addrspace(1) %o
; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 1
; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0

View File

@ -5,13 +5,13 @@
define amdgpu_kernel void @v_permlane_bcast_b32_vss(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
; GFX1250-LABEL: v_permlane_bcast_b32_vss:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b32 s4, s[4:5], 0x34
; GFX1250-NEXT: s_load_b32 s6, s[4:5], 0x34
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_permlane_bcast_b32 v0, v0, s3, s4
; GFX1250-NEXT: v_permlane_bcast_b32 v0, v0, s3, s6
; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1250-NEXT: s_endpgm
%v = call i32 @llvm.amdgcn.permlane.bcast(i32 %src0, i32 %src1, i32 %src2)
@ -92,13 +92,13 @@ define amdgpu_kernel void @v_permlane_bcast_b32_vvv(ptr addrspace(1) %out, i32 %
define amdgpu_kernel void @v_permlane_down_b32_vss(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
; GFX1250-LABEL: v_permlane_down_b32_vss:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b32 s4, s[4:5], 0x34
; GFX1250-NEXT: s_load_b32 s6, s[4:5], 0x34
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_permlane_down_b32 v0, v0, s3, s4
; GFX1250-NEXT: v_permlane_down_b32 v0, v0, s3, s6
; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1250-NEXT: s_endpgm
%v = call i32 @llvm.amdgcn.permlane.down(i32 %src0, i32 %src1, i32 %src2)
@ -179,13 +179,13 @@ define amdgpu_kernel void @v_permlane_down_b32_vvv(ptr addrspace(1) %out, i32 %s
define amdgpu_kernel void @v_permlane_up_b32_vss(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
; GFX1250-LABEL: v_permlane_up_b32_vss:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b32 s4, s[4:5], 0x34
; GFX1250-NEXT: s_load_b32 s6, s[4:5], 0x34
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_permlane_up_b32 v0, v0, s3, s4
; GFX1250-NEXT: v_permlane_up_b32 v0, v0, s3, s6
; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1250-NEXT: s_endpgm
%v = call i32 @llvm.amdgcn.permlane.up(i32 %src0, i32 %src1, i32 %src2)
@ -266,13 +266,13 @@ define amdgpu_kernel void @v_permlane_up_b32_vvv(ptr addrspace(1) %out, i32 %src
define amdgpu_kernel void @v_permlane_xor_b32_vss(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
; GFX1250-LABEL: v_permlane_xor_b32_vss:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b32 s4, s[4:5], 0x34
; GFX1250-NEXT: s_load_b32 s6, s[4:5], 0x34
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_permlane_xor_b32 v0, v0, s3, s4
; GFX1250-NEXT: v_permlane_xor_b32 v0, v0, s3, s6
; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1250-NEXT: s_endpgm
%v = call i32 @llvm.amdgcn.permlane.xor(i32 %src0, i32 %src1, i32 %src2)

View File

@ -69,9 +69,9 @@ body: |
bb.0:
; GCN-LABEL: name: merge_s_load_x1_x1_imm_no_scale_offset
; GCN: [[DEF:%[0-9]+]]:sgpr_64 = IMPLICIT_DEF
; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[DEF]], 0, 0 :: (dereferenceable invariant load (s64), align 4)
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_LOAD_DWORDX2_IMM]].sub0
; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_LOAD_DWORDX2_IMM]].sub1
; GCN-NEXT: early-clobber %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM_ec [[DEF]], 0, 0 :: (dereferenceable invariant load (s64), align 4)
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0_xexec = COPY %3.sub0
; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed %3.sub1
%0:sgpr_64 = IMPLICIT_DEF
%1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0:sgpr_64, 0, 0 :: (dereferenceable invariant load (s32))
%2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0:sgpr_64, 4, 0 :: (dereferenceable invariant load (s32))

View File

@ -471,13 +471,13 @@ define amdgpu_kernel void @copy_flat_divergent(ptr nocapture %d, ptr nocapture r
; GFX1250-NEXT: s_cmp_eq_u32 s0, 0
; GFX1250-NEXT: s_cbranch_scc1 .LBB4_3
; GFX1250-NEXT: ; %bb.1: ; %for.body.preheader
; GFX1250-NEXT: s_load_b128 s[4:7], s[4:5], 0x24
; GFX1250-NEXT: s_load_b128 s[8:11], s[4:5], 0x24
; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 4, v0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_add_nc_u64_e32 v[2:3], s[6:7], v[0:1]
; GFX1250-NEXT: v_add_nc_u64_e32 v[0:1], s[4:5], v[0:1]
; GFX1250-NEXT: v_add_nc_u64_e32 v[2:3], s[10:11], v[0:1]
; GFX1250-NEXT: v_add_nc_u64_e32 v[0:1], s[8:9], v[0:1]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-NEXT: v_add_nc_u64_e32 v[2:3], 0xb0, v[2:3]
; GFX1250-NEXT: .LBB4_2: ; %for.body
@ -602,13 +602,13 @@ define amdgpu_kernel void @copy_global_divergent(ptr addrspace(1) nocapture %d,
; GFX1250-NEXT: s_cmp_eq_u32 s0, 0
; GFX1250-NEXT: s_cbranch_scc1 .LBB5_3
; GFX1250-NEXT: ; %bb.1: ; %for.body.preheader
; GFX1250-NEXT: s_load_b128 s[4:7], s[4:5], 0x24
; GFX1250-NEXT: s_load_b128 s[8:11], s[4:5], 0x24
; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 4, v0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_add_nc_u64_e32 v[2:3], s[6:7], v[0:1]
; GFX1250-NEXT: v_add_nc_u64_e32 v[0:1], s[4:5], v[0:1]
; GFX1250-NEXT: v_add_nc_u64_e32 v[2:3], s[10:11], v[0:1]
; GFX1250-NEXT: v_add_nc_u64_e32 v[0:1], s[8:9], v[0:1]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-NEXT: v_add_nc_u64_e32 v[2:3], 0xb0, v[2:3]
; GFX1250-NEXT: .LBB5_2: ; %for.body

View File

@ -1117,18 +1117,19 @@ define amdgpu_kernel void @mad_i64_i32_uniform(ptr addrspace(1) %out, i32 %arg0,
;
; GFX1250-LABEL: mad_i64_i32_uniform:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1250-NEXT: s_mov_b32 s7, 0
; GFX1250-NEXT: s_mov_b32 s5, 0
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_mov_b32 s6, s2
; GFX1250-NEXT: s_mov_b32 s4, s2
; GFX1250-NEXT: s_mov_b32 s2, s3
; GFX1250-NEXT: s_mov_b32 s3, s7
; GFX1250-NEXT: s_mov_b32 s3, s5
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-NEXT: s_mul_u64 s[2:3], s[6:7], s[2:3]
; GFX1250-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[4:5]
; GFX1250-NEXT: s_mul_u64 s[2:3], s[4:5], s[2:3]
; GFX1250-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[6:7]
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]

View File

@ -257,16 +257,15 @@ define amdgpu_kernel void @v_test_imax_sge_i8(ptr addrspace(1) %out, ptr addrspa
;
; GFX1250-LABEL: v_test_imax_sge_i8:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_i8 s2, s[2:3], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_i8 s3, s[4:5], 0x0
; GFX1250-NEXT: s_load_i8 s4, s[2:3], 0x0
; GFX1250-NEXT: s_load_i8 s5, s[6:7], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_max_i32 s2, s2, s3
; GFX1250-NEXT: s_max_i32 s2, s4, s5
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_store_b8 v0, v1, s[0:1]
@ -701,16 +700,15 @@ define amdgpu_kernel void @v_test_umax_uge_i8(ptr addrspace(1) %out, ptr addrspa
;
; GFX1250-LABEL: v_test_umax_uge_i8:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_u8 s3, s[4:5], 0x0
; GFX1250-NEXT: s_load_u8 s4, s[2:3], 0x0
; GFX1250-NEXT: s_load_u8 s5, s[6:7], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_max_u32 s2, s2, s3
; GFX1250-NEXT: s_max_u32 s2, s4, s5
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_store_b8 v0, v1, s[0:1]
@ -777,13 +775,12 @@ define amdgpu_kernel void @v_test_umax_ugt_i32(ptr addrspace(1) %out, ptr addrsp
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v0, v0, s[0:1] scale_offset
; GFX1250-NEXT: s_load_b32 s2, s[0:1], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: s_load_b32 s6, s[0:1], 0x0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x24
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_max_u32_e32 v0, s2, v0
; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1250-NEXT: v_max_u32_e32 v0, s6, v0
; GFX1250-NEXT: global_store_b32 v1, v0, s[2:3]
; GFX1250-NEXT: s_endpgm
;
; EG-LABEL: v_test_umax_ugt_i32:
@ -1122,12 +1119,12 @@ define amdgpu_kernel void @test_umax_ugt_i64(ptr addrspace(1) %out, i64 %a, i64
;
; GFX1250-LABEL: test_umax_ugt_i64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_max_u64 v[0:1], s[2:3], s[4:5]
; GFX1250-NEXT: v_max_u64 v[0:1], s[2:3], s[6:7]
; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1250-NEXT: s_endpgm
;
@ -1175,12 +1172,12 @@ define amdgpu_kernel void @test_umax_uge_i64(ptr addrspace(1) %out, i64 %a, i64
;
; GFX1250-LABEL: test_umax_uge_i64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_max_u64 v[0:1], s[2:3], s[4:5]
; GFX1250-NEXT: v_max_u64 v[0:1], s[2:3], s[6:7]
; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1250-NEXT: s_endpgm
;
@ -1228,12 +1225,12 @@ define amdgpu_kernel void @test_imax_sgt_i64(ptr addrspace(1) %out, i64 %a, i64
;
; GFX1250-LABEL: test_imax_sgt_i64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_max_i64 v[0:1], s[2:3], s[4:5]
; GFX1250-NEXT: v_max_i64 v[0:1], s[2:3], s[6:7]
; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1250-NEXT: s_endpgm
;
@ -1281,12 +1278,12 @@ define amdgpu_kernel void @test_imax_sge_i64(ptr addrspace(1) %out, i64 %a, i64
;
; GFX1250-LABEL: test_imax_sge_i64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_max_i64 v[0:1], s[2:3], s[4:5]
; GFX1250-NEXT: v_max_i64 v[0:1], s[2:3], s[6:7]
; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1250-NEXT: s_endpgm
;

View File

@ -131,14 +131,14 @@ define amdgpu_kernel void @v_test_imin_sle_i32(ptr addrspace(1) %out, ptr addrsp
;
; GFX1250-LABEL: v_test_imin_sle_i32:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scale_offset
; GFX1250-NEXT: global_load_b32 v2, v0, s[4:5] scale_offset
; GFX1250-NEXT: global_load_b32 v2, v0, s[6:7] scale_offset
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_min_i32_e32 v1, v1, v2
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scale_offset
@ -1172,14 +1172,14 @@ define amdgpu_kernel void @s_test_imin_sle_v4i16(ptr addrspace(1) %out, <4 x i16
;
; GFX1250-LABEL: s_test_imin_sle_v4i16:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x8
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x0
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_pk_min_i16 v1, s1, s3
; GFX1250-NEXT: v_pk_min_i16 v0, s0, s2
; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[4:5]
; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[6:7]
; GFX1250-NEXT: s_endpgm
%cmp = icmp sle <4 x i16> %a, %b
%val = select <4 x i1> %cmp, <4 x i16> %a, <4 x i16> %b
@ -1307,14 +1307,14 @@ define amdgpu_kernel void @v_test_imin_slt_i32(ptr addrspace(1) %out, ptr addrsp
;
; GFX1250-LABEL: v_test_imin_slt_i32:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scale_offset
; GFX1250-NEXT: global_load_b32 v2, v0, s[4:5] scale_offset
; GFX1250-NEXT: global_load_b32 v2, v0, s[6:7] scale_offset
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_min_i32_e32 v1, v1, v2
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scale_offset
@ -1484,14 +1484,14 @@ define amdgpu_kernel void @v_test_imin_slt_i16(ptr addrspace(1) %out, ptr addrsp
;
; GFX1250-LABEL: v_test_imin_slt_i16:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_load_u16 v1, v0, s[2:3] scale_offset
; GFX1250-NEXT: global_load_u16 v2, v0, s[4:5] scale_offset
; GFX1250-NEXT: global_load_u16 v2, v0, s[6:7] scale_offset
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_min_i16 v1, v1, v2
; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1] scale_offset
@ -1686,16 +1686,16 @@ define amdgpu_kernel void @s_test_imin_slt_v2i32(ptr addrspace(1) %out, <2 x i32
;
; GFX1250-LABEL: s_test_imin_slt_v2i32:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x8
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x0
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_min_i32 s0, s0, s2
; GFX1250-NEXT: s_min_i32 s1, s1, s3
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[4:5]
; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[6:7]
; GFX1250-NEXT: s_endpgm
%cmp = icmp slt <2 x i32> %a, %b
%val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b
@ -2011,14 +2011,14 @@ define amdgpu_kernel void @v_test_umin_ule_i32(ptr addrspace(1) %out, ptr addrsp
;
; GFX1250-LABEL: v_test_umin_ule_i32:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scale_offset
; GFX1250-NEXT: global_load_b32 v2, v0, s[4:5] scale_offset
; GFX1250-NEXT: global_load_b32 v2, v0, s[6:7] scale_offset
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_min_u32_e32 v1, v1, v2
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scale_offset
@ -2171,16 +2171,16 @@ define amdgpu_kernel void @v_test_umin_ule_v3i32(ptr addrspace(1) %out, ptr addr
;
; GFX1250-LABEL: v_test_umin_ule_v3i32:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_lshlrev_b32_e32 v3, 4, v0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_load_b96 v[0:2], v3, s[2:3]
; GFX1250-NEXT: global_load_b96 v[4:6], v3, s[4:5]
; GFX1250-NEXT: global_load_b96 v[4:6], v3, s[6:7]
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_min_u32_e32 v2, v2, v6
; GFX1250-NEXT: v_min_u32_e32 v1, v1, v5
@ -2374,14 +2374,14 @@ define amdgpu_kernel void @v_test_umin_ule_v3i16(ptr addrspace(1) %out, ptr addr
;
; GFX1250-LABEL: v_test_umin_ule_v3i16:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
; GFX1250-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_load_b64 v[0:1], v4, s[2:3] scale_offset
; GFX1250-NEXT: global_load_b64 v[2:3], v4, s[4:5] scale_offset
; GFX1250-NEXT: global_load_b64 v[2:3], v4, s[6:7] scale_offset
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: v_lshlrev_b32_e32 v4, 3, v4
; GFX1250-NEXT: s_wait_loadcnt 0x0
@ -2611,14 +2611,14 @@ define amdgpu_kernel void @v_test_umin_ult_i32(ptr addrspace(1) %out, ptr addrsp
;
; GFX1250-LABEL: v_test_umin_ult_i32:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scale_offset
; GFX1250-NEXT: global_load_b32 v2, v0, s[4:5] scale_offset
; GFX1250-NEXT: global_load_b32 v2, v0, s[6:7] scale_offset
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_min_u32_e32 v1, v1, v2
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scale_offset
@ -2771,14 +2771,14 @@ define amdgpu_kernel void @v_test_umin_ult_i8(ptr addrspace(1) %out, ptr addrspa
;
; GFX1250-LABEL: v_test_umin_ult_i8:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_load_u8 v1, v0, s[2:3]
; GFX1250-NEXT: global_load_u8 v2, v0, s[4:5]
; GFX1250-NEXT: global_load_u8 v2, v0, s[6:7]
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_min_u16 v1, v1, v2
; GFX1250-NEXT: global_store_b8 v0, v1, s[0:1]
@ -3023,23 +3023,22 @@ define amdgpu_kernel void @v_test_umin_ult_i32_multi_use(ptr addrspace(1) %out0,
;
; GFX1250-LABEL: v_test_umin_ult_i32_multi_use:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b256 s[0:7], s[4:5], 0x0
; GFX1250-NEXT: s_load_b256 s[8:15], s[4:5], 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s4, s[4:5], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b32 s5, s[6:7], 0x0
; GFX1250-NEXT: s_load_b32 s0, s[12:13], 0x0
; GFX1250-NEXT: s_load_b32 s1, s[14:15], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_cmp_lt_u32 s4, s5
; GFX1250-NEXT: s_cselect_b32 s6, -1, 0
; GFX1250-NEXT: s_cmp_lt_u32 s0, s1
; GFX1250-NEXT: s_cselect_b32 s2, -1, 0
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, s6
; GFX1250-NEXT: s_and_b32 s6, s6, exec_lo
; GFX1250-NEXT: s_cselect_b32 s4, s4, s5
; GFX1250-NEXT: v_mov_b32_e32 v2, s4
; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
; GFX1250-NEXT: s_and_b32 s2, s2, exec_lo
; GFX1250-NEXT: s_cselect_b32 s0, s0, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_store_b32 v1, v2, s[0:1]
; GFX1250-NEXT: global_store_b8 v1, v0, s[2:3]
; GFX1250-NEXT: global_store_b32 v1, v2, s[8:9]
; GFX1250-NEXT: global_store_b8 v1, v0, s[10:11]
; GFX1250-NEXT: s_endpgm
%a = load i32, ptr addrspace(1) %aptr, align 4
%b = load i32, ptr addrspace(1) %bptr, align 4
@ -3220,12 +3219,12 @@ define amdgpu_kernel void @v_test_umin_ult_i16_multi_use(ptr addrspace(1) %out0,
;
; GFX1250-LABEL: v_test_umin_ult_i16_multi_use:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b256 s[0:7], s[4:5], 0x0
; GFX1250-NEXT: s_load_b256 s[8:15], s[4:5], 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_load_u16 v1, v0, s[6:7]
; GFX1250-NEXT: global_load_u16 v2, v0, s[4:5]
; GFX1250-NEXT: global_load_u16 v1, v0, s[14:15]
; GFX1250-NEXT: global_load_u16 v2, v0, s[12:13]
; GFX1250-NEXT: s_wait_loadcnt 0x1
; GFX1250-NEXT: v_and_b32_e32 v3, 0xffff, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
@ -3235,8 +3234,8 @@ define amdgpu_kernel void @v_test_umin_ult_i16_multi_use(ptr addrspace(1) %out0,
; GFX1250-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GFX1250-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
; GFX1250-NEXT: global_store_b8 v0, v2, s[2:3]
; GFX1250-NEXT: global_store_b16 v0, v1, s[8:9]
; GFX1250-NEXT: global_store_b8 v0, v2, s[10:11]
; GFX1250-NEXT: s_endpgm
%a = load i16, ptr addrspace(1) %aptr, align 2
%b = load i16, ptr addrspace(1) %bptr, align 2
@ -4338,12 +4337,12 @@ define amdgpu_kernel void @test_umin_ult_i64(ptr addrspace(1) %out, i64 %a, i64
;
; GFX1250-LABEL: test_umin_ult_i64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_min_u64 v[0:1], s[2:3], s[4:5]
; GFX1250-NEXT: v_min_u64 v[0:1], s[2:3], s[6:7]
; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1250-NEXT: s_endpgm
%tmp = icmp ult i64 %a, %b
@ -4462,12 +4461,12 @@ define amdgpu_kernel void @test_umin_ule_i64(ptr addrspace(1) %out, i64 %a, i64
;
; GFX1250-LABEL: test_umin_ule_i64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_min_u64 v[0:1], s[2:3], s[4:5]
; GFX1250-NEXT: v_min_u64 v[0:1], s[2:3], s[6:7]
; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1250-NEXT: s_endpgm
%tmp = icmp ule i64 %a, %b
@ -4586,12 +4585,12 @@ define amdgpu_kernel void @test_imin_slt_i64(ptr addrspace(1) %out, i64 %a, i64
;
; GFX1250-LABEL: test_imin_slt_i64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_min_i64 v[0:1], s[2:3], s[4:5]
; GFX1250-NEXT: v_min_i64 v[0:1], s[2:3], s[6:7]
; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1250-NEXT: s_endpgm
%tmp = icmp slt i64 %a, %b
@ -4710,12 +4709,12 @@ define amdgpu_kernel void @test_imin_sle_i64(ptr addrspace(1) %out, i64 %a, i64
;
; GFX1250-LABEL: test_imin_sle_i64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_min_i64 v[0:1], s[2:3], s[4:5]
; GFX1250-NEXT: v_min_i64 v[0:1], s[2:3], s[6:7]
; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1250-NEXT: s_endpgm
%tmp = icmp sle i64 %a, %b
@ -4872,14 +4871,14 @@ define amdgpu_kernel void @v_test_imin_sle_v2i16(ptr addrspace(1) %out, ptr addr
;
; GFX1250-LABEL: v_test_imin_sle_v2i16:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scale_offset
; GFX1250-NEXT: global_load_b32 v2, v0, s[4:5] scale_offset
; GFX1250-NEXT: global_load_b32 v2, v0, s[6:7] scale_offset
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_pk_min_i16 v1, v1, v2
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scale_offset
@ -5042,14 +5041,14 @@ define amdgpu_kernel void @v_test_imin_ule_v2i16(ptr addrspace(1) %out, ptr addr
;
; GFX1250-LABEL: v_test_imin_ule_v2i16:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scale_offset
; GFX1250-NEXT: global_load_b32 v2, v0, s[4:5] scale_offset
; GFX1250-NEXT: global_load_b32 v2, v0, s[6:7] scale_offset
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_pk_min_u16 v1, v1, v2
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scale_offset

View File

@ -450,6 +450,7 @@ define amdgpu_kernel void @s_trunc_i64_mul_to_i32(ptr addrspace(1) %out, i64 %a,
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x34
; GFX1250-NEXT: ; kill: killed $sgpr4_sgpr5
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_mul_i32 s2, s3, s2
; GFX1250-NEXT: s_mov_b32 s3, 0x31016000
@ -613,25 +614,25 @@ define amdgpu_kernel void @v_trunc_i64_mul_to_i32(ptr addrspace(1) %out, ptr add
;
; GFX1250-LABEL: v_trunc_i64_mul_to_i32:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1250-NEXT: s_mov_b32 s10, -1
; GFX1250-NEXT: s_mov_b32 s11, 0x31016000
; GFX1250-NEXT: s_mov_b32 s14, s10
; GFX1250-NEXT: s_mov_b32 s15, s11
; GFX1250-NEXT: s_mov_b32 s6, s10
; GFX1250-NEXT: s_mov_b32 s7, s11
; GFX1250-NEXT: s_load_b64 s[8:9], s[4:5], 0x34
; GFX1250-NEXT: s_mov_b32 s6, -1
; GFX1250-NEXT: s_mov_b32 s7, 0x31016000
; GFX1250-NEXT: s_mov_b32 s14, s6
; GFX1250-NEXT: s_mov_b32 s15, s7
; GFX1250-NEXT: s_mov_b32 s10, s6
; GFX1250-NEXT: s_mov_b32 s11, s7
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_mov_b32 s12, s2
; GFX1250-NEXT: s_mov_b32 s13, s3
; GFX1250-NEXT: buffer_load_b32 v0, off, s[12:15], null
; GFX1250-NEXT: buffer_load_b32 v1, off, s[4:7], null
; GFX1250-NEXT: s_mov_b32 s8, s0
; GFX1250-NEXT: s_mov_b32 s9, s1
; GFX1250-NEXT: buffer_load_b32 v1, off, s[8:11], null
; GFX1250-NEXT: s_mov_b32 s4, s0
; GFX1250-NEXT: s_mov_b32 s5, s1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_mul_lo_u32 v0, v1, v0
; GFX1250-NEXT: buffer_store_b32 v0, off, s[8:11], null
; GFX1250-NEXT: buffer_store_b32 v0, off, s[4:7], null
; GFX1250-NEXT: s_endpgm
;
; EG-LABEL: v_trunc_i64_mul_to_i32:
@ -2091,11 +2092,11 @@ define amdgpu_kernel void @s_mul_i64(ptr addrspace(1) %out, i64 %a, i64 %b) noun
;
; GFX1250-LABEL: s_mul_i64:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_mul_u64 s[4:5], s[2:3], s[4:5]
; GFX1250-NEXT: s_mul_u64 s[4:5], s[2:3], s[6:7]
; GFX1250-NEXT: s_mov_b32 s3, 0x31016000
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
; GFX1250-NEXT: s_mov_b32 s2, -1
@ -2292,25 +2293,25 @@ define amdgpu_kernel void @v_mul_i64(ptr addrspace(1) %out, ptr addrspace(1) %ap
;
; GFX1250-LABEL: v_mul_i64:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1250-NEXT: s_mov_b32 s10, -1
; GFX1250-NEXT: s_mov_b32 s11, 0x31016000
; GFX1250-NEXT: s_mov_b32 s14, s10
; GFX1250-NEXT: s_mov_b32 s15, s11
; GFX1250-NEXT: s_mov_b32 s6, s10
; GFX1250-NEXT: s_mov_b32 s7, s11
; GFX1250-NEXT: s_load_b64 s[8:9], s[4:5], 0x34
; GFX1250-NEXT: s_mov_b32 s6, -1
; GFX1250-NEXT: s_mov_b32 s7, 0x31016000
; GFX1250-NEXT: s_mov_b32 s14, s6
; GFX1250-NEXT: s_mov_b32 s15, s7
; GFX1250-NEXT: s_mov_b32 s10, s6
; GFX1250-NEXT: s_mov_b32 s11, s7
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_mov_b32 s12, s2
; GFX1250-NEXT: s_mov_b32 s13, s3
; GFX1250-NEXT: buffer_load_b64 v[0:1], off, s[12:15], null
; GFX1250-NEXT: buffer_load_b64 v[2:3], off, s[4:7], null
; GFX1250-NEXT: s_mov_b32 s8, s0
; GFX1250-NEXT: s_mov_b32 s9, s1
; GFX1250-NEXT: buffer_load_b64 v[2:3], off, s[8:11], null
; GFX1250-NEXT: s_mov_b32 s4, s0
; GFX1250-NEXT: s_mov_b32 s5, s1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_mul_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-NEXT: buffer_store_b64 v[0:1], off, s[8:11], null
; GFX1250-NEXT: buffer_store_b64 v[0:1], off, s[4:7], null
; GFX1250-NEXT: s_endpgm
;
; EG-LABEL: v_mul_i64:
@ -2845,30 +2846,30 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace(
;
; GFX1250-LABEL: mul64_in_branch:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1250-NEXT: s_load_b256 s[8:15], s[4:5], 0x24
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_cmp_lg_u64 s[4:5], 0
; GFX1250-NEXT: s_cmp_lg_u64 s[12:13], 0
; GFX1250-NEXT: s_cbranch_scc0 .LBB16_3
; GFX1250-NEXT: ; %bb.1: ; %else
; GFX1250-NEXT: s_mul_u64 s[4:5], s[4:5], s[6:7]
; GFX1250-NEXT: s_mul_u64 s[0:1], s[12:13], s[14:15]
; GFX1250-NEXT: s_cbranch_execnz .LBB16_4
; GFX1250-NEXT: .LBB16_2: ; %if
; GFX1250-NEXT: s_mov_b32 s7, 0x31016000
; GFX1250-NEXT: s_mov_b32 s6, -1
; GFX1250-NEXT: s_mov_b32 s4, s2
; GFX1250-NEXT: s_mov_b32 s5, s3
; GFX1250-NEXT: buffer_load_b64 v[0:1], off, s[4:7], null
; GFX1250-NEXT: s_branch .LBB16_5
; GFX1250-NEXT: .LBB16_3:
; GFX1250-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX1250-NEXT: s_branch .LBB16_2
; GFX1250-NEXT: .LBB16_4:
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
; GFX1250-NEXT: .LBB16_5: ; %endif
; GFX1250-NEXT: s_mov_b32 s3, 0x31016000
; GFX1250-NEXT: s_mov_b32 s2, -1
; GFX1250-NEXT: s_mov_b32 s0, s10
; GFX1250-NEXT: s_mov_b32 s1, s11
; GFX1250-NEXT: buffer_load_b64 v[0:1], off, s[0:3], null
; GFX1250-NEXT: s_branch .LBB16_5
; GFX1250-NEXT: .LBB16_3:
; GFX1250-NEXT: ; implicit-def: $sgpr0_sgpr1
; GFX1250-NEXT: s_branch .LBB16_2
; GFX1250-NEXT: .LBB16_4:
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX1250-NEXT: .LBB16_5: ; %endif
; GFX1250-NEXT: s_mov_b32 s11, 0x31016000
; GFX1250-NEXT: s_mov_b32 s10, -1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null
; GFX1250-NEXT: buffer_store_b64 v[0:1], off, s[8:11], null
; GFX1250-NEXT: s_endpgm
;
; EG-LABEL: mul64_in_branch:

View File

@ -340,46 +340,46 @@ define amdgpu_kernel void @fadd_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
;
; GFX1250-SDAG-LABEL: fadd_v32_vs:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_load_b64 s[34:35], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_lshlrev_b32_e32 v56, 7, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_clause 0x7
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v56, s[34:35] offset:16
; GFX1250-SDAG-NEXT: global_load_b128 v[4:7], v56, s[34:35] offset:48
; GFX1250-SDAG-NEXT: global_load_b128 v[8:11], v56, s[34:35] offset:32
; GFX1250-SDAG-NEXT: global_load_b128 v[12:15], v56, s[34:35]
; GFX1250-SDAG-NEXT: global_load_b128 v[16:19], v56, s[34:35] offset:80
; GFX1250-SDAG-NEXT: global_load_b128 v[20:23], v56, s[34:35] offset:96
; GFX1250-SDAG-NEXT: global_load_b128 v[24:27], v56, s[34:35] offset:64
; GFX1250-SDAG-NEXT: global_load_b128 v[28:31], v56, s[34:35] offset:112
; GFX1250-SDAG-NEXT: s_load_b512 s[16:31], s[4:5], 0xa4
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_load_b512 s[0:15], s[4:5], 0xe4
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v56, s[0:1] offset:16
; GFX1250-SDAG-NEXT: global_load_b128 v[4:7], v56, s[0:1] offset:48
; GFX1250-SDAG-NEXT: global_load_b128 v[8:11], v56, s[0:1] offset:32
; GFX1250-SDAG-NEXT: global_load_b128 v[12:15], v56, s[0:1]
; GFX1250-SDAG-NEXT: global_load_b128 v[16:19], v56, s[0:1] offset:80
; GFX1250-SDAG-NEXT: global_load_b128 v[20:23], v56, s[0:1] offset:96
; GFX1250-SDAG-NEXT: global_load_b128 v[24:27], v56, s[0:1] offset:64
; GFX1250-SDAG-NEXT: global_load_b128 v[28:31], v56, s[0:1] offset:112
; GFX1250-SDAG-NEXT: s_clause 0x1
; GFX1250-SDAG-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4
; GFX1250-SDAG-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v32, s20 :: v_dual_mov_b32 v33, s21
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v34, s22 :: v_dual_mov_b32 v35, s23
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v36, s18 :: v_dual_mov_b32 v39, s29
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v40, s30 :: v_dual_mov_b32 v41, s31
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v42, s24 :: v_dual_mov_b32 v37, s19
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v38, s28 :: v_dual_mov_b32 v55, s15
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v51, s3 :: v_dual_mov_b32 v52, s12
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v53, s13 :: v_dual_mov_b32 v54, s14
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v49, s7 :: v_dual_mov_b32 v50, s2
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v45, s27 :: v_dual_mov_b32 v46, s4
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v47, s5 :: v_dual_mov_b32 v48, s6
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v43, s25 :: v_dual_mov_b32 v44, s26
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v32, s40 :: v_dual_mov_b32 v33, s41
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v34, s42 :: v_dual_mov_b32 v35, s43
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v36, s38 :: v_dual_mov_b32 v39, s49
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v40, s50 :: v_dual_mov_b32 v41, s51
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v42, s44 :: v_dual_mov_b32 v37, s39
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v38, s48 :: v_dual_mov_b32 v55, s23
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v51, s11 :: v_dual_mov_b32 v52, s20
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v53, s21 :: v_dual_mov_b32 v54, s22
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v49, s15 :: v_dual_mov_b32 v50, s10
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v45, s47 :: v_dual_mov_b32 v46, s12
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v47, s13 :: v_dual_mov_b32 v48, s14
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v43, s45 :: v_dual_mov_b32 v44, s46
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x7
; GFX1250-SDAG-NEXT: v_pk_add_f32 v[0:1], v[0:1], v[32:33]
; GFX1250-SDAG-NEXT: v_pk_add_f32 v[2:3], v[2:3], v[34:35]
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v32, s8 :: v_dual_mov_b32 v33, s9
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v34, s10 :: v_dual_mov_b32 v35, s11
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v32, s16 :: v_dual_mov_b32 v33, s17
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v34, s18 :: v_dual_mov_b32 v35, s19
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x6
; GFX1250-SDAG-NEXT: v_pk_add_f32 v[6:7], v[6:7], v[40:41]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[40:41], s[0:1]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[40:41], s[8:9]
; GFX1250-SDAG-NEXT: v_pk_add_f32 v[4:5], v[4:5], v[38:39]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[38:39], s[16:17]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[38:39], s[36:37]
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x2
; GFX1250-SDAG-NEXT: v_pk_add_f32 v[20:21], v[20:21], v[32:33]
; GFX1250-SDAG-NEXT: v_pk_add_f32 v[22:23], v[22:23], v[34:35]
@ -395,58 +395,58 @@ define amdgpu_kernel void @fadd_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
; GFX1250-SDAG-NEXT: v_pk_add_f32 v[14:15], v[14:15], v[36:37]
; GFX1250-SDAG-NEXT: v_pk_add_f32 v[12:13], v[12:13], v[38:39]
; GFX1250-SDAG-NEXT: s_clause 0x7
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[20:23], s[34:35] offset:96
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[28:31], s[34:35] offset:112
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[24:27], s[34:35] offset:64
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[16:19], s[34:35] offset:80
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[8:11], s[34:35] offset:32
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[4:7], s[34:35] offset:48
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[12:15], s[34:35]
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[0:3], s[34:35] offset:16
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[20:23], s[0:1] offset:96
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[28:31], s[0:1] offset:112
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[24:27], s[0:1] offset:64
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[16:19], s[0:1] offset:80
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[8:11], s[0:1] offset:32
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[4:7], s[0:1] offset:48
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[12:15], s[0:1]
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[0:3], s[0:1] offset:16
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: fadd_v32_vs:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_load_b64 s[34:35], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v56, 7, v0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: s_clause 0x7
; GFX1250-GISEL-NEXT: global_load_b128 v[0:3], v56, s[34:35]
; GFX1250-GISEL-NEXT: global_load_b128 v[4:7], v56, s[34:35] offset:16
; GFX1250-GISEL-NEXT: global_load_b128 v[8:11], v56, s[34:35] offset:32
; GFX1250-GISEL-NEXT: global_load_b128 v[12:15], v56, s[34:35] offset:48
; GFX1250-GISEL-NEXT: global_load_b128 v[16:19], v56, s[34:35] offset:64
; GFX1250-GISEL-NEXT: global_load_b128 v[20:23], v56, s[34:35] offset:80
; GFX1250-GISEL-NEXT: global_load_b128 v[24:27], v56, s[34:35] offset:96
; GFX1250-GISEL-NEXT: global_load_b128 v[28:31], v56, s[34:35] offset:112
; GFX1250-GISEL-NEXT: s_load_b512 s[16:31], s[4:5], 0xa4
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_load_b512 s[0:15], s[4:5], 0xe4
; GFX1250-GISEL-NEXT: global_load_b128 v[0:3], v56, s[0:1]
; GFX1250-GISEL-NEXT: global_load_b128 v[4:7], v56, s[0:1] offset:16
; GFX1250-GISEL-NEXT: global_load_b128 v[8:11], v56, s[0:1] offset:32
; GFX1250-GISEL-NEXT: global_load_b128 v[12:15], v56, s[0:1] offset:48
; GFX1250-GISEL-NEXT: global_load_b128 v[16:19], v56, s[0:1] offset:64
; GFX1250-GISEL-NEXT: global_load_b128 v[20:23], v56, s[0:1] offset:80
; GFX1250-GISEL-NEXT: global_load_b128 v[24:27], v56, s[0:1] offset:96
; GFX1250-GISEL-NEXT: global_load_b128 v[28:31], v56, s[0:1] offset:112
; GFX1250-GISEL-NEXT: s_clause 0x1
; GFX1250-GISEL-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4
; GFX1250-GISEL-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[32:33], s[16:17]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[34:35], s[18:19]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[36:37], s[20:21]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[38:39], s[22:23]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[40:41], s[24:25]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[42:43], s[26:27]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[44:45], s[28:29]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[46:47], s[30:31]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[48:49], s[0:1]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[50:51], s[2:3]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[52:53], s[4:5]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[54:55], s[6:7]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[32:33], s[36:37]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[34:35], s[38:39]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[36:37], s[40:41]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[38:39], s[42:43]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[40:41], s[44:45]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[42:43], s[46:47]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[44:45], s[48:49]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[46:47], s[50:51]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[48:49], s[8:9]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[50:51], s[10:11]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[52:53], s[12:13]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[54:55], s[14:15]
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x7
; GFX1250-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], v[32:33]
; GFX1250-GISEL-NEXT: v_pk_add_f32 v[2:3], v[2:3], v[34:35]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[32:33], s[8:9]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[34:35], s[10:11]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[32:33], s[16:17]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[34:35], s[18:19]
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x6
; GFX1250-GISEL-NEXT: v_pk_add_f32 v[4:5], v[4:5], v[36:37]
; GFX1250-GISEL-NEXT: v_pk_add_f32 v[6:7], v[6:7], v[38:39]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[36:37], s[12:13]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[38:39], s[14:15]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[36:37], s[20:21]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[38:39], s[22:23]
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x5
; GFX1250-GISEL-NEXT: v_pk_add_f32 v[8:9], v[8:9], v[40:41]
; GFX1250-GISEL-NEXT: v_pk_add_f32 v[10:11], v[10:11], v[42:43]
@ -466,14 +466,14 @@ define amdgpu_kernel void @fadd_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
; GFX1250-GISEL-NEXT: v_pk_add_f32 v[28:29], v[28:29], v[36:37]
; GFX1250-GISEL-NEXT: v_pk_add_f32 v[30:31], v[30:31], v[38:39]
; GFX1250-GISEL-NEXT: s_clause 0x7
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[0:3], s[34:35]
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[4:7], s[34:35] offset:16
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[8:11], s[34:35] offset:32
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[12:15], s[34:35] offset:48
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[16:19], s[34:35] offset:64
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[20:23], s[34:35] offset:80
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[24:27], s[34:35] offset:96
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[28:31], s[34:35] offset:112
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[0:3], s[0:1]
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[4:7], s[0:1] offset:16
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[8:11], s[0:1] offset:32
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[12:15], s[0:1] offset:48
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[16:19], s[0:1] offset:64
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[20:23], s[0:1] offset:80
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[24:27], s[0:1] offset:96
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[28:31], s[0:1] offset:112
; GFX1250-GISEL-NEXT: s_endpgm
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %a, i32 %id
@ -1597,46 +1597,46 @@ define amdgpu_kernel void @fmul_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
;
; GFX1250-SDAG-LABEL: fmul_v32_vs:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_load_b64 s[34:35], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_lshlrev_b32_e32 v56, 7, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_clause 0x7
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v56, s[34:35] offset:16
; GFX1250-SDAG-NEXT: global_load_b128 v[4:7], v56, s[34:35] offset:48
; GFX1250-SDAG-NEXT: global_load_b128 v[8:11], v56, s[34:35] offset:32
; GFX1250-SDAG-NEXT: global_load_b128 v[12:15], v56, s[34:35]
; GFX1250-SDAG-NEXT: global_load_b128 v[16:19], v56, s[34:35] offset:80
; GFX1250-SDAG-NEXT: global_load_b128 v[20:23], v56, s[34:35] offset:96
; GFX1250-SDAG-NEXT: global_load_b128 v[24:27], v56, s[34:35] offset:64
; GFX1250-SDAG-NEXT: global_load_b128 v[28:31], v56, s[34:35] offset:112
; GFX1250-SDAG-NEXT: s_load_b512 s[16:31], s[4:5], 0xa4
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_load_b512 s[0:15], s[4:5], 0xe4
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v56, s[0:1] offset:16
; GFX1250-SDAG-NEXT: global_load_b128 v[4:7], v56, s[0:1] offset:48
; GFX1250-SDAG-NEXT: global_load_b128 v[8:11], v56, s[0:1] offset:32
; GFX1250-SDAG-NEXT: global_load_b128 v[12:15], v56, s[0:1]
; GFX1250-SDAG-NEXT: global_load_b128 v[16:19], v56, s[0:1] offset:80
; GFX1250-SDAG-NEXT: global_load_b128 v[20:23], v56, s[0:1] offset:96
; GFX1250-SDAG-NEXT: global_load_b128 v[24:27], v56, s[0:1] offset:64
; GFX1250-SDAG-NEXT: global_load_b128 v[28:31], v56, s[0:1] offset:112
; GFX1250-SDAG-NEXT: s_clause 0x1
; GFX1250-SDAG-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4
; GFX1250-SDAG-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v32, s20 :: v_dual_mov_b32 v33, s21
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v34, s22 :: v_dual_mov_b32 v35, s23
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v36, s18 :: v_dual_mov_b32 v39, s29
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v40, s30 :: v_dual_mov_b32 v41, s31
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v42, s24 :: v_dual_mov_b32 v37, s19
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v38, s28 :: v_dual_mov_b32 v55, s15
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v51, s3 :: v_dual_mov_b32 v52, s12
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v53, s13 :: v_dual_mov_b32 v54, s14
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v49, s7 :: v_dual_mov_b32 v50, s2
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v45, s27 :: v_dual_mov_b32 v46, s4
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v47, s5 :: v_dual_mov_b32 v48, s6
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v43, s25 :: v_dual_mov_b32 v44, s26
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v32, s40 :: v_dual_mov_b32 v33, s41
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v34, s42 :: v_dual_mov_b32 v35, s43
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v36, s38 :: v_dual_mov_b32 v39, s49
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v40, s50 :: v_dual_mov_b32 v41, s51
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v42, s44 :: v_dual_mov_b32 v37, s39
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v38, s48 :: v_dual_mov_b32 v55, s23
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v51, s11 :: v_dual_mov_b32 v52, s20
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v53, s21 :: v_dual_mov_b32 v54, s22
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v49, s15 :: v_dual_mov_b32 v50, s10
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v45, s47 :: v_dual_mov_b32 v46, s12
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v47, s13 :: v_dual_mov_b32 v48, s14
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v43, s45 :: v_dual_mov_b32 v44, s46
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x7
; GFX1250-SDAG-NEXT: v_pk_mul_f32 v[0:1], v[0:1], v[32:33]
; GFX1250-SDAG-NEXT: v_pk_mul_f32 v[2:3], v[2:3], v[34:35]
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v32, s8 :: v_dual_mov_b32 v33, s9
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v34, s10 :: v_dual_mov_b32 v35, s11
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v32, s16 :: v_dual_mov_b32 v33, s17
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v34, s18 :: v_dual_mov_b32 v35, s19
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x6
; GFX1250-SDAG-NEXT: v_pk_mul_f32 v[6:7], v[6:7], v[40:41]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[40:41], s[0:1]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[40:41], s[8:9]
; GFX1250-SDAG-NEXT: v_pk_mul_f32 v[4:5], v[4:5], v[38:39]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[38:39], s[16:17]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[38:39], s[36:37]
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x2
; GFX1250-SDAG-NEXT: v_pk_mul_f32 v[20:21], v[20:21], v[32:33]
; GFX1250-SDAG-NEXT: v_pk_mul_f32 v[22:23], v[22:23], v[34:35]
@ -1652,58 +1652,58 @@ define amdgpu_kernel void @fmul_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
; GFX1250-SDAG-NEXT: v_pk_mul_f32 v[14:15], v[14:15], v[36:37]
; GFX1250-SDAG-NEXT: v_pk_mul_f32 v[12:13], v[12:13], v[38:39]
; GFX1250-SDAG-NEXT: s_clause 0x7
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[20:23], s[34:35] offset:96
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[28:31], s[34:35] offset:112
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[24:27], s[34:35] offset:64
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[16:19], s[34:35] offset:80
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[8:11], s[34:35] offset:32
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[4:7], s[34:35] offset:48
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[12:15], s[34:35]
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[0:3], s[34:35] offset:16
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[20:23], s[0:1] offset:96
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[28:31], s[0:1] offset:112
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[24:27], s[0:1] offset:64
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[16:19], s[0:1] offset:80
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[8:11], s[0:1] offset:32
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[4:7], s[0:1] offset:48
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[12:15], s[0:1]
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[0:3], s[0:1] offset:16
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: fmul_v32_vs:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_load_b64 s[34:35], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v56, 7, v0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: s_clause 0x7
; GFX1250-GISEL-NEXT: global_load_b128 v[0:3], v56, s[34:35]
; GFX1250-GISEL-NEXT: global_load_b128 v[4:7], v56, s[34:35] offset:16
; GFX1250-GISEL-NEXT: global_load_b128 v[8:11], v56, s[34:35] offset:32
; GFX1250-GISEL-NEXT: global_load_b128 v[12:15], v56, s[34:35] offset:48
; GFX1250-GISEL-NEXT: global_load_b128 v[16:19], v56, s[34:35] offset:64
; GFX1250-GISEL-NEXT: global_load_b128 v[20:23], v56, s[34:35] offset:80
; GFX1250-GISEL-NEXT: global_load_b128 v[24:27], v56, s[34:35] offset:96
; GFX1250-GISEL-NEXT: global_load_b128 v[28:31], v56, s[34:35] offset:112
; GFX1250-GISEL-NEXT: s_load_b512 s[16:31], s[4:5], 0xa4
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_load_b512 s[0:15], s[4:5], 0xe4
; GFX1250-GISEL-NEXT: global_load_b128 v[0:3], v56, s[0:1]
; GFX1250-GISEL-NEXT: global_load_b128 v[4:7], v56, s[0:1] offset:16
; GFX1250-GISEL-NEXT: global_load_b128 v[8:11], v56, s[0:1] offset:32
; GFX1250-GISEL-NEXT: global_load_b128 v[12:15], v56, s[0:1] offset:48
; GFX1250-GISEL-NEXT: global_load_b128 v[16:19], v56, s[0:1] offset:64
; GFX1250-GISEL-NEXT: global_load_b128 v[20:23], v56, s[0:1] offset:80
; GFX1250-GISEL-NEXT: global_load_b128 v[24:27], v56, s[0:1] offset:96
; GFX1250-GISEL-NEXT: global_load_b128 v[28:31], v56, s[0:1] offset:112
; GFX1250-GISEL-NEXT: s_clause 0x1
; GFX1250-GISEL-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4
; GFX1250-GISEL-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[32:33], s[16:17]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[34:35], s[18:19]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[36:37], s[20:21]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[38:39], s[22:23]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[40:41], s[24:25]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[42:43], s[26:27]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[44:45], s[28:29]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[46:47], s[30:31]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[48:49], s[0:1]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[50:51], s[2:3]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[52:53], s[4:5]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[54:55], s[6:7]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[32:33], s[36:37]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[34:35], s[38:39]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[36:37], s[40:41]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[38:39], s[42:43]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[40:41], s[44:45]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[42:43], s[46:47]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[44:45], s[48:49]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[46:47], s[50:51]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[48:49], s[8:9]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[50:51], s[10:11]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[52:53], s[12:13]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[54:55], s[14:15]
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x7
; GFX1250-GISEL-NEXT: v_pk_mul_f32 v[0:1], v[0:1], v[32:33]
; GFX1250-GISEL-NEXT: v_pk_mul_f32 v[2:3], v[2:3], v[34:35]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[32:33], s[8:9]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[34:35], s[10:11]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[32:33], s[16:17]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[34:35], s[18:19]
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x6
; GFX1250-GISEL-NEXT: v_pk_mul_f32 v[4:5], v[4:5], v[36:37]
; GFX1250-GISEL-NEXT: v_pk_mul_f32 v[6:7], v[6:7], v[38:39]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[36:37], s[12:13]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[38:39], s[14:15]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[36:37], s[20:21]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[38:39], s[22:23]
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x5
; GFX1250-GISEL-NEXT: v_pk_mul_f32 v[8:9], v[8:9], v[40:41]
; GFX1250-GISEL-NEXT: v_pk_mul_f32 v[10:11], v[10:11], v[42:43]
@ -1723,14 +1723,14 @@ define amdgpu_kernel void @fmul_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
; GFX1250-GISEL-NEXT: v_pk_mul_f32 v[28:29], v[28:29], v[36:37]
; GFX1250-GISEL-NEXT: v_pk_mul_f32 v[30:31], v[30:31], v[38:39]
; GFX1250-GISEL-NEXT: s_clause 0x7
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[0:3], s[34:35]
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[4:7], s[34:35] offset:16
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[8:11], s[34:35] offset:32
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[12:15], s[34:35] offset:48
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[16:19], s[34:35] offset:64
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[20:23], s[34:35] offset:80
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[24:27], s[34:35] offset:96
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[28:31], s[34:35] offset:112
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[0:3], s[0:1]
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[4:7], s[0:1] offset:16
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[8:11], s[0:1] offset:32
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[12:15], s[0:1] offset:48
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[16:19], s[0:1] offset:64
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[20:23], s[0:1] offset:80
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[24:27], s[0:1] offset:96
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[28:31], s[0:1] offset:112
; GFX1250-GISEL-NEXT: s_endpgm
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %a, i32 %id
@ -2428,46 +2428,46 @@ define amdgpu_kernel void @fma_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
;
; GFX1250-SDAG-LABEL: fma_v32_vs:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_load_b64 s[34:35], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_lshlrev_b32_e32 v56, 7, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_clause 0x7
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v56, s[34:35] offset:16
; GFX1250-SDAG-NEXT: global_load_b128 v[4:7], v56, s[34:35] offset:48
; GFX1250-SDAG-NEXT: global_load_b128 v[8:11], v56, s[34:35] offset:32
; GFX1250-SDAG-NEXT: global_load_b128 v[12:15], v56, s[34:35]
; GFX1250-SDAG-NEXT: global_load_b128 v[16:19], v56, s[34:35] offset:80
; GFX1250-SDAG-NEXT: global_load_b128 v[20:23], v56, s[34:35] offset:96
; GFX1250-SDAG-NEXT: global_load_b128 v[24:27], v56, s[34:35] offset:64
; GFX1250-SDAG-NEXT: global_load_b128 v[28:31], v56, s[34:35] offset:112
; GFX1250-SDAG-NEXT: s_load_b512 s[16:31], s[4:5], 0xa4
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_load_b512 s[0:15], s[4:5], 0xe4
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v56, s[0:1] offset:16
; GFX1250-SDAG-NEXT: global_load_b128 v[4:7], v56, s[0:1] offset:48
; GFX1250-SDAG-NEXT: global_load_b128 v[8:11], v56, s[0:1] offset:32
; GFX1250-SDAG-NEXT: global_load_b128 v[12:15], v56, s[0:1]
; GFX1250-SDAG-NEXT: global_load_b128 v[16:19], v56, s[0:1] offset:80
; GFX1250-SDAG-NEXT: global_load_b128 v[20:23], v56, s[0:1] offset:96
; GFX1250-SDAG-NEXT: global_load_b128 v[24:27], v56, s[0:1] offset:64
; GFX1250-SDAG-NEXT: global_load_b128 v[28:31], v56, s[0:1] offset:112
; GFX1250-SDAG-NEXT: s_clause 0x1
; GFX1250-SDAG-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4
; GFX1250-SDAG-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[32:33], s[20:21]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[34:35], s[22:23]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[40:41], s[30:31]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[38:39], s[28:29]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[52:53], s[12:13]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[54:55], s[14:15]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[50:51], s[2:3]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[46:47], s[4:5]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[48:49], s[6:7]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[42:43], s[24:25]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[44:45], s[26:27]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[36:37], s[18:19]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[32:33], s[40:41]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[34:35], s[42:43]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[40:41], s[50:51]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[38:39], s[48:49]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[52:53], s[20:21]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[54:55], s[22:23]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[50:51], s[10:11]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[46:47], s[12:13]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[48:49], s[14:15]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[42:43], s[44:45]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[44:45], s[46:47]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[36:37], s[38:39]
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x7
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[0:1], v[0:1], v[32:33], v[32:33]
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[2:3], v[2:3], v[34:35], v[34:35]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[32:33], s[8:9]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[34:35], s[10:11]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[32:33], s[16:17]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[34:35], s[18:19]
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x6
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[6:7], v[6:7], v[40:41], v[40:41]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[40:41], s[0:1]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[40:41], s[8:9]
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[4:5], v[4:5], v[38:39], v[38:39]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[38:39], s[16:17]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[38:39], s[36:37]
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[28:29], v[28:29], v[52:53], v[52:53]
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[20:21], v[20:21], v[32:33], v[32:33]
@ -2482,58 +2482,58 @@ define amdgpu_kernel void @fma_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[14:15], v[14:15], v[36:37], v[36:37]
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[12:13], v[12:13], v[38:39], v[38:39]
; GFX1250-SDAG-NEXT: s_clause 0x7
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[20:23], s[34:35] offset:96
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[28:31], s[34:35] offset:112
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[24:27], s[34:35] offset:64
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[16:19], s[34:35] offset:80
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[8:11], s[34:35] offset:32
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[4:7], s[34:35] offset:48
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[12:15], s[34:35]
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[0:3], s[34:35] offset:16
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[20:23], s[0:1] offset:96
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[28:31], s[0:1] offset:112
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[24:27], s[0:1] offset:64
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[16:19], s[0:1] offset:80
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[8:11], s[0:1] offset:32
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[4:7], s[0:1] offset:48
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[12:15], s[0:1]
; GFX1250-SDAG-NEXT: global_store_b128 v56, v[0:3], s[0:1] offset:16
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: fma_v32_vs:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_load_b64 s[34:35], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v56, 7, v0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: s_clause 0x7
; GFX1250-GISEL-NEXT: global_load_b128 v[0:3], v56, s[34:35]
; GFX1250-GISEL-NEXT: global_load_b128 v[4:7], v56, s[34:35] offset:16
; GFX1250-GISEL-NEXT: global_load_b128 v[8:11], v56, s[34:35] offset:32
; GFX1250-GISEL-NEXT: global_load_b128 v[12:15], v56, s[34:35] offset:48
; GFX1250-GISEL-NEXT: global_load_b128 v[16:19], v56, s[34:35] offset:64
; GFX1250-GISEL-NEXT: global_load_b128 v[20:23], v56, s[34:35] offset:80
; GFX1250-GISEL-NEXT: global_load_b128 v[24:27], v56, s[34:35] offset:96
; GFX1250-GISEL-NEXT: global_load_b128 v[28:31], v56, s[34:35] offset:112
; GFX1250-GISEL-NEXT: s_load_b512 s[16:31], s[4:5], 0xa4
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_load_b512 s[0:15], s[4:5], 0xe4
; GFX1250-GISEL-NEXT: global_load_b128 v[0:3], v56, s[0:1]
; GFX1250-GISEL-NEXT: global_load_b128 v[4:7], v56, s[0:1] offset:16
; GFX1250-GISEL-NEXT: global_load_b128 v[8:11], v56, s[0:1] offset:32
; GFX1250-GISEL-NEXT: global_load_b128 v[12:15], v56, s[0:1] offset:48
; GFX1250-GISEL-NEXT: global_load_b128 v[16:19], v56, s[0:1] offset:64
; GFX1250-GISEL-NEXT: global_load_b128 v[20:23], v56, s[0:1] offset:80
; GFX1250-GISEL-NEXT: global_load_b128 v[24:27], v56, s[0:1] offset:96
; GFX1250-GISEL-NEXT: global_load_b128 v[28:31], v56, s[0:1] offset:112
; GFX1250-GISEL-NEXT: s_clause 0x1
; GFX1250-GISEL-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4
; GFX1250-GISEL-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[32:33], s[16:17]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[34:35], s[18:19]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[36:37], s[20:21]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[38:39], s[22:23]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[40:41], s[24:25]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[42:43], s[26:27]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[44:45], s[28:29]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[46:47], s[30:31]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[48:49], s[0:1]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[50:51], s[2:3]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[52:53], s[4:5]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[54:55], s[6:7]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[32:33], s[36:37]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[34:35], s[38:39]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[36:37], s[40:41]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[38:39], s[42:43]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[40:41], s[44:45]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[42:43], s[46:47]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[44:45], s[48:49]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[46:47], s[50:51]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[48:49], s[8:9]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[50:51], s[10:11]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[52:53], s[12:13]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[54:55], s[14:15]
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x7
; GFX1250-GISEL-NEXT: v_pk_fma_f32 v[0:1], v[0:1], v[32:33], v[32:33]
; GFX1250-GISEL-NEXT: v_pk_fma_f32 v[2:3], v[2:3], v[34:35], v[34:35]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[32:33], s[8:9]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[34:35], s[10:11]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[32:33], s[16:17]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[34:35], s[18:19]
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x6
; GFX1250-GISEL-NEXT: v_pk_fma_f32 v[4:5], v[4:5], v[36:37], v[36:37]
; GFX1250-GISEL-NEXT: v_pk_fma_f32 v[6:7], v[6:7], v[38:39], v[38:39]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[36:37], s[12:13]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[38:39], s[14:15]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[36:37], s[20:21]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[38:39], s[22:23]
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x5
; GFX1250-GISEL-NEXT: v_pk_fma_f32 v[8:9], v[8:9], v[40:41], v[40:41]
; GFX1250-GISEL-NEXT: v_pk_fma_f32 v[10:11], v[10:11], v[42:43], v[42:43]
@ -2553,14 +2553,14 @@ define amdgpu_kernel void @fma_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
; GFX1250-GISEL-NEXT: v_pk_fma_f32 v[28:29], v[28:29], v[36:37], v[36:37]
; GFX1250-GISEL-NEXT: v_pk_fma_f32 v[30:31], v[30:31], v[38:39], v[38:39]
; GFX1250-GISEL-NEXT: s_clause 0x7
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[0:3], s[34:35]
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[4:7], s[34:35] offset:16
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[8:11], s[34:35] offset:32
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[12:15], s[34:35] offset:48
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[16:19], s[34:35] offset:64
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[20:23], s[34:35] offset:80
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[24:27], s[34:35] offset:96
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[28:31], s[34:35] offset:112
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[0:3], s[0:1]
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[4:7], s[0:1] offset:16
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[8:11], s[0:1] offset:32
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[12:15], s[0:1] offset:48
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[16:19], s[0:1] offset:64
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[20:23], s[0:1] offset:80
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[24:27], s[0:1] offset:96
; GFX1250-GISEL-NEXT: global_store_b128 v56, v[28:31], s[0:1] offset:112
; GFX1250-GISEL-NEXT: s_endpgm
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %a, i32 %id
@ -3529,9 +3529,9 @@ define amdgpu_kernel void @fadd_fadd_fsub(<2 x float> %arg, <2 x float> %arg1, p
;
; GFX1250-SDAG-LABEL: fadd_fadd_fsub:
; GFX1250-SDAG: ; %bb.0: ; %bb
; GFX1250-SDAG-NEXT: s_clause 0x1
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1250-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
; GFX1250-SDAG-NEXT: s_add_f32 s2, s1, s3
@ -3541,14 +3541,14 @@ define amdgpu_kernel void @fadd_fadd_fsub(<2 x float> %arg, <2 x float> %arg1, p
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, v2
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-SDAG-NEXT: v_pk_add_f32 v[0:1], v[4:5], v[0:1] neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-SDAG-NEXT: global_store_b64 v2, v[0:1], s[4:5]
; GFX1250-SDAG-NEXT: global_store_b64 v2, v[0:1], s[6:7]
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: fadd_fadd_fsub:
; GFX1250-GISEL: ; %bb.0: ; %bb
; GFX1250-GISEL-NEXT: s_clause 0x1
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1250-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
@ -3560,7 +3560,7 @@ define amdgpu_kernel void @fadd_fadd_fsub(<2 x float> %arg, <2 x float> %arg1, p
; GFX1250-GISEL-NEXT: v_pk_add_f32 v[0:1], v[2:3], v[0:1]
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_subrev_f32 v3, s3, v0
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-GISEL-NEXT: global_store_b64 v0, v[2:3], s[4:5]
; GFX1250-GISEL-NEXT: global_store_b64 v0, v[2:3], s[6:7]
; GFX1250-GISEL-NEXT: s_endpgm
bb:
%i12 = fadd <2 x float> %arg, %arg1

View File

@ -186,12 +186,12 @@ define amdgpu_kernel void @mixed_inreg_block_count_x(ptr addrspace(1) %out, i32
;
; GFX1250-LABEL: mixed_inreg_block_count_x:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b32 s2, s[0:1], 0x10
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b32 s4, s[0:1], 0x10
; GFX1250-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s4
; GFX1250-NEXT: global_store_b32 v0, v1, s[2:3]
; GFX1250-NEXT: s_endpgm
%imp_arg_ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%load = load i32, ptr addrspace(4) %imp_arg_ptr

View File

@ -346,10 +346,10 @@ define amdgpu_kernel void @byref_preload_arg(ptr addrspace(1) inreg %out, ptr ad
;
; GFX1250-LABEL: byref_preload_arg:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x100
; GFX1250-NEXT: s_load_b64 s[4:5], s[0:1], 0x100
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s4
; GFX1250-NEXT: v_mov_b32_e32 v2, s5
; GFX1250-NEXT: global_store_b32 v0, v1, s[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v2, s[2:3] scope:SCOPE_SYS
@ -404,10 +404,10 @@ define amdgpu_kernel void @byref_staggered_preload_arg(ptr addrspace(1) inreg %o
;
; GFX1250-LABEL: byref_staggered_preload_arg:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x100
; GFX1250-NEXT: s_load_b64 s[4:5], s[0:1], 0x100
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s4
; GFX1250-NEXT: v_mov_b32_e32 v2, s5
; GFX1250-NEXT: global_store_b32 v0, v1, s[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v2, s[2:3] scope:SCOPE_SYS

View File

@ -97,9 +97,9 @@ entry:
define amdgpu_ps <2 x float> @s_load_b64_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) {
; GCN-LABEL: s_load_b64_idxprom:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b64 s[0:1], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_load_b64 s[4:5], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GCN-NEXT: ; return to shader part epilog
entry:
%idxprom = zext i32 %idx to i64
@ -111,10 +111,10 @@ entry:
define amdgpu_ps <3 x float> @s_load_b96_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) {
; GCN-LABEL: s_load_b96_idxprom:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b96 s[0:2], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_load_b96 s[4:6], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GCN-NEXT: v_mov_b32_e32 v2, s2
; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GCN-NEXT: v_mov_b32_e32 v2, s6
; GCN-NEXT: ; return to shader part epilog
entry:
%idxprom = zext i32 %idx to i64
@ -126,10 +126,10 @@ entry:
define amdgpu_ps <4 x float> @s_load_b128_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) {
; GCN-LABEL: s_load_b128_idxprom:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b128 s[0:3], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_load_b128 s[4:7], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GCN-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GCN-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GCN-NEXT: ; return to shader part epilog
entry:
%idxprom = zext i32 %idx to i64
@ -141,12 +141,12 @@ entry:
define amdgpu_ps <8 x float> @s_load_b256_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) {
; GCN-LABEL: s_load_b256_idxprom:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b256 s[0:7], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_load_b256 s[4:11], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GCN-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GCN-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
; GCN-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GCN-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GCN-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9
; GCN-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11
; GCN-NEXT: ; return to shader part epilog
entry:
%idxprom = zext i32 %idx to i64
@ -158,16 +158,16 @@ entry:
define amdgpu_ps <16 x float> @s_load_b512_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) {
; GCN-LABEL: s_load_b512_idxprom:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b512 s[0:15], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_load_b512 s[4:19], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GCN-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GCN-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
; GCN-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
; GCN-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
; GCN-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
; GCN-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13
; GCN-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GCN-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GCN-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9
; GCN-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11
; GCN-NEXT: v_dual_mov_b32 v8, s12 :: v_dual_mov_b32 v9, s13
; GCN-NEXT: v_dual_mov_b32 v10, s14 :: v_dual_mov_b32 v11, s15
; GCN-NEXT: v_dual_mov_b32 v12, s16 :: v_dual_mov_b32 v13, s17
; GCN-NEXT: v_dual_mov_b32 v14, s18 :: v_dual_mov_b32 v15, s19
; GCN-NEXT: ; return to shader part epilog
entry:
%idxprom = zext i32 %idx to i64
@ -275,11 +275,11 @@ entry:
define amdgpu_ps <2 x float> @s_load_b64_idxprom_range(ptr addrspace(4) align 4 inreg %p) {
; GCN-LABEL: s_load_b64_idxprom_range:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0
; GCN-NEXT: s_load_b32 s4, s[0:1], 0x0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: s_load_b64 s[0:1], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_load_b64 s[2:3], s[0:1], s4 offset:0x0 scale_offset
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GCN-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GCN-NEXT: ; return to shader part epilog
entry:
%idx = load i32, ptr addrspace(4) %p, align 4, !range !0
@ -294,10 +294,10 @@ define amdgpu_ps <3 x float> @s_load_b96_idxprom_range(ptr addrspace(4) align 4
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: s_load_b96 s[0:2], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_load_b96 s[4:6], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GCN-NEXT: v_mov_b32_e32 v2, s2
; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GCN-NEXT: v_mov_b32_e32 v2, s6
; GCN-NEXT: ; return to shader part epilog
entry:
%idx = load i32, ptr addrspace(4) %p, align 4, !range !0
@ -312,10 +312,10 @@ define amdgpu_ps <4 x float> @s_load_b128_idxprom_range(ptr addrspace(4) align 4
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: s_load_b128 s[0:3], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_load_b128 s[4:7], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GCN-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GCN-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GCN-NEXT: ; return to shader part epilog
entry:
%idx = load i32, ptr addrspace(4) %p, align 4, !range !0
@ -330,12 +330,12 @@ define amdgpu_ps <8 x float> @s_load_b256_idxprom_range(ptr addrspace(4) align 4
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: s_load_b256 s[0:7], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_load_b256 s[4:11], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GCN-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GCN-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
; GCN-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GCN-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GCN-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9
; GCN-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11
; GCN-NEXT: ; return to shader part epilog
entry:
%idx = load i32, ptr addrspace(4) %p, align 4, !range !0
@ -350,16 +350,16 @@ define amdgpu_ps <16 x float> @s_load_b512_idxprom_range(ptr addrspace(4) align
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: s_load_b512 s[0:15], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_load_b512 s[4:19], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GCN-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GCN-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
; GCN-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
; GCN-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
; GCN-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
; GCN-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13
; GCN-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GCN-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GCN-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9
; GCN-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11
; GCN-NEXT: v_dual_mov_b32 v8, s12 :: v_dual_mov_b32 v9, s13
; GCN-NEXT: v_dual_mov_b32 v10, s14 :: v_dual_mov_b32 v11, s15
; GCN-NEXT: v_dual_mov_b32 v12, s16 :: v_dual_mov_b32 v13, s17
; GCN-NEXT: v_dual_mov_b32 v14, s18 :: v_dual_mov_b32 v15, s19
; GCN-NEXT: ; return to shader part epilog
entry:
%idx = load i32, ptr addrspace(4) %p, align 4, !range !0

View File

@ -17,16 +17,16 @@ define amdgpu_kernel void @v_ashr_pk_i8_i32(ptr addrspace(1) %out, i32 %src0, i3
;
; GFX1250-LABEL: v_ashr_pk_i8_i32:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x2c
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_and_b32 s2, s2, 31
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_ashr_pk_i8_i32 v0, s0, s1, v0
; GFX1250-NEXT: global_store_b16 v1, v0, s[4:5]
; GFX1250-NEXT: global_store_b16 v1, v0, s[6:7]
; GFX1250-NEXT: s_endpgm
%insert.0 = insertelement <2 x i32> poison, i32 %src0, i64 0
%build_vector = insertelement <2 x i32> %insert.0, i32 %src1, i64 1
@ -58,16 +58,16 @@ define amdgpu_kernel void @v_ashr_pk_u8_i32(ptr addrspace(1) %out, i32 %src0, i3
;
; GFX1250-LABEL: v_ashr_pk_u8_i32:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x2c
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_and_b32 s2, s2, 31
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_ashr_pk_u8_i32 v0, s0, s1, v0
; GFX1250-NEXT: global_store_b16 v1, v0, s[4:5]
; GFX1250-NEXT: global_store_b16 v1, v0, s[6:7]
; GFX1250-NEXT: s_endpgm
%insert.0 = insertelement <2 x i32> poison, i32 %src0, i64 0
%build_vector = insertelement <2 x i32> %insert.0, i32 %src1, i64 1

View File

@ -211,38 +211,39 @@ define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspac
; GFX1250-SDAG-LABEL: workgroup_id_xyz:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40014
; GFX1250-SDAG-NEXT: s_lshr_b32 s6, ttmp7, 16
; GFX1250-SDAG-NEXT: s_add_co_i32 s7, s0, 1
; GFX1250-SDAG-NEXT: s_lshr_b32 s8, ttmp7, 16
; GFX1250-SDAG-NEXT: s_add_co_i32 s9, s0, 1
; GFX1250-SDAG-NEXT: s_clause 0x1
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX1250-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
; GFX1250-SDAG-NEXT: s_mul_i32 s4, s8, s9
; GFX1250-SDAG-NEXT: s_bfe_u32 s9, ttmp6, 0x40010
; GFX1250-SDAG-NEXT: s_mul_i32 s7, s6, s7
; GFX1250-SDAG-NEXT: s_bfe_u32 s8, ttmp6, 0x40008
; GFX1250-SDAG-NEXT: s_bfe_u32 s5, ttmp6, 0x40008
; GFX1250-SDAG-NEXT: s_and_b32 s10, ttmp7, 0xffff
; GFX1250-SDAG-NEXT: s_add_co_i32 s9, s9, 1
; GFX1250-SDAG-NEXT: s_bfe_u32 s11, ttmp6, 0x4000c
; GFX1250-SDAG-NEXT: s_add_co_i32 s8, s8, s7
; GFX1250-SDAG-NEXT: s_mul_i32 s7, s10, s9
; GFX1250-SDAG-NEXT: s_add_co_i32 s5, s5, s4
; GFX1250-SDAG-NEXT: s_mul_i32 s4, s10, s9
; GFX1250-SDAG-NEXT: s_bfe_u32 s9, ttmp6, 0x40004
; GFX1250-SDAG-NEXT: s_add_co_i32 s11, s11, 1
; GFX1250-SDAG-NEXT: s_add_co_i32 s9, s9, s7
; GFX1250-SDAG-NEXT: s_and_b32 s7, ttmp6, 15
; GFX1250-SDAG-NEXT: s_add_co_i32 s9, s9, s4
; GFX1250-SDAG-NEXT: s_and_b32 s4, ttmp6, 15
; GFX1250-SDAG-NEXT: s_mul_i32 s11, ttmp9, s11
; GFX1250-SDAG-NEXT: s_getreg_b32 s12, hwreg(HW_REG_IB_STS2, 6, 4)
; GFX1250-SDAG-NEXT: s_add_co_i32 s7, s7, s11
; GFX1250-SDAG-NEXT: s_add_co_i32 s4, s4, s11
; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s12, 0
; GFX1250-SDAG-NEXT: s_cselect_b32 s7, ttmp9, s7
; GFX1250-SDAG-NEXT: s_cselect_b32 s4, ttmp9, s4
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s7
; GFX1250-SDAG-NEXT: s_cselect_b32 s7, s10, s9
; GFX1250-SDAG-NEXT: s_cselect_b32 s6, s6, s8
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s6
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s4
; GFX1250-SDAG-NEXT: s_cselect_b32 s4, s10, s9
; GFX1250-SDAG-NEXT: s_cselect_b32 s5, s8, s5
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_clause 0x2
; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-SDAG-NEXT: global_store_b32 v0, v2, s[2:3]
; GFX1250-SDAG-NEXT: global_store_b32 v0, v3, s[4:5]
; GFX1250-SDAG-NEXT: global_store_b32 v0, v3, s[6:7]
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: workgroup_id_xyz:
@ -250,39 +251,40 @@ define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspac
; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15
; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
; GFX1250-GISEL-NEXT: s_getreg_b32 s6, hwreg(HW_REG_IB_STS2, 6, 4)
; GFX1250-GISEL-NEXT: s_getreg_b32 s8, hwreg(HW_REG_IB_STS2, 6, 4)
; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, s0
; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0
; GFX1250-GISEL-NEXT: s_cselect_b32 s7, ttmp9, s1
; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s8, 0
; GFX1250-GISEL-NEXT: s_cselect_b32 s9, ttmp9, s1
; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40010
; GFX1250-GISEL-NEXT: s_and_b32 s8, ttmp7, 0xffff
; GFX1250-GISEL-NEXT: s_and_b32 s10, ttmp7, 0xffff
; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
; GFX1250-GISEL-NEXT: s_bfe_u32 s9, ttmp6, 0x40004
; GFX1250-GISEL-NEXT: s_mul_i32 s10, s8, s0
; GFX1250-GISEL-NEXT: s_bfe_u32 s11, ttmp6, 0x40004
; GFX1250-GISEL-NEXT: s_mul_i32 s12, s10, s0
; GFX1250-GISEL-NEXT: s_clause 0x1
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX1250-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
; GFX1250-GISEL-NEXT: s_add_co_i32 s11, s11, s12
; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s8, 0
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, s9
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
; GFX1250-GISEL-NEXT: s_add_co_i32 s9, s9, s10
; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, s7
; GFX1250-GISEL-NEXT: s_cselect_b32 s8, s8, s9
; GFX1250-GISEL-NEXT: s_bfe_u32 s9, ttmp6, 0x40014
; GFX1250-GISEL-NEXT: s_cselect_b32 s4, s10, s11
; GFX1250-GISEL-NEXT: s_bfe_u32 s5, ttmp6, 0x40014
; GFX1250-GISEL-NEXT: s_lshr_b32 s10, ttmp7, 16
; GFX1250-GISEL-NEXT: s_add_co_i32 s9, s9, 1
; GFX1250-GISEL-NEXT: s_add_co_i32 s5, s5, 1
; GFX1250-GISEL-NEXT: s_bfe_u32 s11, ttmp6, 0x40008
; GFX1250-GISEL-NEXT: s_mul_i32 s9, s10, s9
; GFX1250-GISEL-NEXT: s_mul_i32 s5, s10, s5
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX1250-GISEL-NEXT: s_add_co_i32 s11, s11, s9
; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0
; GFX1250-GISEL-NEXT: s_cselect_b32 s6, s10, s11
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s8 :: v_dual_mov_b32 v3, s6
; GFX1250-GISEL-NEXT: s_add_co_i32 s11, s11, s5
; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s8, 0
; GFX1250-GISEL-NEXT: s_cselect_b32 s5, s10, s11
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: s_clause 0x2
; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1250-GISEL-NEXT: global_store_b32 v1, v2, s[2:3]
; GFX1250-GISEL-NEXT: global_store_b32 v1, v3, s[4:5]
; GFX1250-GISEL-NEXT: global_store_b32 v1, v3, s[6:7]
; GFX1250-GISEL-NEXT: s_endpgm
; GFX12-LABEL: workgroup_id_xyz:
; GFX12: ; %bb.0:

View File

@ -178,6 +178,7 @@ max_vgprs:
// ASM-NEXT: .amdhsa_next_free_sgpr 32
// ASM-NEXT: .amdhsa_named_barrier_count 3
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM-NEXT: .amdhsa_float_round_mode_32 1
// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
// ASM-NEXT: .amdhsa_float_denorm_mode_32 1

View File

@ -178,6 +178,7 @@ max_vgprs:
// ASM-NEXT: .amdhsa_next_free_sgpr 32
// ASM-NEXT: .amdhsa_named_barrier_count 3
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM-NEXT: .amdhsa_float_round_mode_32 1
// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
// ASM-NEXT: .amdhsa_float_denorm_mode_32 1

View File

@ -20,7 +20,7 @@
; CHECK-NEXT: ; IMAGE_OP 0
; CHECK-NEXT: .amdhsa_next_free_vgpr 32
; CHECK-NEXT: .amdhsa_reserve_vcc 0
; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
; CHECK-NEXT: .amdhsa_reserve_xnack_mask 1
; CHECK-NEXT: .amdhsa_next_free_sgpr 8
; CHECK-NEXT: .amdhsa_float_round_mode_32 0
; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
@ -76,7 +76,7 @@
; CHECK-NEXT: ; IMAGE_OP 0
; CHECK-NEXT: .amdhsa_next_free_vgpr 32
; CHECK-NEXT: .amdhsa_reserve_vcc 0
; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
; CHECK-NEXT: .amdhsa_reserve_xnack_mask 1
; CHECK-NEXT: .amdhsa_next_free_sgpr 8
; CHECK-NEXT: .amdhsa_float_round_mode_32 0
; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0