[AMDGPU] Update gfx1250 sched model to latest table (#182923)

This commit is contained in:
Stanislav Mekhanoshin 2026-02-23 13:25:07 -08:00 committed by GitHub
parent b4564abb01
commit 9829d082af
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 330 additions and 331 deletions

View File

@ -491,7 +491,7 @@ def : HWWriteRes<Write16PassWMMA, [HWVALU], 64>;
def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteFloatCvt, [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteTrans32, [HWTransVALU, HWRC], 7>;
def : HWWriteRes<WriteTrans32, [HWTransVALU, HWRC], 8>;
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 6>;
def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;
def : HWWriteRes<WritePseudoScalarTrans, [HWVALU, HWRC], 8>;
@ -517,10 +517,10 @@ def : InstRW<[WriteXDL2PassWMMA], (instregex "^V_WMMA.*_F32_32X16X128_F4")>;
let SchedModel = GFX1250SpeedModel in {
defm : GFX125xCommonWriteRes;
def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 7>;
def : HWWriteRes<WriteIntMul, [HWVALU, HWRC], 11>;
def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 32>;
def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 32>;
def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 32>;
def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 6>;
def : HWWriteRes<WriteIntMul, [HWVALU, HWRC], 8>;
def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 37>;
def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 37>;
def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 37>;
def : HWWriteRes<WriteTrans64, [HWVALU, HWTransVALU, HWRC], 38>;
} // SchedModel = GFX1250SpeedModel

View File

@ -3044,12 +3044,12 @@ define i256 @v_mul_i256(i256 %num, i256 %den) {
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v16, v0 :: v_dual_mov_b32 v17, v1
; GFX1250-NEXT: v_mul_lo_u32 v30, v4, v11
; GFX1250-NEXT: v_mul_lo_u32 v29, v5, v10
; GFX1250-NEXT: v_mul_lo_u32 v29, v4, v11
; GFX1250-NEXT: v_mul_lo_u32 v30, v2, v13
; GFX1250-NEXT: v_mul_lo_u32 v31, v3, v12
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v16, v14, 0
; GFX1250-NEXT: v_mul_lo_u32 v32, v2, v13
; GFX1250-NEXT: v_mul_lo_u32 v28, v5, v10
; GFX1250-NEXT: v_mad_nc_u64_u32 v[18:19], v17, v13, v[0:1]
; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v16, v12, 0
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
@ -3070,65 +3070,67 @@ define i256 @v_mul_i256(i256 %num, i256 %den) {
; GFX1250-NEXT: v_mad_nc_u64_u32 v[18:19], v16, v10, 0
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_mad_co_u64_u32 v[22:23], vcc_lo, v4, v8, v[0:1]
; GFX1250-NEXT: v_add_co_ci_u32_e64 v28, null, 0, v26, vcc_lo
; GFX1250-NEXT: v_add_co_ci_u32_e64 v26, null, 0, v26, vcc_lo
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_mad_nc_u64_u32 v[24:25], v6, v8, v[20:21]
; GFX1250-NEXT: v_mad_co_u64_u32 v[0:1], s0, v17, v9, v[18:19]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX1250-NEXT: v_dual_mov_b32 v18, v23 :: v_dual_mov_b32 v19, v24
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_mov_b32_e32 v18, v23
; GFX1250-NEXT: v_cndmask_b32_e64 v27, 0, 1, s0
; GFX1250-NEXT: v_mov_b32_e32 v19, v24
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_mad_co_u64_u32 v[20:21], vcc_lo, v2, v8, v[0:1]
; GFX1250-NEXT: v_mul_lo_u32 v24, v6, v9
; GFX1250-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0
; GFX1250-NEXT: v_mad_co_u64_u32 v[20:21], vcc_lo, v16, v13, v[18:19]
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s0, v2, v8, v[0:1]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250-NEXT: v_add_co_ci_u32_e64 v6, null, 0, v6, s0
; GFX1250-NEXT: v_mad_co_u64_u32 v[0:1], s0, v17, v12, v[20:21]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_dual_mov_b32 v20, v19 :: v_dual_mov_b32 v21, v22
; GFX1250-NEXT: v_mov_b32_e32 v13, v18
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_mad_co_u64_u32 v[22:23], s2, v16, v11, v[20:21]
; GFX1250-NEXT: v_mad_co_u64_u32 v[20:21], s1, v2, v11, v[0:1]
; GFX1250-NEXT: v_cndmask_b32_e64 v11, 0, 1, s2
; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v16, v8, 0
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_mad_co_u64_u32 v[26:27], s2, v17, v10, v[22:23]
; GFX1250-NEXT: v_add_co_ci_u32_e64 v33, null, 0, v11, s2
; GFX1250-NEXT: v_mad_co_u64_u32 v[22:23], s3, v3, v10, v[20:21]
; GFX1250-NEXT: v_add_co_ci_u32_e64 v6, null, 0, v27, vcc_lo
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_mov_b32_e32 v12, v1
; GFX1250-NEXT: v_mad_co_u64_u32 v[10:11], s2, v2, v9, v[26:27]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_mad_co_u64_u32 v[20:21], s6, v16, v9, v[12:13]
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s4, v4, v9, v[22:23]
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v33, s2
; GFX1250-NEXT: v_mul_lo_u32 v2, v16, v15
; GFX1250-NEXT: v_mul_lo_u32 v9, v17, v14
; GFX1250-NEXT: v_mad_co_u64_u32 v[12:13], s2, v3, v8, v[10:11]
; GFX1250-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6
; GFX1250-NEXT: v_mad_co_u64_u32 v[0:1], s0, v16, v13, v[18:19]
; GFX1250-NEXT: v_dual_mov_b32 v19, v22 :: v_dual_mov_b32 v18, v21
; GFX1250-NEXT: v_mul_lo_u32 v21, v16, v15
; GFX1250-NEXT: v_mul_lo_u32 v27, v17, v14
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_mad_co_u64_u32 v[22:23], vcc_lo, v17, v12, v[0:1]
; GFX1250-NEXT: v_mad_co_u64_u32 v[14:15], s2, v16, v11, v[18:19]
; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v16, v8, 0
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s1, v2, v11, v[22:23]
; GFX1250-NEXT: v_cndmask_b32_e64 v11, 0, 1, s2
; GFX1250-NEXT: v_mad_co_u64_u32 v[12:13], s2, v17, v10, v[14:15]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250-NEXT: v_dual_mov_b32 v14, v1 :: v_dual_mov_b32 v15, v20
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v11, s2
; GFX1250-NEXT: v_mad_co_u64_u32 v[22:23], s3, v3, v10, v[18:19]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_mad_co_u64_u32 v[10:11], s2, v2, v9, v[12:13]
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, s2
; GFX1250-NEXT: v_mad_co_u64_u32 v[10:11], s5, v5, v8, v[18:19]
; GFX1250-NEXT: v_mad_co_u64_u32 v[14:15], s2, v17, v8, v[20:21]
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s4, v16, v9, v[14:15]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_mad_co_u64_u32 v[12:13], s2, v4, v9, v[22:23]
; GFX1250-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
; GFX1250-NEXT: v_mad_co_u64_u32 v[14:15], s4, v3, v8, v[10:11]
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, s4
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_mad_co_u64_u32 v[10:11], s4, v5, v8, v[12:13]
; GFX1250-NEXT: v_mad_co_u64_u32 v[12:13], s5, v17, v8, v[18:19]
; GFX1250-NEXT: v_add_co_ci_u32_e64 v3, s5, v2, v14, s5
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_add_co_ci_u32_e64 v3, s2, v3, v12, s2
; GFX1250-NEXT: v_add_co_ci_u32_e64 v4, s2, v6, v13, s2
; GFX1250-NEXT: v_add_co_ci_u32_e64 v4, s5, v6, v15, s5
; GFX1250-NEXT: v_add_co_ci_u32_e64 v5, s5, v1, v10, s5
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_add_co_ci_u32_e64 v5, s2, v1, v10, s2
; GFX1250-NEXT: v_add_co_ci_u32_e64 v6, s2, v28, v11, s2
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v25, v2, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, v15
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v9, s5
; GFX1250-NEXT: v_add_co_ci_u32_e64 v6, s5, v26, v11, s5
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v25, v21, s5
; GFX1250-NEXT: v_mov_b32_e32 v2, v13
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v27, s4
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v30, s2
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v32, s4
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v31, s3
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v29, s1
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v30, s1
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v29, s0
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v24, vcc_lo
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v28, vcc_lo
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v24, s0
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_mad_u32 v7, v7, v8, v1
; GFX1250-NEXT: v_mov_b32_e32 v1, v14
; GFX1250-NEXT: v_mov_b32_e32 v1, v12
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%result = mul i256 %num, %den
ret i256 %result
@ -3297,7 +3299,7 @@ define amdgpu_kernel void @s_mul_u64_zext_with_sregs(ptr addrspace(1) %out, ptr
; GFX1250-LABEL: s_mul_u64_zext_with_sregs:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
@ -3508,7 +3510,7 @@ define amdgpu_kernel void @s_mul_u64_sext_with_sregs(ptr addrspace(1) %out, ptr
; GFX1250-LABEL: s_mul_u64_sext_with_sregs:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0

View File

@ -3514,7 +3514,7 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) {
; GFX1250-NEXT: s_get_pc_i64 s[0:1]
; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store@gotpcrel+4
; GFX1250-NEXT: v_writelane_b32 v4, s30, 0
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 nv
; GFX1250-NEXT: s_add_co_i32 s32, s32, 16
; GFX1250-NEXT: v_writelane_b32 v4, s31, 1
; GFX1250-NEXT: s_wait_kmcnt 0x0
@ -3758,7 +3758,7 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) {
; GFX1250-NEXT: s_get_pc_i64 s[0:1]
; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4
; GFX1250-NEXT: v_writelane_b32 v4, s30, 0
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 nv
; GFX1250-NEXT: s_add_co_i32 s32, s32, 16
; GFX1250-NEXT: v_writelane_b32 v4, s31, 1
; GFX1250-NEXT: s_wait_kmcnt 0x0
@ -4022,7 +4022,7 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) {
; GFX1250-NEXT: s_get_pc_i64 s[0:1]
; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4
; GFX1250-NEXT: v_writelane_b32 v5, s30, 0
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 nv
; GFX1250-NEXT: s_add_co_i32 s32, s32, 16
; GFX1250-NEXT: v_mov_b32_e32 v4, v2
; GFX1250-NEXT: v_writelane_b32 v5, s31, 1
@ -4300,7 +4300,7 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) {
; GFX1250-NEXT: s_get_pc_i64 s[0:1]
; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4
; GFX1250-NEXT: v_writelane_b32 v5, s30, 0
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 nv
; GFX1250-NEXT: s_add_co_i32 s32, s32, 16
; GFX1250-NEXT: v_mov_b32_e32 v4, v2
; GFX1250-NEXT: v_writelane_b32 v5, s31, 1
@ -4616,7 +4616,7 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) {
; GFX1250-NEXT: s_get_pc_i64 s[0:1]
; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4
; GFX1250-NEXT: v_writelane_b32 v5, s30, 0
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 nv
; GFX1250-NEXT: s_add_co_i32 s32, s32, 16
; GFX1250-NEXT: v_writelane_b32 v5, s31, 1
; GFX1250-NEXT: s_wait_kmcnt 0x0
@ -5019,7 +5019,7 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) {
; GFX1250-NEXT: s_get_pc_i64 s[0:1]
; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4
; GFX1250-NEXT: v_writelane_b32 v9, s30, 0
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 nv
; GFX1250-NEXT: s_add_co_i32 s32, s32, 16
; GFX1250-NEXT: v_writelane_b32 v9, s31, 1
; GFX1250-NEXT: s_wait_kmcnt 0x0
@ -9355,18 +9355,18 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) {
; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
; GFX1250-NEXT: s_clause 0x1f
; GFX1250-NEXT: global_load_u16 v1, v[2:3], off offset:2
; GFX1250-NEXT: global_load_u16 v4, v[2:3], off offset:12
; GFX1250-NEXT: global_load_u16 v5, v[2:3], off offset:8
; GFX1250-NEXT: global_load_u16 v6, v[2:3], off offset:4
; GFX1250-NEXT: global_load_u16 v7, v[2:3], off
; GFX1250-NEXT: global_load_u16 v8, v[2:3], off offset:6
; GFX1250-NEXT: global_load_u16 v9, v[2:3], off offset:10
; GFX1250-NEXT: global_load_u16 v10, v[2:3], off offset:14
; GFX1250-NEXT: global_load_u16 v11, v[2:3], off offset:18
; GFX1250-NEXT: global_load_u16 v12, v[2:3], off offset:62
; GFX1250-NEXT: global_load_u16 v13, v[2:3], off offset:60
; GFX1250-NEXT: global_load_u16 v14, v[2:3], off offset:58
; GFX1250-NEXT: global_load_u16 v15, v[2:3], off offset:56
; GFX1250-NEXT: global_load_u16 v10, v[2:3], off offset:12
; GFX1250-NEXT: global_load_u16 v6, v[2:3], off offset:8
; GFX1250-NEXT: global_load_u16 v4, v[2:3], off offset:4
; GFX1250-NEXT: global_load_u16 v5, v[2:3], off
; GFX1250-NEXT: global_load_u16 v7, v[2:3], off offset:6
; GFX1250-NEXT: global_load_u16 v8, v[2:3], off offset:62
; GFX1250-NEXT: global_load_u16 v9, v[2:3], off offset:60
; GFX1250-NEXT: global_load_u16 v11, v[2:3], off offset:58
; GFX1250-NEXT: global_load_u16 v12, v[2:3], off offset:56
; GFX1250-NEXT: global_load_u16 v13, v[2:3], off offset:10
; GFX1250-NEXT: global_load_u16 v14, v[2:3], off offset:14
; GFX1250-NEXT: global_load_u16 v15, v[2:3], off offset:18
; GFX1250-NEXT: global_load_u16 v16, v[2:3], off offset:28
; GFX1250-NEXT: global_load_u16 v17, v[2:3], off offset:24
; GFX1250-NEXT: global_load_u16 v18, v[2:3], off offset:20
@ -9387,70 +9387,67 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) {
; GFX1250-NEXT: global_load_u16 v33, v[2:3], off offset:48
; GFX1250-NEXT: global_load_u16 v34, v[2:3], off offset:54
; GFX1250-NEXT: s_wait_loadcnt 0x1e
; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v37, 16, v4
; GFX1250-NEXT: s_wait_loadcnt 0x1c
; GFX1250-NEXT: v_dual_lshlrev_b32 v81, 16, v5 :: v_dual_lshlrev_b32 v85, 16, v6
; GFX1250-NEXT: s_wait_loadcnt 0x1a
; GFX1250-NEXT: v_dual_lshlrev_b32 v84, 16, v7 :: v_dual_lshlrev_b32 v35, 16, v8
; GFX1250-NEXT: s_wait_loadcnt 0x18
; GFX1250-NEXT: v_dual_lshlrev_b32 v80, 16, v9 :: v_dual_lshlrev_b32 v36, 16, v10
; GFX1250-NEXT: s_wait_loadcnt 0x15
; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v37, 16, v10
; GFX1250-NEXT: s_wait_loadcnt 0x1b
; GFX1250-NEXT: v_dual_lshlrev_b32 v85, 16, v4 :: v_dual_lshlrev_b32 v84, 16, v5
; GFX1250-NEXT: s_wait_loadcnt 0x19
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: v_dual_lshlrev_b32 v2, 16, v12 :: v_dual_lshlrev_b32 v3, 16, v13
; GFX1250-NEXT: s_wait_loadcnt 0x14
; GFX1250-NEXT: v_dual_lshlrev_b32 v12, 16, v11 :: v_dual_lshlrev_b32 v6, 16, v14
; GFX1250-NEXT: s_wait_loadcnt 0x13
; GFX1250-NEXT: v_lshlrev_b32_e32 v7, 16, v15
; GFX1250-NEXT: v_dual_lshlrev_b32 v35, 16, v7 :: v_dual_lshlrev_b32 v2, 16, v8
; GFX1250-NEXT: s_wait_loadcnt 0x17
; GFX1250-NEXT: v_dual_lshlrev_b32 v3, 16, v9 :: v_dual_lshlrev_b32 v7, 16, v11
; GFX1250-NEXT: s_wait_loadcnt 0x15
; GFX1250-NEXT: v_dual_lshlrev_b32 v11, 16, v12 :: v_dual_lshlrev_b32 v12, 16, v13
; GFX1250-NEXT: v_lshlrev_b32_e32 v13, 16, v6
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[4:5], v2
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[2:3], v3
; GFX1250-NEXT: s_wait_loadcnt 0x11
; GFX1250-NEXT: v_dual_lshlrev_b32 v68, 16, v17 :: v_dual_lshlrev_b32 v39, 16, v16
; GFX1250-NEXT: s_wait_loadcnt 0xe
; GFX1250-NEXT: v_lshlrev_b32_e32 v20, 16, v20
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[8:9], v6
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[6:7], v7
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[8:9], v7
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[6:7], v11
; GFX1250-NEXT: s_wait_loadcnt 0x13
; GFX1250-NEXT: v_dual_lshlrev_b32 v36, 16, v14 :: v_dual_lshlrev_b32 v38, 16, v15
; GFX1250-NEXT: s_wait_loadcnt 0xc
; GFX1250-NEXT: v_dual_lshlrev_b32 v21, 16, v21 :: v_dual_lshlrev_b32 v38, 16, v22
; GFX1250-NEXT: v_dual_lshlrev_b32 v21, 16, v21 :: v_dual_lshlrev_b32 v39, 16, v22
; GFX1250-NEXT: s_wait_loadcnt 0xb
; GFX1250-NEXT: v_dual_lshlrev_b32 v49, 16, v23 :: v_dual_lshlrev_b32 v68, 16, v17
; GFX1250-NEXT: s_wait_loadcnt 0x9
; GFX1250-NEXT: v_dual_lshlrev_b32 v48, 16, v23 :: v_dual_lshlrev_b32 v25, 16, v25
; GFX1250-NEXT: v_dual_lshlrev_b32 v48, 16, v16 :: v_dual_lshlrev_b32 v25, 16, v25
; GFX1250-NEXT: v_lshlrev_b32_e32 v24, 16, v24
; GFX1250-NEXT: s_wait_loadcnt 0x5
; GFX1250-NEXT: v_dual_lshlrev_b32 v49, 16, v28 :: v_dual_lshlrev_b32 v64, 16, v29
; GFX1250-NEXT: v_dual_lshlrev_b32 v50, 16, v28 :: v_dual_lshlrev_b32 v64, 16, v29
; GFX1250-NEXT: s_wait_loadcnt 0x3
; GFX1250-NEXT: v_dual_lshlrev_b32 v50, 16, v30 :: v_dual_lshlrev_b32 v51, 16, v31
; GFX1250-NEXT: v_dual_lshlrev_b32 v51, 16, v30 :: v_dual_lshlrev_b32 v52, 16, v31
; GFX1250-NEXT: v_dual_lshlrev_b32 v69, 16, v27 :: v_dual_lshlrev_b32 v70, 16, v26
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_dual_lshlrev_b32 v33, 16, v33 :: v_dual_lshlrev_b32 v52, 16, v34
; GFX1250-NEXT: v_dual_lshlrev_b32 v32, 16, v32 :: v_dual_lshlrev_b32 v69, 16, v27
; GFX1250-NEXT: v_lshlrev_b32_e32 v70, 16, v26
; GFX1250-NEXT: v_dual_lshlrev_b32 v53, 16, v34 :: v_dual_lshlrev_b32 v32, 16, v32
; GFX1250-NEXT: v_lshlrev_b32_e32 v33, 16, v33
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[22:23], v38
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[30:31], v39
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[38:39], v50
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[54:55], v53
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[14:15], v35
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[54:55], v52
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[52:53], v32
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[30:31], v38
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[28:29], v39
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[34:35], v48
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[38:39], v49
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[28:29], v48
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[34:35], v49
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[48:49], v33
; GFX1250-NEXT: v_dual_lshlrev_b32 v13, 16, v19 :: v_dual_lshlrev_b32 v82, 16, v18
; GFX1250-NEXT: v_dual_lshlrev_b32 v20, 16, v20 :: v_dual_lshlrev_b32 v81, 16, v18
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[66:67], v64
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[64:65], v25
; GFX1250-NEXT: scratch_store_b128 v0, v[2:5], off offset:240
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[4:5], v50
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[50:51], v51
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[2:3], v24
; GFX1250-NEXT: v_lshlrev_b32_e32 v80, 16, v19
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[18:19], v36
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[16:17], v37
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[36:37], v70
; GFX1250-NEXT: scratch_store_b128 v0, v[2:5], off offset:240
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[4:5], v51
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[50:51], v52
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[52:53], v32
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[2:3], v24
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[32:33], v69
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[70:71], v21
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[68:69], v68
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[26:27], v20
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[24:25], v82
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[22:23], v12
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[20:21], v13
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[82:83], v80
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[80:81], v81
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[24:25], v81
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[20:21], v80
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[82:83], v12
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[80:81], v13
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[12:13], v85
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[10:11], v1
; GFX1250-NEXT: scratch_store_b128 v0, v[6:9], off offset:224
@ -38876,15 +38873,15 @@ define <4 x bfloat> @v_sitofp_v4i64_to_v4bf16(<4 x i64> %x) {
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v11, v[0:1]
; GFX1250-NEXT: v_min_u32_e32 v4, 1, v4
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_or_b32_e32 v6, v7, v6
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_min_u32_e32 v2, 1, v2
; GFX1250-NEXT: v_sub_nc_u32_e32 v7, 32, v10
; GFX1250-NEXT: v_or_b32_e32 v6, v7, v6
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_min_u32_e32 v0, 1, v0
; GFX1250-NEXT: v_or_b32_e32 v4, v5, v4
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250-NEXT: v_dual_sub_nc_u32 v5, 32, v9 :: v_dual_bitop2_b32 v2, v3, v2 bitop3:0x54
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX1250-NEXT: v_or_b32_e32 v0, v1, v0
; GFX1250-NEXT: v_dual_sub_nc_u32 v7, 32, v10 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
; GFX1250-NEXT: v_sub_nc_u32_e32 v1, 32, v8
; GFX1250-NEXT: v_cvt_f32_i32_e32 v3, v6
; GFX1250-NEXT: v_cvt_f32_i32_e32 v4, v4
@ -41716,29 +41713,30 @@ define <4 x bfloat> @v_uitofp_v4i64_to_v4bf16(<4 x i64> %x) {
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v10, v[0:1]
; GFX1250-NEXT: v_lshlrev_b64_e32 v[4:5], v11, v[4:5]
; GFX1250-NEXT: v_dual_sub_nc_u32 v8, 32, v8 :: v_dual_sub_nc_u32 v11, 32, v11
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_min_u32_e32 v6, 1, v6
; GFX1250-NEXT: v_min_u32_e32 v2, 1, v2
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_min_u32_e32 v0, 1, v0
; GFX1250-NEXT: v_min_u32_e32 v4, 1, v4
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_dual_sub_nc_u32 v9, 32, v9 :: v_dual_bitop2_b32 v6, v7, v6 bitop3:0x54
; GFX1250-NEXT: v_dual_sub_nc_u32 v8, 32, v8 :: v_dual_bitop2_b32 v6, v7, v6 bitop3:0x54
; GFX1250-NEXT: v_or_b32_e32 v2, v3, v2
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_dual_sub_nc_u32 v3, 32, v10 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
; GFX1250-NEXT: v_dual_sub_nc_u32 v3, 32, v11 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
; GFX1250-NEXT: v_or_b32_e32 v1, v5, v4
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_cvt_f32_u32_e32 v4, v6
; GFX1250-NEXT: v_cvt_f32_u32_e32 v2, v2
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_sub_nc_u32_e32 v5, 32, v9
; GFX1250-NEXT: v_cvt_f32_u32_e32 v0, v0
; GFX1250-NEXT: v_sub_nc_u32_e32 v6, 32, v10
; GFX1250-NEXT: v_cvt_f32_u32_e32 v1, v1
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_ldexp_f32 v4, v4, v8
; GFX1250-NEXT: v_ldexp_f32 v2, v2, v9
; GFX1250-NEXT: v_ldexp_f32 v2, v2, v5
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_ldexp_f32 v0, v0, v3
; GFX1250-NEXT: v_ldexp_f32 v1, v1, v11
; GFX1250-NEXT: v_ldexp_f32 v0, v0, v6
; GFX1250-NEXT: v_ldexp_f32 v1, v1, v3
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, v4

View File

@ -40,7 +40,7 @@ define spir_kernel void @kernel(ptr addrspace(1) %out) {
; GFX1250-LABEL: kernel:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v0, s[0:1]
@ -290,7 +290,7 @@ define amdgpu_kernel void @call_coldcc() #0 {
; GFX1250-NEXT: s_get_pc_i64 s[6:7]
; GFX1250-NEXT: s_add_nc_u64 s[6:7], s[6:7], coldcc@gotpcrel+4
; GFX1250-NEXT: v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, 1.0
; GFX1250-NEXT: s_load_b64 s[12:13], s[6:7], 0x0
; GFX1250-NEXT: s_load_b64 s[12:13], s[6:7], 0x0 nv
; GFX1250-NEXT: s_add_nc_u64 s[8:9], s[4:5], 36
; GFX1250-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1250-NEXT: s_wait_xcnt 0x0
@ -402,7 +402,7 @@ define amdgpu_kernel void @call_fastcc() #0 {
; GFX1250-NEXT: s_get_pc_i64 s[6:7]
; GFX1250-NEXT: s_add_nc_u64 s[6:7], s[6:7], fastcc@gotpcrel+4
; GFX1250-NEXT: v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, 1.0
; GFX1250-NEXT: s_load_b64 s[12:13], s[6:7], 0x0
; GFX1250-NEXT: s_load_b64 s[12:13], s[6:7], 0x0 nv
; GFX1250-NEXT: s_add_nc_u64 s[8:9], s[4:5], 36
; GFX1250-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1250-NEXT: s_wait_xcnt 0x0
@ -1437,7 +1437,7 @@ define amdgpu_kernel void @amd_kernel_i8(i8 %arg0) {
; GFX1250-LABEL: amd_kernel_i8:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24 nv
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_add_co_i32 s0, s0, s0
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@ -1503,7 +1503,7 @@ define amdgpu_kernel void @amd_kernel_v2i8(<2 x i8> %arg0) {
; GFX1250-LABEL: amd_kernel_v2i8:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24 nv
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_bfe_u32 s1, s0, 0x80008
@ -1605,7 +1605,7 @@ define amdgpu_kernel void @amd_kernel_v4i8(<4 x i8> %arg0) {
; GFX1250-LABEL: amd_kernel_v4i8:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24 nv
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_lshr_b32 s1, s0, 16
@ -1704,7 +1704,7 @@ define amdgpu_kernel void @amd_kernel_v3i8(<3 x i8> %arg0) {
; GFX1250-LABEL: amd_kernel_v3i8:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24 nv
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 2
; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
@ -1825,7 +1825,7 @@ define amdgpu_kernel void @amd_kernel_v5i8(<5 x i8> %arg0) {
; GFX1250-LABEL: amd_kernel_v5i8:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4
; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
@ -1991,8 +1991,8 @@ define amdgpu_kernel void @amd_kernel_v8i8(<8 x i8> %arg0) {
; GFX1250-LABEL: amd_kernel_v8i8:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_lshr_b32 s2, s0, 16
; GFX1250-NEXT: s_lshr_b32 s3, s0, 24
@ -2021,14 +2021,14 @@ define amdgpu_kernel void @amd_kernel_v8i8(<8 x i8> %arg0) {
; GFX1250-NEXT: s_or_b32 s0, s0, s6
; GFX1250-NEXT: s_or_b32 s2, s2, s3
; GFX1250-NEXT: s_and_b32 s1, s1, 0xffff
; GFX1250-NEXT: s_lshl_b32 s3, s4, 16
; GFX1250-NEXT: s_and_b32 s0, s0, 0xffff
; GFX1250-NEXT: s_lshl_b32 s2, s2, 16
; GFX1250-NEXT: s_or_b32 s1, s1, s3
; GFX1250-NEXT: s_lshl_b32 s3, s4, 16
; GFX1250-NEXT: s_or_b32 s0, s0, s2
; GFX1250-NEXT: s_or_b32 s1, s1, s3
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1250-NEXT: global_store_b64 v[0:1], v[2:3], off
; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1250-NEXT: global_store_b64 v[2:3], v[0:1], off
; GFX1250-NEXT: s_endpgm
entry:
%add = add <8 x i8> %arg0, %arg0
@ -2269,7 +2269,7 @@ define amdgpu_kernel void @amd_kernel_v16i8(<16 x i8> %arg0) {
; GFX1250-LABEL: amd_kernel_v16i8:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
; GFX1250-NEXT: v_mov_b64_e32 v[4:5], 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_lshr_b32 s6, s1, 16
@ -2791,7 +2791,7 @@ define amdgpu_kernel void @amd_kernel_v32i8(<32 x i8> %arg0) {
; GFX1250-LABEL: amd_kernel_v32i8:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b256 s[8:15], s[4:5], 0x24
; GFX1250-NEXT: s_load_b256 s[8:15], s[4:5], 0x24 nv
; GFX1250-NEXT: v_mov_b64_e32 v[8:9], 16
; GFX1250-NEXT: v_mov_b64_e32 v[10:11], 0
; GFX1250-NEXT: s_wait_kmcnt 0x0

View File

@ -63,20 +63,21 @@ define i16 @test_v7i16_load_store(ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2
; GCN-SDAG-NEXT: s_wait_kmcnt 0x0
; GCN-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off
; GCN-SDAG-NEXT: global_load_b128 v[8:11], v[2:3], off
; GCN-SDAG-NEXT: s_wait_xcnt 0x0
; GCN-SDAG-NEXT: v_mov_b64_e32 v[2:3], 12
; GCN-SDAG-NEXT: v_mov_b64_e32 v[12:13], 0
; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
; GCN-SDAG-NEXT: s_wait_xcnt 0x1
; GCN-SDAG-NEXT: v_pk_add_u16 v1, v6, v10
; GCN-SDAG-NEXT: v_pk_add_u16 v12, v7, v11
; GCN-SDAG-NEXT: v_mov_b64_e32 v[6:7], 8
; GCN-SDAG-NEXT: v_mov_b64_e32 v[10:11], 0
; GCN-SDAG-NEXT: v_pk_add_u16 v5, v5, v9
; GCN-SDAG-NEXT: s_wait_xcnt 0x0
; GCN-SDAG-NEXT: v_pk_add_u16 v3, v5, v9
; GCN-SDAG-NEXT: v_pk_add_u16 v5, v7, v11
; GCN-SDAG-NEXT: v_mov_b64_e32 v[6:7], 12
; GCN-SDAG-NEXT: v_mov_b64_e32 v[10:11], 8
; GCN-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GCN-SDAG-NEXT: v_pk_add_u16 v4, v4, v8
; GCN-SDAG-NEXT: v_pk_add_u16 v2, v4, v8
; GCN-SDAG-NEXT: s_clause 0x2
; GCN-SDAG-NEXT: global_store_b16 v[2:3], v12, off
; GCN-SDAG-NEXT: global_store_b32 v[6:7], v1, off
; GCN-SDAG-NEXT: global_store_b64 v[10:11], v[4:5], off
; GCN-SDAG-NEXT: global_store_b16 v[6:7], v5, off
; GCN-SDAG-NEXT: global_store_b32 v[10:11], v1, off
; GCN-SDAG-NEXT: global_store_b64 v[12:13], v[2:3], off
; GCN-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GCN-GISEL-LABEL: test_v7i16_load_store:
@ -254,15 +255,15 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
; GCN-SDAG-NEXT: global_load_b128 v[30:33], v[0:1], off
; GCN-SDAG-NEXT: global_load_b128 v[34:37], v[0:1], off offset:64
; GCN-SDAG-NEXT: v_mov_b64_e32 v[48:49], 48
; GCN-SDAG-NEXT: v_mov_b64_e32 v[50:51], 32
; GCN-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0x70
; GCN-SDAG-NEXT: v_mov_b64_e32 v[64:65], 16
; GCN-SDAG-NEXT: v_mov_b64_e32 v[50:51], 32
; GCN-SDAG-NEXT: v_mov_b64_e32 v[38:39], 0x60
; GCN-SDAG-NEXT: v_mov_b64_e32 v[64:65], 16
; GCN-SDAG-NEXT: v_mov_b64_e32 v[66:67], 0
; GCN-SDAG-NEXT: v_mov_b64_e32 v[52:53], 0x50
; GCN-SDAG-NEXT: v_mov_b64_e32 v[54:55], 64
; GCN-SDAG-NEXT: s_wait_xcnt 0x0
; GCN-SDAG-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0xc8
; GCN-SDAG-NEXT: v_mov_b64_e32 v[54:55], 64
; GCN-SDAG-NEXT: s_wait_loadcnt 0x7
; GCN-SDAG-NEXT: global_store_b128 v[2:3], v[6:9], off
; GCN-SDAG-NEXT: s_wait_loadcnt 0x6
@ -290,13 +291,13 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[48:49], v[34:35], v[34:35]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[16:17], v[16:17], v[16:17]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[14:15], 0xc8, v[14:15]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[24:25], 0x64, v[24:25]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[22:23], v[22:23], v[22:23]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[28:29], v[28:29], v[28:29]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[26:27], v[26:27], v[26:27]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[30:31], v[30:31], v[30:31]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[20:21], v[20:21], v[20:21]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[18:19], v[18:19], v[18:19]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[24:25], 0x64, v[24:25]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[22:23], v[22:23], v[22:23]
; GCN-SDAG-NEXT: s_clause 0x1
; GCN-SDAG-NEXT: global_store_b128 v[52:53], v[0:3], off
; GCN-SDAG-NEXT: global_store_b128 v[54:55], v[34:37], off
@ -403,10 +404,10 @@ define amdgpu_kernel void @test_v7i16_load_store_kernel(ptr addrspace(1) %ptr1,
; GCN-SDAG-LABEL: test_v7i16_load_store_kernel:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GCN-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GCN-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 nv
; GCN-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0
; GCN-SDAG-NEXT: s_wait_xcnt 0x0
; GCN-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
; GCN-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x10 nv
; GCN-SDAG-NEXT: v_mov_b64_e32 v[10:11], 8
; GCN-SDAG-NEXT: v_mov_b64_e32 v[12:13], 0
; GCN-SDAG-NEXT: s_wait_kmcnt 0x0
@ -431,10 +432,10 @@ define amdgpu_kernel void @test_v7i16_load_store_kernel(ptr addrspace(1) %ptr1,
; GCN-GISEL-LABEL: test_v7i16_load_store_kernel:
; GCN-GISEL: ; %bb.0:
; GCN-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GCN-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GCN-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 nv
; GCN-GISEL-NEXT: v_and_b32_e32 v8, 0x3ff, v0
; GCN-GISEL-NEXT: s_wait_xcnt 0x0
; GCN-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
; GCN-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x10 nv
; GCN-GISEL-NEXT: v_mov_b64_e32 v[10:11], 2
; GCN-GISEL-NEXT: v_mov_b64_e32 v[12:13], 4
; GCN-GISEL-NEXT: v_mov_b64_e32 v[14:15], 6

View File

@ -4435,20 +4435,20 @@ define <4 x i32> @clpeak_imad_pat_v4i32(<4 x i32> %x, <4 x i32> %y) {
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v3, v3, v7
; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v4, 1, v8 :: v_dual_add_nc_u32 v5, 1, v9
; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v6, 1, v10 :: v_dual_add_nc_u32 v7, 1, v11
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v4, v0, v4
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v5, v1, v5
; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v8, 1, v0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v6, v2, v6
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v7, v3, v7
; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v0, 1, v0 :: v_dual_add_nc_u32 v1, 1, v1
; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v2, 1, v2 :: v_dual_add_nc_u32 v3, 1, v3
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v4, v0
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v1, v5, v1
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v2, v6, v2
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v3, v7, v3
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v4, v1, v5
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v5, v2, v6
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v6, v3, v7
; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v1, 1, v1 :: v_dual_add_nc_u32 v2, 1, v2
; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v3
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v0, v8
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v1, v4, v1
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v2, v5, v2
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v3, v6, v3
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
entry:
%y18 = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>

View File

@ -11,8 +11,8 @@ define amdgpu_ps void @cluster_load_async_to_lds_b8_vaddr(ptr addrspace(1) %gadd
; GFX1250-SDAG-LABEL: cluster_load_async_to_lds_b8_vaddr:
; GFX1250-SDAG: ; %bb.0: ; %entry
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1]
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v3
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1]
; GFX1250-SDAG-NEXT: s_mov_b32 m0, s0
; GFX1250-SDAG-NEXT: cluster_load_async_to_lds_b8 v2, v[0:1], off offset:16 th:TH_LOAD_NT
; GFX1250-SDAG-NEXT: s_endpgm
@ -75,8 +75,8 @@ define amdgpu_ps void @cluster_load_async_to_lds_b32_vaddr(ptr addrspace(1) %gad
; GFX1250-SDAG-LABEL: cluster_load_async_to_lds_b32_vaddr:
; GFX1250-SDAG: ; %bb.0: ; %entry
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1]
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v3
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1]
; GFX1250-SDAG-NEXT: s_mov_b32 m0, s0
; GFX1250-SDAG-NEXT: cluster_load_async_to_lds_b32 v2, v[0:1], off offset:16 th:TH_LOAD_HT scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
@ -139,8 +139,8 @@ define amdgpu_ps void @cluster_load_async_to_lds_b64_vaddr(ptr addrspace(1) %gad
; GFX1250-SDAG-LABEL: cluster_load_async_to_lds_b64_vaddr:
; GFX1250-SDAG: ; %bb.0: ; %entry
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1]
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v3
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1]
; GFX1250-SDAG-NEXT: s_mov_b32 m0, s0
; GFX1250-SDAG-NEXT: cluster_load_async_to_lds_b64 v2, v[0:1], off offset:16 th:TH_LOAD_NT_HT scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_endpgm
@ -203,8 +203,8 @@ define amdgpu_ps void @cluster_load_async_to_lds_b128_vaddr(ptr addrspace(1) %ga
; GFX1250-SDAG-LABEL: cluster_load_async_to_lds_b128_vaddr:
; GFX1250-SDAG: ; %bb.0: ; %entry
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1]
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v3
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1]
; GFX1250-SDAG-NEXT: s_mov_b32 m0, s0
; GFX1250-SDAG-NEXT: cluster_load_async_to_lds_b128 v2, v[0:1], off offset:16 th:TH_LOAD_BYPASS scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_endpgm

View File

@ -428,30 +428,30 @@ define i128 @mad_i64_i32_sextops_i32_i128(i32 %arg0, i32 %arg1, i128 %arg2) #0 {
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v9, 0
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v1, v9
; GFX1250-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_ashrrev_i32 v12, 31, v0
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_dual_ashrrev_i32 v7, 31, v6 :: v_dual_mov_b32 v1, v9
; GFX1250-NEXT: v_mov_b32_e32 v21, v9
; GFX1250-NEXT: v_mul_u64_e32 v[10:11], v[0:1], v[8:9]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_dual_ashrrev_i32 v12, 31, v0 :: v_dual_mov_b32 v8, v11
; GFX1250-NEXT: v_dual_ashrrev_i32 v7, 31, v6 :: v_dual_mov_b32 v13, v12
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_mad_nc_u64_u32 v[14:15], v12, v6, v[8:9]
; GFX1250-NEXT: v_mul_u64_e32 v[16:17], v[6:7], v[12:13]
; GFX1250-NEXT: v_dual_mov_b32 v13, v12 :: v_dual_mov_b32 v8, v11
; GFX1250-NEXT: v_mul_u64_e32 v[14:15], v[6:7], v[12:13]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_mov_b32_e32 v8, v14
; GFX1250-NEXT: v_mad_nc_u64_u32 v[18:19], v0, v7, v[8:9]
; GFX1250-NEXT: v_mad_nc_u64_u32 v[16:17], v12, v6, v[8:9]
; GFX1250-NEXT: v_mov_b32_e32 v8, v16
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_mad_nc_u64_u32 v[18:19], v0, v7, v[8:9]
; GFX1250-NEXT: v_dual_mov_b32 v8, v17 :: v_dual_mov_b32 v20, v19
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_dual_mov_b32 v8, v15 :: v_dual_mov_b32 v20, v19
; GFX1250-NEXT: v_add_nc_u64_e32 v[8:9], v[8:9], v[20:21]
; GFX1250-NEXT: v_mad_nc_i64_i32 v[0:1], v7, v0, v[16:17]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_mad_nc_u64_u32 v[8:9], v12, v7, v[8:9]
; GFX1250-NEXT: v_mad_nc_i64_i32 v[0:1], v7, v0, v[14:15]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX1250-NEXT: v_add_nc_u64_e32 v[6:7], v[8:9], v[0:1]
; GFX1250-NEXT: v_add_co_u32 v0, vcc_lo, v10, v2
; GFX1250-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v18, v3, vcc_lo
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v6, v4, vcc_lo
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1250-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v7, v5, vcc_lo
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%sext0 = sext i32 %arg0 to i128
@ -1120,8 +1120,8 @@ define amdgpu_kernel void @mad_i64_i32_uniform(ptr addrspace(1) %out, i32 %arg0,
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x34 nv
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_mov_b32 s5, 0
; GFX1250-NEXT: v_mov_b32_e32 v2, 0

View File

@ -35,7 +35,7 @@ define amdgpu_kernel void @fadd_v2_vv(ptr addrspace(1) %a) {
; GFX1250-LABEL: fadd_v2_vv:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-NEXT: v_and_b32_e32 v2, 0x3ff, v0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
@ -79,7 +79,7 @@ define amdgpu_kernel void @fadd_v2_vs(ptr addrspace(1) %a, <2 x float> %x) {
; GFX1250-LABEL: fadd_v2_vs:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
; GFX1250-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
@ -145,8 +145,8 @@ define amdgpu_kernel void @fadd_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_clause 0x1
; GFX1250-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
; GFX1250-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset
@ -163,8 +163,8 @@ define amdgpu_kernel void @fadd_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_clause 0x1
; GFX1250-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
; GFX1250-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v8, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset
@ -349,9 +349,9 @@ define amdgpu_kernel void @fadd_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_clause 0x2
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4
; GFX1250-SDAG-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4 nv
; GFX1250-SDAG-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
@ -415,11 +415,11 @@ define amdgpu_kernel void @fadd_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
; GFX1250-GISEL-LABEL: fadd_v32_vs:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_clause 0x1
; GFX1250-GISEL-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4
; GFX1250-GISEL-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4
; GFX1250-GISEL-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4 nv
; GFX1250-GISEL-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4 nv
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v56, 7, v0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
@ -534,7 +534,7 @@ define amdgpu_kernel void @fadd_v2_v_imm(ptr addrspace(1) %a) {
; GFX1250-SDAG-LABEL: fadd_v2_v_imm:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_mov_b32 s2, 0x42c80000
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
@ -547,7 +547,7 @@ define amdgpu_kernel void @fadd_v2_v_imm(ptr addrspace(1) %a) {
; GFX1250-GISEL-LABEL: fadd_v2_v_imm:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_mov_b32 s2, 0x42c80000
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@ -608,7 +608,7 @@ define amdgpu_kernel void @fadd_v2_v_v_splat(ptr addrspace(1) %a) {
; GFX1250-SDAG-LABEL: fadd_v2_v_v_splat:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b64 v[2:3], v0, s[0:1] scale_offset
@ -620,7 +620,7 @@ define amdgpu_kernel void @fadd_v2_v_v_splat(ptr addrspace(1) %a) {
; GFX1250-GISEL-LABEL: fadd_v2_v_v_splat:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, v0
@ -683,7 +683,7 @@ define amdgpu_kernel void @fadd_v2_v_lit_splat(ptr addrspace(1) %a) {
; GFX1250-SDAG-LABEL: fadd_v2_v_lit_splat:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
@ -695,7 +695,7 @@ define amdgpu_kernel void @fadd_v2_v_lit_splat(ptr addrspace(1) %a) {
; GFX1250-GISEL-LABEL: fadd_v2_v_lit_splat:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_mov_b32 s2, 1.0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@ -744,7 +744,7 @@ define amdgpu_kernel void @fadd_v2_v_lit_hi0(ptr addrspace(1) %a) {
; GFX1250-SDAG-LABEL: fadd_v2_v_lit_hi0:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0x3f800000
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
@ -757,7 +757,7 @@ define amdgpu_kernel void @fadd_v2_v_lit_hi0(ptr addrspace(1) %a) {
; GFX1250-GISEL-LABEL: fadd_v2_v_lit_hi0:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_mov_b64 s[2:3], 0x3f800000
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@ -806,7 +806,7 @@ define amdgpu_kernel void @fadd_v2_v_lit_lo0(ptr addrspace(1) %a) {
; GFX1250-SDAG-LABEL: fadd_v2_v_lit_lo0:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0x3f80000000000000
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
@ -819,7 +819,7 @@ define amdgpu_kernel void @fadd_v2_v_lit_lo0(ptr addrspace(1) %a) {
; GFX1250-GISEL-LABEL: fadd_v2_v_lit_lo0:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_mov_b64 s[2:3], 0x3f80000000000000
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@ -868,7 +868,7 @@ define amdgpu_kernel void @fadd_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
; GFX1250-SDAG-LABEL: fadd_v2_v_unfoldable_lit:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0x400000003f800000
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
@ -881,7 +881,7 @@ define amdgpu_kernel void @fadd_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
; GFX1250-GISEL-LABEL: fadd_v2_v_unfoldable_lit:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_mov_b64 s[2:3], 0x400000003f800000
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@ -946,7 +946,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg(ptr addrspace(1) %a, float %x) {
; GFX1250-SDAG-LABEL: fadd_v2_v_fneg:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
@ -958,7 +958,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg(ptr addrspace(1) %a, float %x) {
; GFX1250-GISEL-LABEL: fadd_v2_v_fneg:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
@ -1025,7 +1025,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_lo(ptr addrspace(1) %a, float %x) {
; GFX1250-SDAG-LABEL: fadd_v2_v_fneg_lo:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
@ -1037,7 +1037,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_lo(ptr addrspace(1) %a, float %x) {
; GFX1250-GISEL-LABEL: fadd_v2_v_fneg_lo:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
@ -1104,7 +1104,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_hi(ptr addrspace(1) %a, float %x) {
; GFX1250-SDAG-LABEL: fadd_v2_v_fneg_hi:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
@ -1116,7 +1116,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_hi(ptr addrspace(1) %a, float %x) {
; GFX1250-GISEL-LABEL: fadd_v2_v_fneg_hi:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
@ -1180,7 +1180,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_lo2(ptr addrspace(1) %a, float %x, flo
; GFX1250-SDAG-LABEL: fadd_v2_v_fneg_lo2:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
@ -1194,7 +1194,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_lo2(ptr addrspace(1) %a, float %x, flo
; GFX1250-GISEL-LABEL: fadd_v2_v_fneg_lo2:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
@ -1258,7 +1258,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_hi2(ptr addrspace(1) %a, float %x, flo
; GFX1250-SDAG-LABEL: fadd_v2_v_fneg_hi2:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
@ -1272,7 +1272,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_hi2(ptr addrspace(1) %a, float %x, flo
; GFX1250-GISEL-LABEL: fadd_v2_v_fneg_hi2:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
@ -1322,7 +1322,7 @@ define amdgpu_kernel void @fmul_v2_vv(ptr addrspace(1) %a) {
; GFX1250-LABEL: fmul_v2_vv:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-NEXT: v_and_b32_e32 v2, 0x3ff, v0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
@ -1366,7 +1366,7 @@ define amdgpu_kernel void @fmul_v2_vs(ptr addrspace(1) %a, <2 x float> %x) {
; GFX1250-LABEL: fmul_v2_vs:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
; GFX1250-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
@ -1432,8 +1432,8 @@ define amdgpu_kernel void @fmul_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_clause 0x1
; GFX1250-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
; GFX1250-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset
@ -1450,8 +1450,8 @@ define amdgpu_kernel void @fmul_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_clause 0x1
; GFX1250-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
; GFX1250-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v8, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset
@ -1636,9 +1636,9 @@ define amdgpu_kernel void @fmul_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_clause 0x2
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4
; GFX1250-SDAG-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4 nv
; GFX1250-SDAG-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
@ -1702,11 +1702,11 @@ define amdgpu_kernel void @fmul_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
; GFX1250-GISEL-LABEL: fmul_v32_vs:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_clause 0x1
; GFX1250-GISEL-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4
; GFX1250-GISEL-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4
; GFX1250-GISEL-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4 nv
; GFX1250-GISEL-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4 nv
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v56, 7, v0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
@ -1820,7 +1820,7 @@ define amdgpu_kernel void @fmul_v2_v_imm(ptr addrspace(1) %a) {
; GFX1250-SDAG-LABEL: fmul_v2_v_imm:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_mov_b32 s2, 0x42c80000
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
@ -1833,7 +1833,7 @@ define amdgpu_kernel void @fmul_v2_v_imm(ptr addrspace(1) %a) {
; GFX1250-GISEL-LABEL: fmul_v2_v_imm:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_mov_b32 s2, 0x42c80000
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@ -1894,7 +1894,7 @@ define amdgpu_kernel void @fmul_v2_v_v_splat(ptr addrspace(1) %a) {
; GFX1250-SDAG-LABEL: fmul_v2_v_v_splat:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b64 v[2:3], v0, s[0:1] scale_offset
@ -1906,7 +1906,7 @@ define amdgpu_kernel void @fmul_v2_v_v_splat(ptr addrspace(1) %a) {
; GFX1250-GISEL-LABEL: fmul_v2_v_v_splat:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, v0
@ -1969,7 +1969,7 @@ define amdgpu_kernel void @fmul_v2_v_lit_splat(ptr addrspace(1) %a) {
; GFX1250-SDAG-LABEL: fmul_v2_v_lit_splat:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
@ -1981,7 +1981,7 @@ define amdgpu_kernel void @fmul_v2_v_lit_splat(ptr addrspace(1) %a) {
; GFX1250-GISEL-LABEL: fmul_v2_v_lit_splat:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_mov_b32 s2, 4.0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@ -2031,7 +2031,7 @@ define amdgpu_kernel void @fmul_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
; GFX1250-SDAG-LABEL: fmul_v2_v_unfoldable_lit:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0x4040000040800000
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
@ -2044,7 +2044,7 @@ define amdgpu_kernel void @fmul_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
; GFX1250-GISEL-LABEL: fmul_v2_v_unfoldable_lit:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_mov_b64 s[2:3], 0x4040000040800000
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@ -2108,7 +2108,7 @@ define amdgpu_kernel void @fmul_v2_v_fneg(ptr addrspace(1) %a, float %x) {
; GFX1250-SDAG-LABEL: fmul_v2_v_fneg:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
@ -2120,7 +2120,7 @@ define amdgpu_kernel void @fmul_v2_v_fneg(ptr addrspace(1) %a, float %x) {
; GFX1250-GISEL-LABEL: fmul_v2_v_fneg:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
@ -2170,7 +2170,7 @@ define amdgpu_kernel void @fma_v2_vv(ptr addrspace(1) %a) {
; GFX1250-LABEL: fma_v2_vv:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-NEXT: v_and_b32_e32 v2, 0x3ff, v0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
@ -2214,7 +2214,7 @@ define amdgpu_kernel void @fma_v2_vs(ptr addrspace(1) %a, <2 x float> %x) {
; GFX1250-LABEL: fma_v2_vs:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
; GFX1250-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
@ -2280,8 +2280,8 @@ define amdgpu_kernel void @fma_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_clause 0x1
; GFX1250-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
; GFX1250-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset
@ -2298,8 +2298,8 @@ define amdgpu_kernel void @fma_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_clause 0x1
; GFX1250-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
; GFX1250-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v8, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset
@ -2483,11 +2483,11 @@ define amdgpu_kernel void @fma_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
; GFX1250-SDAG-LABEL: fma_v32_vs:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_clause 0x1
; GFX1250-SDAG-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4
; GFX1250-SDAG-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4
; GFX1250-SDAG-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4 nv
; GFX1250-SDAG-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4 nv
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_lshlrev_b32_e32 v56, 7, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
@ -2522,10 +2522,11 @@ define amdgpu_kernel void @fma_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[40:41], s[8:9]
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[4:5], v[4:5], v[38:39], v[38:39]
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[38:39], s[36:37]
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[28:29], v[28:29], v[52:53], v[52:53]
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x2
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[20:21], v[20:21], v[32:33], v[32:33]
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[22:23], v[22:23], v[34:35], v[34:35]
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[28:29], v[28:29], v[52:53], v[52:53]
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[30:31], v[30:31], v[54:55], v[54:55]
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[26:27], v[26:27], v[50:51], v[50:51]
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[24:25], v[24:25], v[40:41], v[40:41]
@ -2549,11 +2550,11 @@ define amdgpu_kernel void @fma_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
; GFX1250-GISEL-LABEL: fma_v32_vs:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_clause 0x1
; GFX1250-GISEL-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4
; GFX1250-GISEL-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4
; GFX1250-GISEL-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4 nv
; GFX1250-GISEL-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4 nv
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v56, 7, v0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
@ -2690,7 +2691,7 @@ define amdgpu_kernel void @fma_v2_v_imm(ptr addrspace(1) %a) {
; GFX1250-SDAG-LABEL: fma_v2_v_imm:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_mov_b32 s2, 0x43480000
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
@ -2705,7 +2706,7 @@ define amdgpu_kernel void @fma_v2_v_imm(ptr addrspace(1) %a) {
; GFX1250-GISEL-LABEL: fma_v2_v_imm:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v6, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_mov_b32 s2, 0x42c80000
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
@ -2769,7 +2770,7 @@ define amdgpu_kernel void @fma_v2_v_v_splat(ptr addrspace(1) %a) {
; GFX1250-SDAG-LABEL: fma_v2_v_v_splat:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b64 v[2:3], v0, s[0:1] scale_offset
@ -2781,7 +2782,7 @@ define amdgpu_kernel void @fma_v2_v_v_splat(ptr addrspace(1) %a) {
; GFX1250-GISEL-LABEL: fma_v2_v_v_splat:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, v0
@ -2864,7 +2865,7 @@ define amdgpu_kernel void @fma_v2_v_lit_splat(ptr addrspace(1) %a) {
; GFX1250-SDAG-LABEL: fma_v2_v_lit_splat:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
@ -2876,7 +2877,7 @@ define amdgpu_kernel void @fma_v2_v_lit_splat(ptr addrspace(1) %a) {
; GFX1250-GISEL-LABEL: fma_v2_v_lit_splat:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v6, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_mov_b32 s2, 4.0
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
@ -2966,7 +2967,7 @@ define amdgpu_kernel void @fma_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
; GFX1250-SDAG-LABEL: fma_v2_v_unfoldable_lit:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v6, 0x3ff, v0
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0x4040000040800000
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[4:5], 0x400000003f800000
@ -2980,7 +2981,7 @@ define amdgpu_kernel void @fma_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
; GFX1250-GISEL-LABEL: fma_v2_v_unfoldable_lit:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v6, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_mov_b64 s[2:3], 0x4040000040800000
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
@ -3046,7 +3047,7 @@ define amdgpu_kernel void @fma_v2_v_fneg(ptr addrspace(1) %a, float %x) {
; GFX1250-SDAG-LABEL: fma_v2_v_fneg:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
@ -3058,7 +3059,7 @@ define amdgpu_kernel void @fma_v2_v_fneg(ptr addrspace(1) %a, float %x) {
; GFX1250-GISEL-LABEL: fma_v2_v_fneg:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
@ -3129,7 +3130,7 @@ define amdgpu_kernel void @add_vector_neg_bitcast_scalar_lo(ptr addrspace(1) %ou
; GFX1250-SDAG-LABEL: add_vector_neg_bitcast_scalar_lo:
; GFX1250-SDAG: ; %bb.0: ; %bb
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s2
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s3
@ -3143,7 +3144,7 @@ define amdgpu_kernel void @add_vector_neg_bitcast_scalar_lo(ptr addrspace(1) %ou
; GFX1250-GISEL-LABEL: add_vector_neg_bitcast_scalar_lo:
; GFX1250-GISEL: ; %bb.0: ; %bb
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v2, s3
; GFX1250-GISEL-NEXT: ds_load_2addr_b32 v[0:1], v0 offset1:1
@ -3224,7 +3225,7 @@ define amdgpu_kernel void @fma_vector_vector_neg_scalar_lo_scalar_hi(ptr addrspa
; GFX1250-SDAG-LABEL: fma_vector_vector_neg_scalar_lo_scalar_hi:
; GFX1250-SDAG: ; %bb.0: ; %bb
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v2, s2
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v5, s3
@ -3240,7 +3241,7 @@ define amdgpu_kernel void @fma_vector_vector_neg_scalar_lo_scalar_hi(ptr addrspa
; GFX1250-GISEL-LABEL: fma_vector_vector_neg_scalar_lo_scalar_hi:
; GFX1250-GISEL: ; %bb.0: ; %bb
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v5, s3
; GFX1250-GISEL-NEXT: ds_load_2addr_b32 v[0:1], v2 offset1:1
@ -3320,7 +3321,7 @@ define amdgpu_kernel void @shuffle_add_f32(ptr addrspace(1) %out, ptr addrspace(
; GFX1250-SDAG-LABEL: shuffle_add_f32:
; GFX1250-SDAG: ; %bb.0: ; %bb
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v2, s2
; GFX1250-SDAG-NEXT: ds_load_b64 v[0:1], v2
@ -3333,7 +3334,7 @@ define amdgpu_kernel void @shuffle_add_f32(ptr addrspace(1) %out, ptr addrspace(
; GFX1250-GISEL-LABEL: shuffle_add_f32:
; GFX1250-GISEL: ; %bb.0: ; %bb
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s2
; GFX1250-GISEL-NEXT: ds_load_b64 v[0:1], v2
@ -3410,7 +3411,7 @@ define amdgpu_kernel void @shuffle_neg_add_f32(ptr addrspace(1) %out, ptr addrsp
; GFX1250-SDAG-LABEL: shuffle_neg_add_f32:
; GFX1250-SDAG: ; %bb.0: ; %bb
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v2, s2
; GFX1250-SDAG-NEXT: ds_load_b64 v[0:1], v2
@ -3426,7 +3427,7 @@ define amdgpu_kernel void @shuffle_neg_add_f32(ptr addrspace(1) %out, ptr addrsp
; GFX1250-GISEL-LABEL: shuffle_neg_add_f32:
; GFX1250-GISEL: ; %bb.0: ; %bb
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s2
; GFX1250-GISEL-NEXT: ds_load_b64 v[0:1], v2
@ -3502,7 +3503,7 @@ define amdgpu_kernel void @fadd_fadd_fsub_0(<2 x float> %arg) {
; GFX1250-SDAG-LABEL: fadd_fadd_fsub_0:
; GFX1250-SDAG: ; %bb.0: ; %bb
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_add_f32 s1, s1, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3)
@ -3514,7 +3515,7 @@ define amdgpu_kernel void @fadd_fadd_fsub_0(<2 x float> %arg) {
; GFX1250-GISEL-LABEL: fadd_fadd_fsub_0:
; GFX1250-GISEL: ; %bb.0: ; %bb
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
@ -3605,8 +3606,8 @@ define amdgpu_kernel void @fadd_fadd_fsub(<2 x float> %arg, <2 x float> %arg1, p
; GFX1250-SDAG: ; %bb.0: ; %bb
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_clause 0x1
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x34 nv
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
; GFX1250-SDAG-NEXT: s_add_f32 s2, s1, s3
@ -3623,8 +3624,8 @@ define amdgpu_kernel void @fadd_fadd_fsub(<2 x float> %arg, <2 x float> %arg1, p
; GFX1250-GISEL: ; %bb.0: ; %bb
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_clause 0x1
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x34 nv
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
@ -3694,7 +3695,7 @@ define amdgpu_kernel void @fadd_shuffle_v4(ptr addrspace(1) %arg) {
; GFX1250-SDAG-LABEL: fadd_shuffle_v4:
; GFX1250-SDAG: ; %bb.0: ; %bb
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v4, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v4, s[0:1] scale_offset
@ -3707,7 +3708,7 @@ define amdgpu_kernel void @fadd_shuffle_v4(ptr addrspace(1) %arg) {
; GFX1250-GISEL-LABEL: fadd_shuffle_v4:
; GFX1250-GISEL: ; %bb.0: ; %bb
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v6, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: global_load_b128 v[0:3], v6, s[0:1] scale_offset
@ -3768,7 +3769,7 @@ define amdgpu_kernel void @fneg_v2f32_vec(ptr addrspace(1) %a) {
; GFX1250-SDAG-LABEL: fneg_v2f32_vec:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
@ -3780,7 +3781,7 @@ define amdgpu_kernel void @fneg_v2f32_vec(ptr addrspace(1) %a) {
; GFX1250-GISEL-LABEL: fneg_v2f32_vec:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_and_b32_e32 v2, 0x3ff, v0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
@ -3833,7 +3834,7 @@ define amdgpu_kernel void @fneg_v2f32_scalar(ptr addrspace(1) %a, <2 x float> %x
; GFX1250-SDAG-LABEL: fneg_v2f32_scalar:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_xor_b32 s2, s2, 0x80000000
; GFX1250-SDAG-NEXT: s_xor_b32 s3, s3, 0x80000000
@ -3845,7 +3846,7 @@ define amdgpu_kernel void @fneg_v2f32_scalar(ptr addrspace(1) %a, <2 x float> %x
; GFX1250-GISEL-LABEL: fneg_v2f32_scalar:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[2:3]

View File

@ -4106,50 +4106,47 @@ define amdgpu_kernel void @compute_mad(ptr addrspace(4) %i18, ptr addrspace(4) %
; GFX1250-LABEL: compute_mad:
; GFX1250: ; %bb.0: ; %bb
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x10
; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x10 nv
; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_load_b128 s[4:7], s[4:5], 0x0 nv
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_add_co_i32 s0, s10, 1
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1250-NEXT: v_mul_lo_u32 v1, s0, v0
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_dual_add_nc_u32 v2, s0, v1 :: v_dual_add_nc_u32 v1, 1, v1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX1250-NEXT: s_add_co_i32 s2, s2, 1
; GFX1250-NEXT: s_load_b32 s6, s[6:7], 0x4 nv
; GFX1250-NEXT: v_mul_lo_u32 v1, s2, v0
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_dual_add_nc_u32 v2, s2, v1 :: v_dual_add_nc_u32 v1, 1, v1
; GFX1250-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_add_co_i32 s7, s2, 1
; GFX1250-NEXT: v_mul_lo_u32 v2, v2, v0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 nv
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_and_b32 s4, ttmp6, 15
; GFX1250-NEXT: s_getreg_b32 s5, hwreg(HW_REG_IB_STS2, 6, 4)
; GFX1250-NEXT: v_mul_lo_u32 v2, v2, v0
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_mul_lo_u32 v3, v2, v1
; GFX1250-NEXT: s_mul_i32 s5, ttmp9, s7
; GFX1250-NEXT: s_getreg_b32 s7, hwreg(HW_REG_IB_STS2, 6, 4)
; GFX1250-NEXT: s_add_co_i32 s4, s4, s5
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x4
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_bfe_u32 s3, ttmp6, 0x4000c
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1250-NEXT: s_add_co_i32 s3, s3, 1
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: s_mul_i32 s3, ttmp9, s3
; GFX1250-NEXT: s_and_b32 s5, s6, 0xffff
; GFX1250-NEXT: s_cmp_eq_u32 s7, 0
; GFX1250-NEXT: v_mul_lo_u32 v3, v2, v1
; GFX1250-NEXT: s_cselect_b32 s4, ttmp9, s4
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_mad_u32 v0, s4, s5, v0
; GFX1250-NEXT: v_add_nc_u32_e32 v1, v3, v1
; GFX1250-NEXT: s_add_co_i32 s4, s4, s3
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_mul_lo_u32 v1, v1, v2
; GFX1250-NEXT: v_add_nc_u32_e32 v2, 1, v3
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_and_b32 s2, s2, 0xffff
; GFX1250-NEXT: s_cmp_eq_u32 s5, 0
; GFX1250-NEXT: v_mul_lo_u32 v3, v1, v2
; GFX1250-NEXT: s_cselect_b32 s3, ttmp9, s4
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_mad_u32 v0, s3, s2, v0
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_add_nc_u32_e32 v2, v3, v2
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_mul_lo_u32 v2, v2, v1
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
; GFX1250-NEXT: v_add_nc_u64_e32 v[0:1], s[0:1], v[0:1]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250-NEXT: v_add_nc_u64_e32 v[0:1], s[2:3], v[0:1]
; GFX1250-NEXT: v_mad_u32 v3, v2, v3, v2
; GFX1250-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[8:9]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
; GFX1250-NEXT: v_mad_u32 v2, v3, v2, v3
; GFX1250-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-NEXT: s_endpgm