[AMDGPU] Update gfx1250 sched model to latest table (#182923)
This commit is contained in:
parent
b4564abb01
commit
9829d082af
@ -491,7 +491,7 @@ def : HWWriteRes<Write16PassWMMA, [HWVALU], 64>;
|
||||
|
||||
def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
|
||||
def : HWWriteRes<WriteFloatCvt, [HWVALU, HWRC], 5>;
|
||||
def : HWWriteRes<WriteTrans32, [HWTransVALU, HWRC], 7>;
|
||||
def : HWWriteRes<WriteTrans32, [HWTransVALU, HWRC], 8>;
|
||||
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 6>;
|
||||
def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;
|
||||
def : HWWriteRes<WritePseudoScalarTrans, [HWVALU, HWRC], 8>;
|
||||
@ -517,10 +517,10 @@ def : InstRW<[WriteXDL2PassWMMA], (instregex "^V_WMMA.*_F32_32X16X128_F4")>;
|
||||
let SchedModel = GFX1250SpeedModel in {
|
||||
defm : GFX125xCommonWriteRes;
|
||||
|
||||
def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 7>;
|
||||
def : HWWriteRes<WriteIntMul, [HWVALU, HWRC], 11>;
|
||||
def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 32>;
|
||||
def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 32>;
|
||||
def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 32>;
|
||||
def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 6>;
|
||||
def : HWWriteRes<WriteIntMul, [HWVALU, HWRC], 8>;
|
||||
def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 37>;
|
||||
def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 37>;
|
||||
def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 37>;
|
||||
def : HWWriteRes<WriteTrans64, [HWVALU, HWTransVALU, HWRC], 38>;
|
||||
} // SchedModel = GFX1250SpeedModel
|
||||
|
||||
@ -3044,12 +3044,12 @@ define i256 @v_mul_i256(i256 %num, i256 %den) {
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v16, v0 :: v_dual_mov_b32 v17, v1
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v30, v4, v11
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v29, v5, v10
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v29, v4, v11
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v30, v2, v13
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v31, v3, v12
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
||||
; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v16, v14, 0
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v32, v2, v13
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v28, v5, v10
|
||||
; GFX1250-NEXT: v_mad_nc_u64_u32 v[18:19], v17, v13, v[0:1]
|
||||
; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v16, v12, 0
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
@ -3070,65 +3070,67 @@ define i256 @v_mul_i256(i256 %num, i256 %den) {
|
||||
; GFX1250-NEXT: v_mad_nc_u64_u32 v[18:19], v16, v10, 0
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[22:23], vcc_lo, v4, v8, v[0:1]
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v28, null, 0, v26, vcc_lo
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v26, null, 0, v26, vcc_lo
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_mad_nc_u64_u32 v[24:25], v6, v8, v[20:21]
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[0:1], s0, v17, v9, v[18:19]
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v18, v23 :: v_dual_mov_b32 v19, v24
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v18, v23
|
||||
; GFX1250-NEXT: v_cndmask_b32_e64 v27, 0, 1, s0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v19, v24
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[20:21], vcc_lo, v2, v8, v[0:1]
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v24, v6, v9
|
||||
; GFX1250-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[20:21], vcc_lo, v16, v13, v[18:19]
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s0, v2, v8, v[0:1]
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v6, null, 0, v6, s0
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[0:1], s0, v17, v12, v[20:21]
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v20, v19 :: v_dual_mov_b32 v21, v22
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v13, v18
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[22:23], s2, v16, v11, v[20:21]
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[20:21], s1, v2, v11, v[0:1]
|
||||
; GFX1250-NEXT: v_cndmask_b32_e64 v11, 0, 1, s2
|
||||
; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v16, v8, 0
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[26:27], s2, v17, v10, v[22:23]
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v33, null, 0, v11, s2
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[22:23], s3, v3, v10, v[20:21]
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v6, null, 0, v27, vcc_lo
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v12, v1
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[10:11], s2, v2, v9, v[26:27]
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[20:21], s6, v16, v9, v[12:13]
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s4, v4, v9, v[22:23]
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v33, s2
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v2, v16, v15
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v9, v17, v14
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[12:13], s2, v3, v8, v[10:11]
|
||||
; GFX1250-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[0:1], s0, v16, v13, v[18:19]
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v19, v22 :: v_dual_mov_b32 v18, v21
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v21, v16, v15
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v27, v17, v14
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[22:23], vcc_lo, v17, v12, v[0:1]
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[14:15], s2, v16, v11, v[18:19]
|
||||
; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v16, v8, 0
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s1, v2, v11, v[22:23]
|
||||
; GFX1250-NEXT: v_cndmask_b32_e64 v11, 0, 1, s2
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[12:13], s2, v17, v10, v[14:15]
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v14, v1 :: v_dual_mov_b32 v15, v20
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v11, s2
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[22:23], s3, v3, v10, v[18:19]
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[10:11], s2, v2, v9, v[12:13]
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, s2
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[10:11], s5, v5, v8, v[18:19]
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[14:15], s2, v17, v8, v[20:21]
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s4, v16, v9, v[14:15]
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[12:13], s2, v4, v9, v[22:23]
|
||||
; GFX1250-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[14:15], s4, v3, v8, v[10:11]
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, s4
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[10:11], s4, v5, v8, v[12:13]
|
||||
; GFX1250-NEXT: v_mad_co_u64_u32 v[12:13], s5, v17, v8, v[18:19]
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v3, s5, v2, v14, s5
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v3, s2, v3, v12, s2
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v4, s2, v6, v13, s2
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v4, s5, v6, v15, s5
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v5, s5, v1, v10, s5
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v5, s2, v1, v10, s2
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v6, s2, v28, v11, s2
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v25, v2, s2
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v2, v15
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v9, s5
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v6, s5, v26, v11, s5
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v25, v21, s5
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v2, v13
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v27, s4
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v30, s2
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v32, s4
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v31, s3
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v29, s1
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v30, s1
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v29, s0
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v24, vcc_lo
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v28, vcc_lo
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v24, s0
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_mad_u32 v7, v7, v8, v1
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, v14
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, v12
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
%result = mul i256 %num, %den
|
||||
ret i256 %result
|
||||
@ -3297,7 +3299,7 @@ define amdgpu_kernel void @s_mul_u64_zext_with_sregs(ptr addrspace(1) %out, ptr
|
||||
; GFX1250-LABEL: s_mul_u64_zext_with_sregs:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
|
||||
@ -3508,7 +3510,7 @@ define amdgpu_kernel void @s_mul_u64_sext_with_sregs(ptr addrspace(1) %out, ptr
|
||||
; GFX1250-LABEL: s_mul_u64_sext_with_sregs:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
|
||||
|
||||
@ -3514,7 +3514,7 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) {
|
||||
; GFX1250-NEXT: s_get_pc_i64 s[0:1]
|
||||
; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store@gotpcrel+4
|
||||
; GFX1250-NEXT: v_writelane_b32 v4, s30, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 nv
|
||||
; GFX1250-NEXT: s_add_co_i32 s32, s32, 16
|
||||
; GFX1250-NEXT: v_writelane_b32 v4, s31, 1
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
@ -3758,7 +3758,7 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) {
|
||||
; GFX1250-NEXT: s_get_pc_i64 s[0:1]
|
||||
; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4
|
||||
; GFX1250-NEXT: v_writelane_b32 v4, s30, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 nv
|
||||
; GFX1250-NEXT: s_add_co_i32 s32, s32, 16
|
||||
; GFX1250-NEXT: v_writelane_b32 v4, s31, 1
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
@ -4022,7 +4022,7 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) {
|
||||
; GFX1250-NEXT: s_get_pc_i64 s[0:1]
|
||||
; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4
|
||||
; GFX1250-NEXT: v_writelane_b32 v5, s30, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 nv
|
||||
; GFX1250-NEXT: s_add_co_i32 s32, s32, 16
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v4, v2
|
||||
; GFX1250-NEXT: v_writelane_b32 v5, s31, 1
|
||||
@ -4300,7 +4300,7 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) {
|
||||
; GFX1250-NEXT: s_get_pc_i64 s[0:1]
|
||||
; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4
|
||||
; GFX1250-NEXT: v_writelane_b32 v5, s30, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 nv
|
||||
; GFX1250-NEXT: s_add_co_i32 s32, s32, 16
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v4, v2
|
||||
; GFX1250-NEXT: v_writelane_b32 v5, s31, 1
|
||||
@ -4616,7 +4616,7 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) {
|
||||
; GFX1250-NEXT: s_get_pc_i64 s[0:1]
|
||||
; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4
|
||||
; GFX1250-NEXT: v_writelane_b32 v5, s30, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 nv
|
||||
; GFX1250-NEXT: s_add_co_i32 s32, s32, 16
|
||||
; GFX1250-NEXT: v_writelane_b32 v5, s31, 1
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
@ -5019,7 +5019,7 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) {
|
||||
; GFX1250-NEXT: s_get_pc_i64 s[0:1]
|
||||
; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4
|
||||
; GFX1250-NEXT: v_writelane_b32 v9, s30, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 nv
|
||||
; GFX1250-NEXT: s_add_co_i32 s32, s32, 16
|
||||
; GFX1250-NEXT: v_writelane_b32 v9, s31, 1
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
@ -9355,18 +9355,18 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) {
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
||||
; GFX1250-NEXT: s_clause 0x1f
|
||||
; GFX1250-NEXT: global_load_u16 v1, v[2:3], off offset:2
|
||||
; GFX1250-NEXT: global_load_u16 v4, v[2:3], off offset:12
|
||||
; GFX1250-NEXT: global_load_u16 v5, v[2:3], off offset:8
|
||||
; GFX1250-NEXT: global_load_u16 v6, v[2:3], off offset:4
|
||||
; GFX1250-NEXT: global_load_u16 v7, v[2:3], off
|
||||
; GFX1250-NEXT: global_load_u16 v8, v[2:3], off offset:6
|
||||
; GFX1250-NEXT: global_load_u16 v9, v[2:3], off offset:10
|
||||
; GFX1250-NEXT: global_load_u16 v10, v[2:3], off offset:14
|
||||
; GFX1250-NEXT: global_load_u16 v11, v[2:3], off offset:18
|
||||
; GFX1250-NEXT: global_load_u16 v12, v[2:3], off offset:62
|
||||
; GFX1250-NEXT: global_load_u16 v13, v[2:3], off offset:60
|
||||
; GFX1250-NEXT: global_load_u16 v14, v[2:3], off offset:58
|
||||
; GFX1250-NEXT: global_load_u16 v15, v[2:3], off offset:56
|
||||
; GFX1250-NEXT: global_load_u16 v10, v[2:3], off offset:12
|
||||
; GFX1250-NEXT: global_load_u16 v6, v[2:3], off offset:8
|
||||
; GFX1250-NEXT: global_load_u16 v4, v[2:3], off offset:4
|
||||
; GFX1250-NEXT: global_load_u16 v5, v[2:3], off
|
||||
; GFX1250-NEXT: global_load_u16 v7, v[2:3], off offset:6
|
||||
; GFX1250-NEXT: global_load_u16 v8, v[2:3], off offset:62
|
||||
; GFX1250-NEXT: global_load_u16 v9, v[2:3], off offset:60
|
||||
; GFX1250-NEXT: global_load_u16 v11, v[2:3], off offset:58
|
||||
; GFX1250-NEXT: global_load_u16 v12, v[2:3], off offset:56
|
||||
; GFX1250-NEXT: global_load_u16 v13, v[2:3], off offset:10
|
||||
; GFX1250-NEXT: global_load_u16 v14, v[2:3], off offset:14
|
||||
; GFX1250-NEXT: global_load_u16 v15, v[2:3], off offset:18
|
||||
; GFX1250-NEXT: global_load_u16 v16, v[2:3], off offset:28
|
||||
; GFX1250-NEXT: global_load_u16 v17, v[2:3], off offset:24
|
||||
; GFX1250-NEXT: global_load_u16 v18, v[2:3], off offset:20
|
||||
@ -9387,70 +9387,67 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) {
|
||||
; GFX1250-NEXT: global_load_u16 v33, v[2:3], off offset:48
|
||||
; GFX1250-NEXT: global_load_u16 v34, v[2:3], off offset:54
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x1e
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v37, 16, v4
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x1c
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v81, 16, v5 :: v_dual_lshlrev_b32 v85, 16, v6
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x1a
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v84, 16, v7 :: v_dual_lshlrev_b32 v35, 16, v8
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x18
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v80, 16, v9 :: v_dual_lshlrev_b32 v36, 16, v10
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x15
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v37, 16, v10
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x1b
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v85, 16, v4 :: v_dual_lshlrev_b32 v84, 16, v5
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x19
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v2, 16, v12 :: v_dual_lshlrev_b32 v3, 16, v13
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x14
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v12, 16, v11 :: v_dual_lshlrev_b32 v6, 16, v14
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x13
|
||||
; GFX1250-NEXT: v_lshlrev_b32_e32 v7, 16, v15
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v35, 16, v7 :: v_dual_lshlrev_b32 v2, 16, v8
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x17
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v3, 16, v9 :: v_dual_lshlrev_b32 v7, 16, v11
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x15
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v11, 16, v12 :: v_dual_lshlrev_b32 v12, 16, v13
|
||||
; GFX1250-NEXT: v_lshlrev_b32_e32 v13, 16, v6
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[4:5], v2
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[2:3], v3
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x11
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v68, 16, v17 :: v_dual_lshlrev_b32 v39, 16, v16
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0xe
|
||||
; GFX1250-NEXT: v_lshlrev_b32_e32 v20, 16, v20
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[8:9], v6
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[6:7], v7
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[8:9], v7
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[6:7], v11
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x13
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v36, 16, v14 :: v_dual_lshlrev_b32 v38, 16, v15
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0xc
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v21, 16, v21 :: v_dual_lshlrev_b32 v38, 16, v22
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v21, 16, v21 :: v_dual_lshlrev_b32 v39, 16, v22
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0xb
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v49, 16, v23 :: v_dual_lshlrev_b32 v68, 16, v17
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x9
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v48, 16, v23 :: v_dual_lshlrev_b32 v25, 16, v25
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v48, 16, v16 :: v_dual_lshlrev_b32 v25, 16, v25
|
||||
; GFX1250-NEXT: v_lshlrev_b32_e32 v24, 16, v24
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x5
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v49, 16, v28 :: v_dual_lshlrev_b32 v64, 16, v29
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v50, 16, v28 :: v_dual_lshlrev_b32 v64, 16, v29
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x3
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v50, 16, v30 :: v_dual_lshlrev_b32 v51, 16, v31
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v51, 16, v30 :: v_dual_lshlrev_b32 v52, 16, v31
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v69, 16, v27 :: v_dual_lshlrev_b32 v70, 16, v26
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v33, 16, v33 :: v_dual_lshlrev_b32 v52, 16, v34
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v32, 16, v32 :: v_dual_lshlrev_b32 v69, 16, v27
|
||||
; GFX1250-NEXT: v_lshlrev_b32_e32 v70, 16, v26
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v53, 16, v34 :: v_dual_lshlrev_b32 v32, 16, v32
|
||||
; GFX1250-NEXT: v_lshlrev_b32_e32 v33, 16, v33
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[22:23], v38
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[30:31], v39
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[38:39], v50
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[54:55], v53
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[14:15], v35
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[54:55], v52
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[52:53], v32
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[30:31], v38
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[28:29], v39
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[34:35], v48
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[38:39], v49
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[28:29], v48
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[34:35], v49
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[48:49], v33
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v13, 16, v19 :: v_dual_lshlrev_b32 v82, 16, v18
|
||||
; GFX1250-NEXT: v_dual_lshlrev_b32 v20, 16, v20 :: v_dual_lshlrev_b32 v81, 16, v18
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[66:67], v64
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[64:65], v25
|
||||
; GFX1250-NEXT: scratch_store_b128 v0, v[2:5], off offset:240
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[4:5], v50
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[50:51], v51
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[2:3], v24
|
||||
; GFX1250-NEXT: v_lshlrev_b32_e32 v80, 16, v19
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[18:19], v36
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[16:17], v37
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[36:37], v70
|
||||
; GFX1250-NEXT: scratch_store_b128 v0, v[2:5], off offset:240
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[4:5], v51
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[50:51], v52
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[52:53], v32
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[2:3], v24
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[32:33], v69
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[70:71], v21
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[68:69], v68
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[26:27], v20
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[24:25], v82
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[22:23], v12
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[20:21], v13
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[82:83], v80
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[80:81], v81
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[24:25], v81
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[20:21], v80
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[82:83], v12
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[80:81], v13
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[12:13], v85
|
||||
; GFX1250-NEXT: v_cvt_f64_f32_e32 v[10:11], v1
|
||||
; GFX1250-NEXT: scratch_store_b128 v0, v[6:9], off offset:224
|
||||
@ -38876,15 +38873,15 @@ define <4 x bfloat> @v_sitofp_v4i64_to_v4bf16(<4 x i64> %x) {
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v11, v[0:1]
|
||||
; GFX1250-NEXT: v_min_u32_e32 v4, 1, v4
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_or_b32_e32 v6, v7, v6
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_min_u32_e32 v2, 1, v2
|
||||
; GFX1250-NEXT: v_sub_nc_u32_e32 v7, 32, v10
|
||||
; GFX1250-NEXT: v_or_b32_e32 v6, v7, v6
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_min_u32_e32 v0, 1, v0
|
||||
; GFX1250-NEXT: v_or_b32_e32 v4, v5, v4
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
|
||||
; GFX1250-NEXT: v_dual_sub_nc_u32 v5, 32, v9 :: v_dual_bitop2_b32 v2, v3, v2 bitop3:0x54
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3)
|
||||
; GFX1250-NEXT: v_or_b32_e32 v0, v1, v0
|
||||
; GFX1250-NEXT: v_dual_sub_nc_u32 v7, 32, v10 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
|
||||
; GFX1250-NEXT: v_sub_nc_u32_e32 v1, 32, v8
|
||||
; GFX1250-NEXT: v_cvt_f32_i32_e32 v3, v6
|
||||
; GFX1250-NEXT: v_cvt_f32_i32_e32 v4, v4
|
||||
@ -41716,29 +41713,30 @@ define <4 x bfloat> @v_uitofp_v4i64_to_v4bf16(<4 x i64> %x) {
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v10, v[0:1]
|
||||
; GFX1250-NEXT: v_lshlrev_b64_e32 v[4:5], v11, v[4:5]
|
||||
; GFX1250-NEXT: v_dual_sub_nc_u32 v8, 32, v8 :: v_dual_sub_nc_u32 v11, 32, v11
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_min_u32_e32 v6, 1, v6
|
||||
; GFX1250-NEXT: v_min_u32_e32 v2, 1, v2
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_min_u32_e32 v0, 1, v0
|
||||
; GFX1250-NEXT: v_min_u32_e32 v4, 1, v4
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_dual_sub_nc_u32 v9, 32, v9 :: v_dual_bitop2_b32 v6, v7, v6 bitop3:0x54
|
||||
; GFX1250-NEXT: v_dual_sub_nc_u32 v8, 32, v8 :: v_dual_bitop2_b32 v6, v7, v6 bitop3:0x54
|
||||
; GFX1250-NEXT: v_or_b32_e32 v2, v3, v2
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_dual_sub_nc_u32 v3, 32, v10 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
|
||||
; GFX1250-NEXT: v_dual_sub_nc_u32 v3, 32, v11 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
|
||||
; GFX1250-NEXT: v_or_b32_e32 v1, v5, v4
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_cvt_f32_u32_e32 v4, v6
|
||||
; GFX1250-NEXT: v_cvt_f32_u32_e32 v2, v2
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_sub_nc_u32_e32 v5, 32, v9
|
||||
; GFX1250-NEXT: v_cvt_f32_u32_e32 v0, v0
|
||||
; GFX1250-NEXT: v_sub_nc_u32_e32 v6, 32, v10
|
||||
; GFX1250-NEXT: v_cvt_f32_u32_e32 v1, v1
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_ldexp_f32 v4, v4, v8
|
||||
; GFX1250-NEXT: v_ldexp_f32 v2, v2, v9
|
||||
; GFX1250-NEXT: v_ldexp_f32 v2, v2, v5
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_ldexp_f32 v0, v0, v3
|
||||
; GFX1250-NEXT: v_ldexp_f32 v1, v1, v11
|
||||
; GFX1250-NEXT: v_ldexp_f32 v0, v0, v6
|
||||
; GFX1250-NEXT: v_ldexp_f32 v1, v1, v3
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
|
||||
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, v4
|
||||
|
||||
@ -40,7 +40,7 @@ define spir_kernel void @kernel(ptr addrspace(1) %out) {
|
||||
; GFX1250-LABEL: kernel:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: global_store_b32 v0, v0, s[0:1]
|
||||
@ -290,7 +290,7 @@ define amdgpu_kernel void @call_coldcc() #0 {
|
||||
; GFX1250-NEXT: s_get_pc_i64 s[6:7]
|
||||
; GFX1250-NEXT: s_add_nc_u64 s[6:7], s[6:7], coldcc@gotpcrel+4
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, 1.0
|
||||
; GFX1250-NEXT: s_load_b64 s[12:13], s[6:7], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[12:13], s[6:7], 0x0 nv
|
||||
; GFX1250-NEXT: s_add_nc_u64 s[8:9], s[4:5], 36
|
||||
; GFX1250-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
@ -402,7 +402,7 @@ define amdgpu_kernel void @call_fastcc() #0 {
|
||||
; GFX1250-NEXT: s_get_pc_i64 s[6:7]
|
||||
; GFX1250-NEXT: s_add_nc_u64 s[6:7], s[6:7], fastcc@gotpcrel+4
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, 1.0
|
||||
; GFX1250-NEXT: s_load_b64 s[12:13], s[6:7], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[12:13], s[6:7], 0x0 nv
|
||||
; GFX1250-NEXT: s_add_nc_u64 s[8:9], s[4:5], 36
|
||||
; GFX1250-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
@ -1437,7 +1437,7 @@ define amdgpu_kernel void @amd_kernel_i8(i8 %arg0) {
|
||||
; GFX1250-LABEL: amd_kernel_i8:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24
|
||||
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24 nv
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_add_co_i32 s0, s0, s0
|
||||
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
@ -1503,7 +1503,7 @@ define amdgpu_kernel void @amd_kernel_v2i8(<2 x i8> %arg0) {
|
||||
; GFX1250-LABEL: amd_kernel_v2i8:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24
|
||||
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24 nv
|
||||
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_bfe_u32 s1, s0, 0x80008
|
||||
@ -1605,7 +1605,7 @@ define amdgpu_kernel void @amd_kernel_v4i8(<4 x i8> %arg0) {
|
||||
; GFX1250-LABEL: amd_kernel_v4i8:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24
|
||||
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24 nv
|
||||
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_lshr_b32 s1, s0, 16
|
||||
@ -1704,7 +1704,7 @@ define amdgpu_kernel void @amd_kernel_v3i8(<3 x i8> %arg0) {
|
||||
; GFX1250-LABEL: amd_kernel_v3i8:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24
|
||||
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24 nv
|
||||
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 2
|
||||
; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
@ -1825,7 +1825,7 @@ define amdgpu_kernel void @amd_kernel_v5i8(<5 x i8> %arg0) {
|
||||
; GFX1250-LABEL: amd_kernel_v5i8:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4
|
||||
; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
@ -1991,8 +1991,8 @@ define amdgpu_kernel void @amd_kernel_v8i8(<8 x i8> %arg0) {
|
||||
; GFX1250-LABEL: amd_kernel_v8i8:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_lshr_b32 s2, s0, 16
|
||||
; GFX1250-NEXT: s_lshr_b32 s3, s0, 24
|
||||
@ -2021,14 +2021,14 @@ define amdgpu_kernel void @amd_kernel_v8i8(<8 x i8> %arg0) {
|
||||
; GFX1250-NEXT: s_or_b32 s0, s0, s6
|
||||
; GFX1250-NEXT: s_or_b32 s2, s2, s3
|
||||
; GFX1250-NEXT: s_and_b32 s1, s1, 0xffff
|
||||
; GFX1250-NEXT: s_lshl_b32 s3, s4, 16
|
||||
; GFX1250-NEXT: s_and_b32 s0, s0, 0xffff
|
||||
; GFX1250-NEXT: s_lshl_b32 s2, s2, 16
|
||||
; GFX1250-NEXT: s_or_b32 s1, s1, s3
|
||||
; GFX1250-NEXT: s_lshl_b32 s3, s4, 16
|
||||
; GFX1250-NEXT: s_or_b32 s0, s0, s2
|
||||
; GFX1250-NEXT: s_or_b32 s1, s1, s3
|
||||
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX1250-NEXT: global_store_b64 v[0:1], v[2:3], off
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
|
||||
; GFX1250-NEXT: global_store_b64 v[2:3], v[0:1], off
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
%add = add <8 x i8> %arg0, %arg0
|
||||
@ -2269,7 +2269,7 @@ define amdgpu_kernel void @amd_kernel_v16i8(<16 x i8> %arg0) {
|
||||
; GFX1250-LABEL: amd_kernel_v16i8:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
||||
; GFX1250-NEXT: v_mov_b64_e32 v[4:5], 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_lshr_b32 s6, s1, 16
|
||||
@ -2791,7 +2791,7 @@ define amdgpu_kernel void @amd_kernel_v32i8(<32 x i8> %arg0) {
|
||||
; GFX1250-LABEL: amd_kernel_v32i8:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: s_load_b256 s[8:15], s[4:5], 0x24
|
||||
; GFX1250-NEXT: s_load_b256 s[8:15], s[4:5], 0x24 nv
|
||||
; GFX1250-NEXT: v_mov_b64_e32 v[8:9], 16
|
||||
; GFX1250-NEXT: v_mov_b64_e32 v[10:11], 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
|
||||
@ -63,20 +63,21 @@ define i16 @test_v7i16_load_store(ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2
|
||||
; GCN-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GCN-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off
|
||||
; GCN-SDAG-NEXT: global_load_b128 v[8:11], v[2:3], off
|
||||
; GCN-SDAG-NEXT: s_wait_xcnt 0x0
|
||||
; GCN-SDAG-NEXT: v_mov_b64_e32 v[2:3], 12
|
||||
; GCN-SDAG-NEXT: v_mov_b64_e32 v[12:13], 0
|
||||
; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
|
||||
; GCN-SDAG-NEXT: s_wait_xcnt 0x1
|
||||
; GCN-SDAG-NEXT: v_pk_add_u16 v1, v6, v10
|
||||
; GCN-SDAG-NEXT: v_pk_add_u16 v12, v7, v11
|
||||
; GCN-SDAG-NEXT: v_mov_b64_e32 v[6:7], 8
|
||||
; GCN-SDAG-NEXT: v_mov_b64_e32 v[10:11], 0
|
||||
; GCN-SDAG-NEXT: v_pk_add_u16 v5, v5, v9
|
||||
; GCN-SDAG-NEXT: s_wait_xcnt 0x0
|
||||
; GCN-SDAG-NEXT: v_pk_add_u16 v3, v5, v9
|
||||
; GCN-SDAG-NEXT: v_pk_add_u16 v5, v7, v11
|
||||
; GCN-SDAG-NEXT: v_mov_b64_e32 v[6:7], 12
|
||||
; GCN-SDAG-NEXT: v_mov_b64_e32 v[10:11], 8
|
||||
; GCN-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v1
|
||||
; GCN-SDAG-NEXT: v_pk_add_u16 v4, v4, v8
|
||||
; GCN-SDAG-NEXT: v_pk_add_u16 v2, v4, v8
|
||||
; GCN-SDAG-NEXT: s_clause 0x2
|
||||
; GCN-SDAG-NEXT: global_store_b16 v[2:3], v12, off
|
||||
; GCN-SDAG-NEXT: global_store_b32 v[6:7], v1, off
|
||||
; GCN-SDAG-NEXT: global_store_b64 v[10:11], v[4:5], off
|
||||
; GCN-SDAG-NEXT: global_store_b16 v[6:7], v5, off
|
||||
; GCN-SDAG-NEXT: global_store_b32 v[10:11], v1, off
|
||||
; GCN-SDAG-NEXT: global_store_b64 v[12:13], v[2:3], off
|
||||
; GCN-SDAG-NEXT: s_set_pc_i64 s[30:31]
|
||||
;
|
||||
; GCN-GISEL-LABEL: test_v7i16_load_store:
|
||||
@ -254,15 +255,15 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
|
||||
; GCN-SDAG-NEXT: global_load_b128 v[30:33], v[0:1], off
|
||||
; GCN-SDAG-NEXT: global_load_b128 v[34:37], v[0:1], off offset:64
|
||||
; GCN-SDAG-NEXT: v_mov_b64_e32 v[48:49], 48
|
||||
; GCN-SDAG-NEXT: v_mov_b64_e32 v[50:51], 32
|
||||
; GCN-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0x70
|
||||
; GCN-SDAG-NEXT: v_mov_b64_e32 v[64:65], 16
|
||||
; GCN-SDAG-NEXT: v_mov_b64_e32 v[50:51], 32
|
||||
; GCN-SDAG-NEXT: v_mov_b64_e32 v[38:39], 0x60
|
||||
; GCN-SDAG-NEXT: v_mov_b64_e32 v[64:65], 16
|
||||
; GCN-SDAG-NEXT: v_mov_b64_e32 v[66:67], 0
|
||||
; GCN-SDAG-NEXT: v_mov_b64_e32 v[52:53], 0x50
|
||||
; GCN-SDAG-NEXT: v_mov_b64_e32 v[54:55], 64
|
||||
; GCN-SDAG-NEXT: s_wait_xcnt 0x0
|
||||
; GCN-SDAG-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0xc8
|
||||
; GCN-SDAG-NEXT: v_mov_b64_e32 v[54:55], 64
|
||||
; GCN-SDAG-NEXT: s_wait_loadcnt 0x7
|
||||
; GCN-SDAG-NEXT: global_store_b128 v[2:3], v[6:9], off
|
||||
; GCN-SDAG-NEXT: s_wait_loadcnt 0x6
|
||||
@ -290,13 +291,13 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
|
||||
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[48:49], v[34:35], v[34:35]
|
||||
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[16:17], v[16:17], v[16:17]
|
||||
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[14:15], 0xc8, v[14:15]
|
||||
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[24:25], 0x64, v[24:25]
|
||||
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[22:23], v[22:23], v[22:23]
|
||||
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[28:29], v[28:29], v[28:29]
|
||||
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[26:27], v[26:27], v[26:27]
|
||||
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[30:31], v[30:31], v[30:31]
|
||||
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[20:21], v[20:21], v[20:21]
|
||||
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[18:19], v[18:19], v[18:19]
|
||||
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[24:25], 0x64, v[24:25]
|
||||
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[22:23], v[22:23], v[22:23]
|
||||
; GCN-SDAG-NEXT: s_clause 0x1
|
||||
; GCN-SDAG-NEXT: global_store_b128 v[52:53], v[0:3], off
|
||||
; GCN-SDAG-NEXT: global_store_b128 v[54:55], v[34:37], off
|
||||
@ -403,10 +404,10 @@ define amdgpu_kernel void @test_v7i16_load_store_kernel(ptr addrspace(1) %ptr1,
|
||||
; GCN-SDAG-LABEL: test_v7i16_load_store_kernel:
|
||||
; GCN-SDAG: ; %bb.0:
|
||||
; GCN-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GCN-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
|
||||
; GCN-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 nv
|
||||
; GCN-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0
|
||||
; GCN-SDAG-NEXT: s_wait_xcnt 0x0
|
||||
; GCN-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
|
||||
; GCN-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x10 nv
|
||||
; GCN-SDAG-NEXT: v_mov_b64_e32 v[10:11], 8
|
||||
; GCN-SDAG-NEXT: v_mov_b64_e32 v[12:13], 0
|
||||
; GCN-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
@ -431,10 +432,10 @@ define amdgpu_kernel void @test_v7i16_load_store_kernel(ptr addrspace(1) %ptr1,
|
||||
; GCN-GISEL-LABEL: test_v7i16_load_store_kernel:
|
||||
; GCN-GISEL: ; %bb.0:
|
||||
; GCN-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GCN-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
|
||||
; GCN-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 nv
|
||||
; GCN-GISEL-NEXT: v_and_b32_e32 v8, 0x3ff, v0
|
||||
; GCN-GISEL-NEXT: s_wait_xcnt 0x0
|
||||
; GCN-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
|
||||
; GCN-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x10 nv
|
||||
; GCN-GISEL-NEXT: v_mov_b64_e32 v[10:11], 2
|
||||
; GCN-GISEL-NEXT: v_mov_b64_e32 v[12:13], 4
|
||||
; GCN-GISEL-NEXT: v_mov_b64_e32 v[14:15], 6
|
||||
|
||||
@ -4435,20 +4435,20 @@ define <4 x i32> @clpeak_imad_pat_v4i32(<4 x i32> %x, <4 x i32> %y) {
|
||||
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v3, v3, v7
|
||||
; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v4, 1, v8 :: v_dual_add_nc_u32 v5, 1, v9
|
||||
; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v6, 1, v10 :: v_dual_add_nc_u32 v7, 1, v11
|
||||
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
|
||||
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v4, v0, v4
|
||||
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v5, v1, v5
|
||||
; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v8, 1, v0
|
||||
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v6, v2, v6
|
||||
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v7, v3, v7
|
||||
; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v0, 1, v0 :: v_dual_add_nc_u32 v1, 1, v1
|
||||
; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v2, 1, v2 :: v_dual_add_nc_u32 v3, 1, v3
|
||||
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
|
||||
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v4, v0
|
||||
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v1, v5, v1
|
||||
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v2, v6, v2
|
||||
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v3, v7, v3
|
||||
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4
|
||||
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v4, v1, v5
|
||||
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_3)
|
||||
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v5, v2, v6
|
||||
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v6, v3, v7
|
||||
; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v1, 1, v1 :: v_dual_add_nc_u32 v2, 1, v2
|
||||
; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v3
|
||||
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v0, v8
|
||||
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v1, v4, v1
|
||||
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v2, v5, v2
|
||||
; GFX1250-GISEL-NEXT: v_mul_lo_u32 v3, v6, v3
|
||||
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
|
||||
entry:
|
||||
%y18 = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
|
||||
|
||||
@ -11,8 +11,8 @@ define amdgpu_ps void @cluster_load_async_to_lds_b8_vaddr(ptr addrspace(1) %gadd
|
||||
; GFX1250-SDAG-LABEL: cluster_load_async_to_lds_b8_vaddr:
|
||||
; GFX1250-SDAG: ; %bb.0: ; %entry
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1]
|
||||
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v3
|
||||
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1]
|
||||
; GFX1250-SDAG-NEXT: s_mov_b32 m0, s0
|
||||
; GFX1250-SDAG-NEXT: cluster_load_async_to_lds_b8 v2, v[0:1], off offset:16 th:TH_LOAD_NT
|
||||
; GFX1250-SDAG-NEXT: s_endpgm
|
||||
@ -75,8 +75,8 @@ define amdgpu_ps void @cluster_load_async_to_lds_b32_vaddr(ptr addrspace(1) %gad
|
||||
; GFX1250-SDAG-LABEL: cluster_load_async_to_lds_b32_vaddr:
|
||||
; GFX1250-SDAG: ; %bb.0: ; %entry
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1]
|
||||
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v3
|
||||
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1]
|
||||
; GFX1250-SDAG-NEXT: s_mov_b32 m0, s0
|
||||
; GFX1250-SDAG-NEXT: cluster_load_async_to_lds_b32 v2, v[0:1], off offset:16 th:TH_LOAD_HT scope:SCOPE_SE
|
||||
; GFX1250-SDAG-NEXT: s_endpgm
|
||||
@ -139,8 +139,8 @@ define amdgpu_ps void @cluster_load_async_to_lds_b64_vaddr(ptr addrspace(1) %gad
|
||||
; GFX1250-SDAG-LABEL: cluster_load_async_to_lds_b64_vaddr:
|
||||
; GFX1250-SDAG: ; %bb.0: ; %entry
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1]
|
||||
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v3
|
||||
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1]
|
||||
; GFX1250-SDAG-NEXT: s_mov_b32 m0, s0
|
||||
; GFX1250-SDAG-NEXT: cluster_load_async_to_lds_b64 v2, v[0:1], off offset:16 th:TH_LOAD_NT_HT scope:SCOPE_DEV
|
||||
; GFX1250-SDAG-NEXT: s_endpgm
|
||||
@ -203,8 +203,8 @@ define amdgpu_ps void @cluster_load_async_to_lds_b128_vaddr(ptr addrspace(1) %ga
|
||||
; GFX1250-SDAG-LABEL: cluster_load_async_to_lds_b128_vaddr:
|
||||
; GFX1250-SDAG: ; %bb.0: ; %entry
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1]
|
||||
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v3
|
||||
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1]
|
||||
; GFX1250-SDAG-NEXT: s_mov_b32 m0, s0
|
||||
; GFX1250-SDAG-NEXT: cluster_load_async_to_lds_b128 v2, v[0:1], off offset:16 th:TH_LOAD_BYPASS scope:SCOPE_SYS
|
||||
; GFX1250-SDAG-NEXT: s_endpgm
|
||||
|
||||
@ -428,30 +428,30 @@ define i128 @mad_i64_i32_sextops_i32_i128(i32 %arg0, i32 %arg1, i128 %arg2) #0 {
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v9, 0
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v1, v9
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_ashrrev_i32 v12, 31, v0
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
||||
; GFX1250-NEXT: v_dual_ashrrev_i32 v7, 31, v6 :: v_dual_mov_b32 v1, v9
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v21, v9
|
||||
; GFX1250-NEXT: v_mul_u64_e32 v[10:11], v[0:1], v[8:9]
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_dual_ashrrev_i32 v12, 31, v0 :: v_dual_mov_b32 v8, v11
|
||||
; GFX1250-NEXT: v_dual_ashrrev_i32 v7, 31, v6 :: v_dual_mov_b32 v13, v12
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX1250-NEXT: v_mad_nc_u64_u32 v[14:15], v12, v6, v[8:9]
|
||||
; GFX1250-NEXT: v_mul_u64_e32 v[16:17], v[6:7], v[12:13]
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v13, v12 :: v_dual_mov_b32 v8, v11
|
||||
; GFX1250-NEXT: v_mul_u64_e32 v[14:15], v[6:7], v[12:13]
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v8, v14
|
||||
; GFX1250-NEXT: v_mad_nc_u64_u32 v[18:19], v0, v7, v[8:9]
|
||||
; GFX1250-NEXT: v_mad_nc_u64_u32 v[16:17], v12, v6, v[8:9]
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v8, v16
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_mad_nc_u64_u32 v[18:19], v0, v7, v[8:9]
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v8, v17 :: v_dual_mov_b32 v20, v19
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v8, v15 :: v_dual_mov_b32 v20, v19
|
||||
; GFX1250-NEXT: v_add_nc_u64_e32 v[8:9], v[8:9], v[20:21]
|
||||
; GFX1250-NEXT: v_mad_nc_i64_i32 v[0:1], v7, v0, v[16:17]
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_mad_nc_u64_u32 v[8:9], v12, v7, v[8:9]
|
||||
; GFX1250-NEXT: v_mad_nc_i64_i32 v[0:1], v7, v0, v[14:15]
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
||||
; GFX1250-NEXT: v_add_nc_u64_e32 v[6:7], v[8:9], v[0:1]
|
||||
; GFX1250-NEXT: v_add_co_u32 v0, vcc_lo, v10, v2
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v18, v3, vcc_lo
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v6, v4, vcc_lo
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4)
|
||||
; GFX1250-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v7, v5, vcc_lo
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
%sext0 = sext i32 %arg0 to i128
|
||||
@ -1120,8 +1120,8 @@ define amdgpu_kernel void @mad_i64_i32_uniform(ptr addrspace(1) %out, i32 %arg0,
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: s_clause 0x1
|
||||
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
|
||||
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
||||
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x34 nv
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: s_mov_b32 s5, 0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
|
||||
|
||||
@ -35,7 +35,7 @@ define amdgpu_kernel void @fadd_v2_vv(ptr addrspace(1) %a) {
|
||||
; GFX1250-LABEL: fadd_v2_vv:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-NEXT: v_and_b32_e32 v2, 0x3ff, v0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
|
||||
@ -79,7 +79,7 @@ define amdgpu_kernel void @fadd_v2_vs(ptr addrspace(1) %a, <2 x float> %x) {
|
||||
; GFX1250-LABEL: fadd_v2_vs:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
||||
; GFX1250-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
|
||||
@ -145,8 +145,8 @@ define amdgpu_kernel void @fadd_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_clause 0x1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset
|
||||
@ -163,8 +163,8 @@ define amdgpu_kernel void @fadd_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_clause 0x1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v8, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset
|
||||
@ -349,9 +349,9 @@ define amdgpu_kernel void @fadd_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_clause 0x2
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4
|
||||
; GFX1250-SDAG-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4 nv
|
||||
; GFX1250-SDAG-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
@ -415,11 +415,11 @@ define amdgpu_kernel void @fadd_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
|
||||
; GFX1250-GISEL-LABEL: fadd_v32_vs:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_clause 0x1
|
||||
; GFX1250-GISEL-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4
|
||||
; GFX1250-GISEL-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4
|
||||
; GFX1250-GISEL-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4 nv
|
||||
; GFX1250-GISEL-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4 nv
|
||||
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v56, 7, v0
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
@ -534,7 +534,7 @@ define amdgpu_kernel void @fadd_v2_v_imm(ptr addrspace(1) %a) {
|
||||
; GFX1250-SDAG-LABEL: fadd_v2_v_imm:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_mov_b32 s2, 0x42c80000
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
@ -547,7 +547,7 @@ define amdgpu_kernel void @fadd_v2_v_imm(ptr addrspace(1) %a) {
|
||||
; GFX1250-GISEL-LABEL: fadd_v2_v_imm:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_mov_b32 s2, 0x42c80000
|
||||
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
@ -608,7 +608,7 @@ define amdgpu_kernel void @fadd_v2_v_v_splat(ptr addrspace(1) %a) {
|
||||
; GFX1250-SDAG-LABEL: fadd_v2_v_v_splat:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: global_load_b64 v[2:3], v0, s[0:1] scale_offset
|
||||
@ -620,7 +620,7 @@ define amdgpu_kernel void @fadd_v2_v_v_splat(ptr addrspace(1) %a) {
|
||||
; GFX1250-GISEL-LABEL: fadd_v2_v_v_splat:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, v0
|
||||
@ -683,7 +683,7 @@ define amdgpu_kernel void @fadd_v2_v_lit_splat(ptr addrspace(1) %a) {
|
||||
; GFX1250-SDAG-LABEL: fadd_v2_v_lit_splat:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
|
||||
@ -695,7 +695,7 @@ define amdgpu_kernel void @fadd_v2_v_lit_splat(ptr addrspace(1) %a) {
|
||||
; GFX1250-GISEL-LABEL: fadd_v2_v_lit_splat:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_mov_b32 s2, 1.0
|
||||
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
@ -744,7 +744,7 @@ define amdgpu_kernel void @fadd_v2_v_lit_hi0(ptr addrspace(1) %a) {
|
||||
; GFX1250-SDAG-LABEL: fadd_v2_v_lit_hi0:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0x3f800000
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
@ -757,7 +757,7 @@ define amdgpu_kernel void @fadd_v2_v_lit_hi0(ptr addrspace(1) %a) {
|
||||
; GFX1250-GISEL-LABEL: fadd_v2_v_lit_hi0:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_mov_b64 s[2:3], 0x3f800000
|
||||
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
@ -806,7 +806,7 @@ define amdgpu_kernel void @fadd_v2_v_lit_lo0(ptr addrspace(1) %a) {
|
||||
; GFX1250-SDAG-LABEL: fadd_v2_v_lit_lo0:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0x3f80000000000000
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
@ -819,7 +819,7 @@ define amdgpu_kernel void @fadd_v2_v_lit_lo0(ptr addrspace(1) %a) {
|
||||
; GFX1250-GISEL-LABEL: fadd_v2_v_lit_lo0:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_mov_b64 s[2:3], 0x3f80000000000000
|
||||
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
@ -868,7 +868,7 @@ define amdgpu_kernel void @fadd_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
|
||||
; GFX1250-SDAG-LABEL: fadd_v2_v_unfoldable_lit:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0x400000003f800000
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
@ -881,7 +881,7 @@ define amdgpu_kernel void @fadd_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
|
||||
; GFX1250-GISEL-LABEL: fadd_v2_v_unfoldable_lit:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_mov_b64 s[2:3], 0x400000003f800000
|
||||
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
@ -946,7 +946,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg(ptr addrspace(1) %a, float %x) {
|
||||
; GFX1250-SDAG-LABEL: fadd_v2_v_fneg:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
|
||||
@ -958,7 +958,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg(ptr addrspace(1) %a, float %x) {
|
||||
; GFX1250-GISEL-LABEL: fadd_v2_v_fneg:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
|
||||
@ -1025,7 +1025,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_lo(ptr addrspace(1) %a, float %x) {
|
||||
; GFX1250-SDAG-LABEL: fadd_v2_v_fneg_lo:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
|
||||
@ -1037,7 +1037,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_lo(ptr addrspace(1) %a, float %x) {
|
||||
; GFX1250-GISEL-LABEL: fadd_v2_v_fneg_lo:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
|
||||
@ -1104,7 +1104,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_hi(ptr addrspace(1) %a, float %x) {
|
||||
; GFX1250-SDAG-LABEL: fadd_v2_v_fneg_hi:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
|
||||
@ -1116,7 +1116,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_hi(ptr addrspace(1) %a, float %x) {
|
||||
; GFX1250-GISEL-LABEL: fadd_v2_v_fneg_hi:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
|
||||
@ -1180,7 +1180,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_lo2(ptr addrspace(1) %a, float %x, flo
|
||||
; GFX1250-SDAG-LABEL: fadd_v2_v_fneg_lo2:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
|
||||
@ -1194,7 +1194,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_lo2(ptr addrspace(1) %a, float %x, flo
|
||||
; GFX1250-GISEL-LABEL: fadd_v2_v_fneg_lo2:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
|
||||
@ -1258,7 +1258,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_hi2(ptr addrspace(1) %a, float %x, flo
|
||||
; GFX1250-SDAG-LABEL: fadd_v2_v_fneg_hi2:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
|
||||
@ -1272,7 +1272,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_hi2(ptr addrspace(1) %a, float %x, flo
|
||||
; GFX1250-GISEL-LABEL: fadd_v2_v_fneg_hi2:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
|
||||
@ -1322,7 +1322,7 @@ define amdgpu_kernel void @fmul_v2_vv(ptr addrspace(1) %a) {
|
||||
; GFX1250-LABEL: fmul_v2_vv:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-NEXT: v_and_b32_e32 v2, 0x3ff, v0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
|
||||
@ -1366,7 +1366,7 @@ define amdgpu_kernel void @fmul_v2_vs(ptr addrspace(1) %a, <2 x float> %x) {
|
||||
; GFX1250-LABEL: fmul_v2_vs:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
||||
; GFX1250-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
|
||||
@ -1432,8 +1432,8 @@ define amdgpu_kernel void @fmul_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_clause 0x1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset
|
||||
@ -1450,8 +1450,8 @@ define amdgpu_kernel void @fmul_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_clause 0x1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v8, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset
|
||||
@ -1636,9 +1636,9 @@ define amdgpu_kernel void @fmul_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_clause 0x2
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4
|
||||
; GFX1250-SDAG-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4 nv
|
||||
; GFX1250-SDAG-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
@ -1702,11 +1702,11 @@ define amdgpu_kernel void @fmul_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
|
||||
; GFX1250-GISEL-LABEL: fmul_v32_vs:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_clause 0x1
|
||||
; GFX1250-GISEL-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4
|
||||
; GFX1250-GISEL-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4
|
||||
; GFX1250-GISEL-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4 nv
|
||||
; GFX1250-GISEL-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4 nv
|
||||
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v56, 7, v0
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
@ -1820,7 +1820,7 @@ define amdgpu_kernel void @fmul_v2_v_imm(ptr addrspace(1) %a) {
|
||||
; GFX1250-SDAG-LABEL: fmul_v2_v_imm:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_mov_b32 s2, 0x42c80000
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
@ -1833,7 +1833,7 @@ define amdgpu_kernel void @fmul_v2_v_imm(ptr addrspace(1) %a) {
|
||||
; GFX1250-GISEL-LABEL: fmul_v2_v_imm:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_mov_b32 s2, 0x42c80000
|
||||
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
@ -1894,7 +1894,7 @@ define amdgpu_kernel void @fmul_v2_v_v_splat(ptr addrspace(1) %a) {
|
||||
; GFX1250-SDAG-LABEL: fmul_v2_v_v_splat:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: global_load_b64 v[2:3], v0, s[0:1] scale_offset
|
||||
@ -1906,7 +1906,7 @@ define amdgpu_kernel void @fmul_v2_v_v_splat(ptr addrspace(1) %a) {
|
||||
; GFX1250-GISEL-LABEL: fmul_v2_v_v_splat:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, v0
|
||||
@ -1969,7 +1969,7 @@ define amdgpu_kernel void @fmul_v2_v_lit_splat(ptr addrspace(1) %a) {
|
||||
; GFX1250-SDAG-LABEL: fmul_v2_v_lit_splat:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
|
||||
@ -1981,7 +1981,7 @@ define amdgpu_kernel void @fmul_v2_v_lit_splat(ptr addrspace(1) %a) {
|
||||
; GFX1250-GISEL-LABEL: fmul_v2_v_lit_splat:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_mov_b32 s2, 4.0
|
||||
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
@ -2031,7 +2031,7 @@ define amdgpu_kernel void @fmul_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
|
||||
; GFX1250-SDAG-LABEL: fmul_v2_v_unfoldable_lit:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0x4040000040800000
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
@ -2044,7 +2044,7 @@ define amdgpu_kernel void @fmul_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
|
||||
; GFX1250-GISEL-LABEL: fmul_v2_v_unfoldable_lit:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_mov_b64 s[2:3], 0x4040000040800000
|
||||
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
@ -2108,7 +2108,7 @@ define amdgpu_kernel void @fmul_v2_v_fneg(ptr addrspace(1) %a, float %x) {
|
||||
; GFX1250-SDAG-LABEL: fmul_v2_v_fneg:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
|
||||
@ -2120,7 +2120,7 @@ define amdgpu_kernel void @fmul_v2_v_fneg(ptr addrspace(1) %a, float %x) {
|
||||
; GFX1250-GISEL-LABEL: fmul_v2_v_fneg:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
|
||||
@ -2170,7 +2170,7 @@ define amdgpu_kernel void @fma_v2_vv(ptr addrspace(1) %a) {
|
||||
; GFX1250-LABEL: fma_v2_vv:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-NEXT: v_and_b32_e32 v2, 0x3ff, v0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
|
||||
@ -2214,7 +2214,7 @@ define amdgpu_kernel void @fma_v2_vs(ptr addrspace(1) %a, <2 x float> %x) {
|
||||
; GFX1250-LABEL: fma_v2_vs:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
||||
; GFX1250-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
|
||||
@ -2280,8 +2280,8 @@ define amdgpu_kernel void @fma_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_clause 0x1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset
|
||||
@ -2298,8 +2298,8 @@ define amdgpu_kernel void @fma_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_clause 0x1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v8, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset
|
||||
@ -2483,11 +2483,11 @@ define amdgpu_kernel void @fma_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
|
||||
; GFX1250-SDAG-LABEL: fma_v32_vs:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_clause 0x1
|
||||
; GFX1250-SDAG-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4
|
||||
; GFX1250-SDAG-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4
|
||||
; GFX1250-SDAG-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4 nv
|
||||
; GFX1250-SDAG-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4 nv
|
||||
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX1250-SDAG-NEXT: v_lshlrev_b32_e32 v56, 7, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
@ -2522,10 +2522,11 @@ define amdgpu_kernel void @fma_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
|
||||
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[40:41], s[8:9]
|
||||
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[4:5], v[4:5], v[38:39], v[38:39]
|
||||
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[38:39], s[36:37]
|
||||
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[28:29], v[28:29], v[52:53], v[52:53]
|
||||
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x2
|
||||
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[20:21], v[20:21], v[32:33], v[32:33]
|
||||
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[22:23], v[22:23], v[34:35], v[34:35]
|
||||
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[28:29], v[28:29], v[52:53], v[52:53]
|
||||
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[30:31], v[30:31], v[54:55], v[54:55]
|
||||
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[26:27], v[26:27], v[50:51], v[50:51]
|
||||
; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[24:25], v[24:25], v[40:41], v[40:41]
|
||||
@ -2549,11 +2550,11 @@ define amdgpu_kernel void @fma_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
|
||||
; GFX1250-GISEL-LABEL: fma_v32_vs:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_clause 0x1
|
||||
; GFX1250-GISEL-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4
|
||||
; GFX1250-GISEL-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4
|
||||
; GFX1250-GISEL-NEXT: s_load_b512 s[36:51], s[4:5], 0xa4 nv
|
||||
; GFX1250-GISEL-NEXT: s_load_b512 s[8:23], s[4:5], 0xe4 nv
|
||||
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v56, 7, v0
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
@ -2690,7 +2691,7 @@ define amdgpu_kernel void @fma_v2_v_imm(ptr addrspace(1) %a) {
|
||||
; GFX1250-SDAG-LABEL: fma_v2_v_imm:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_mov_b32 s2, 0x43480000
|
||||
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
|
||||
@ -2705,7 +2706,7 @@ define amdgpu_kernel void @fma_v2_v_imm(ptr addrspace(1) %a) {
|
||||
; GFX1250-GISEL-LABEL: fma_v2_v_imm:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v6, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_mov_b32 s2, 0x42c80000
|
||||
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
|
||||
@ -2769,7 +2770,7 @@ define amdgpu_kernel void @fma_v2_v_v_splat(ptr addrspace(1) %a) {
|
||||
; GFX1250-SDAG-LABEL: fma_v2_v_v_splat:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: global_load_b64 v[2:3], v0, s[0:1] scale_offset
|
||||
@ -2781,7 +2782,7 @@ define amdgpu_kernel void @fma_v2_v_v_splat(ptr addrspace(1) %a) {
|
||||
; GFX1250-GISEL-LABEL: fma_v2_v_v_splat:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, v0
|
||||
@ -2864,7 +2865,7 @@ define amdgpu_kernel void @fma_v2_v_lit_splat(ptr addrspace(1) %a) {
|
||||
; GFX1250-SDAG-LABEL: fma_v2_v_lit_splat:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
|
||||
@ -2876,7 +2877,7 @@ define amdgpu_kernel void @fma_v2_v_lit_splat(ptr addrspace(1) %a) {
|
||||
; GFX1250-GISEL-LABEL: fma_v2_v_lit_splat:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v6, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_mov_b32 s2, 4.0
|
||||
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
|
||||
@ -2966,7 +2967,7 @@ define amdgpu_kernel void @fma_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
|
||||
; GFX1250-SDAG-LABEL: fma_v2_v_unfoldable_lit:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v6, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0x4040000040800000
|
||||
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[4:5], 0x400000003f800000
|
||||
@ -2980,7 +2981,7 @@ define amdgpu_kernel void @fma_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
|
||||
; GFX1250-GISEL-LABEL: fma_v2_v_unfoldable_lit:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v6, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_mov_b64 s[2:3], 0x4040000040800000
|
||||
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
|
||||
@ -3046,7 +3047,7 @@ define amdgpu_kernel void @fma_v2_v_fneg(ptr addrspace(1) %a, float %x) {
|
||||
; GFX1250-SDAG-LABEL: fma_v2_v_fneg:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
|
||||
@ -3058,7 +3059,7 @@ define amdgpu_kernel void @fma_v2_v_fneg(ptr addrspace(1) %a, float %x) {
|
||||
; GFX1250-GISEL-LABEL: fma_v2_v_fneg:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: global_load_b64 v[0:1], v4, s[0:1] scale_offset
|
||||
@ -3129,7 +3130,7 @@ define amdgpu_kernel void @add_vector_neg_bitcast_scalar_lo(ptr addrspace(1) %ou
|
||||
; GFX1250-SDAG-LABEL: add_vector_neg_bitcast_scalar_lo:
|
||||
; GFX1250-SDAG: ; %bb.0: ; %bb
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s2
|
||||
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s3
|
||||
@ -3143,7 +3144,7 @@ define amdgpu_kernel void @add_vector_neg_bitcast_scalar_lo(ptr addrspace(1) %ou
|
||||
; GFX1250-GISEL-LABEL: add_vector_neg_bitcast_scalar_lo:
|
||||
; GFX1250-GISEL: ; %bb.0: ; %bb
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v2, s3
|
||||
; GFX1250-GISEL-NEXT: ds_load_2addr_b32 v[0:1], v0 offset1:1
|
||||
@ -3224,7 +3225,7 @@ define amdgpu_kernel void @fma_vector_vector_neg_scalar_lo_scalar_hi(ptr addrspa
|
||||
; GFX1250-SDAG-LABEL: fma_vector_vector_neg_scalar_lo_scalar_hi:
|
||||
; GFX1250-SDAG: ; %bb.0: ; %bb
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v2, s2
|
||||
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v5, s3
|
||||
@ -3240,7 +3241,7 @@ define amdgpu_kernel void @fma_vector_vector_neg_scalar_lo_scalar_hi(ptr addrspa
|
||||
; GFX1250-GISEL-LABEL: fma_vector_vector_neg_scalar_lo_scalar_hi:
|
||||
; GFX1250-GISEL: ; %bb.0: ; %bb
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v5, s3
|
||||
; GFX1250-GISEL-NEXT: ds_load_2addr_b32 v[0:1], v2 offset1:1
|
||||
@ -3320,7 +3321,7 @@ define amdgpu_kernel void @shuffle_add_f32(ptr addrspace(1) %out, ptr addrspace(
|
||||
; GFX1250-SDAG-LABEL: shuffle_add_f32:
|
||||
; GFX1250-SDAG: ; %bb.0: ; %bb
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v2, s2
|
||||
; GFX1250-SDAG-NEXT: ds_load_b64 v[0:1], v2
|
||||
@ -3333,7 +3334,7 @@ define amdgpu_kernel void @shuffle_add_f32(ptr addrspace(1) %out, ptr addrspace(
|
||||
; GFX1250-GISEL-LABEL: shuffle_add_f32:
|
||||
; GFX1250-GISEL: ; %bb.0: ; %bb
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX1250-GISEL-NEXT: ds_load_b64 v[0:1], v2
|
||||
@ -3410,7 +3411,7 @@ define amdgpu_kernel void @shuffle_neg_add_f32(ptr addrspace(1) %out, ptr addrsp
|
||||
; GFX1250-SDAG-LABEL: shuffle_neg_add_f32:
|
||||
; GFX1250-SDAG: ; %bb.0: ; %bb
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v2, s2
|
||||
; GFX1250-SDAG-NEXT: ds_load_b64 v[0:1], v2
|
||||
@ -3426,7 +3427,7 @@ define amdgpu_kernel void @shuffle_neg_add_f32(ptr addrspace(1) %out, ptr addrsp
|
||||
; GFX1250-GISEL-LABEL: shuffle_neg_add_f32:
|
||||
; GFX1250-GISEL: ; %bb.0: ; %bb
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX1250-GISEL-NEXT: ds_load_b64 v[0:1], v2
|
||||
@ -3502,7 +3503,7 @@ define amdgpu_kernel void @fadd_fadd_fsub_0(<2 x float> %arg) {
|
||||
; GFX1250-SDAG-LABEL: fadd_fadd_fsub_0:
|
||||
; GFX1250-SDAG: ; %bb.0: ; %bb
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: s_add_f32 s1, s1, 0
|
||||
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3)
|
||||
@ -3514,7 +3515,7 @@ define amdgpu_kernel void @fadd_fadd_fsub_0(<2 x float> %arg) {
|
||||
; GFX1250-GISEL-LABEL: fadd_fadd_fsub_0:
|
||||
; GFX1250-GISEL: ; %bb.0: ; %bb
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
|
||||
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
|
||||
@ -3605,8 +3606,8 @@ define amdgpu_kernel void @fadd_fadd_fsub(<2 x float> %arg, <2 x float> %arg1, p
|
||||
; GFX1250-SDAG: ; %bb.0: ; %bb
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_clause 0x1
|
||||
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
|
||||
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x34 nv
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
|
||||
; GFX1250-SDAG-NEXT: s_add_f32 s2, s1, s3
|
||||
@ -3623,8 +3624,8 @@ define amdgpu_kernel void @fadd_fadd_fsub(<2 x float> %arg, <2 x float> %arg1, p
|
||||
; GFX1250-GISEL: ; %bb.0: ; %bb
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_clause 0x1
|
||||
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
|
||||
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x34 nv
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
|
||||
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
|
||||
@ -3694,7 +3695,7 @@ define amdgpu_kernel void @fadd_shuffle_v4(ptr addrspace(1) %arg) {
|
||||
; GFX1250-SDAG-LABEL: fadd_shuffle_v4:
|
||||
; GFX1250-SDAG: ; %bb.0: ; %bb
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v4, s[0:1] scale_offset
|
||||
@ -3707,7 +3708,7 @@ define amdgpu_kernel void @fadd_shuffle_v4(ptr addrspace(1) %arg) {
|
||||
; GFX1250-GISEL-LABEL: fadd_shuffle_v4:
|
||||
; GFX1250-GISEL: ; %bb.0: ; %bb
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v6, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: global_load_b128 v[0:3], v6, s[0:1] scale_offset
|
||||
@ -3768,7 +3769,7 @@ define amdgpu_kernel void @fneg_v2f32_vec(ptr addrspace(1) %a) {
|
||||
; GFX1250-SDAG-LABEL: fneg_v2f32_vec:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
|
||||
@ -3780,7 +3781,7 @@ define amdgpu_kernel void @fneg_v2f32_vec(ptr addrspace(1) %a) {
|
||||
; GFX1250-GISEL-LABEL: fneg_v2f32_vec:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_and_b32_e32 v2, 0x3ff, v0
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
|
||||
@ -3833,7 +3834,7 @@ define amdgpu_kernel void @fneg_v2f32_scalar(ptr addrspace(1) %a, <2 x float> %x
|
||||
; GFX1250-SDAG-LABEL: fneg_v2f32_scalar:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
||||
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-SDAG-NEXT: s_xor_b32 s2, s2, 0x80000000
|
||||
; GFX1250-SDAG-NEXT: s_xor_b32 s3, s3, 0x80000000
|
||||
@ -3845,7 +3846,7 @@ define amdgpu_kernel void @fneg_v2f32_scalar(ptr addrspace(1) %a, <2 x float> %x
|
||||
; GFX1250-GISEL-LABEL: fneg_v2f32_scalar:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
||||
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
|
||||
|
||||
@ -4106,50 +4106,47 @@ define amdgpu_kernel void @compute_mad(ptr addrspace(4) %i18, ptr addrspace(4) %
|
||||
; GFX1250-LABEL: compute_mad:
|
||||
; GFX1250: ; %bb.0: ; %bb
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x10
|
||||
; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x10 nv
|
||||
; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: s_load_b128 s[4:7], s[4:5], 0x0 nv
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_add_co_i32 s0, s10, 1
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v1, s0, v0
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_dual_add_nc_u32 v2, s0, v1 :: v_dual_add_nc_u32 v1, 1, v1
|
||||
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_add_co_i32 s2, s2, 1
|
||||
; GFX1250-NEXT: s_load_b32 s6, s[6:7], 0x4 nv
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v1, s2, v0
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_dual_add_nc_u32 v2, s2, v1 :: v_dual_add_nc_u32 v1, 1, v1
|
||||
; GFX1250-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: s_add_co_i32 s7, s2, 1
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v2, v2, v0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 nv
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: s_and_b32 s4, ttmp6, 15
|
||||
; GFX1250-NEXT: s_getreg_b32 s5, hwreg(HW_REG_IB_STS2, 6, 4)
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v2, v2, v0
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v3, v2, v1
|
||||
; GFX1250-NEXT: s_mul_i32 s5, ttmp9, s7
|
||||
; GFX1250-NEXT: s_getreg_b32 s7, hwreg(HW_REG_IB_STS2, 6, 4)
|
||||
; GFX1250-NEXT: s_add_co_i32 s4, s4, s5
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x4
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: s_bfe_u32 s3, ttmp6, 0x4000c
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX1250-NEXT: s_add_co_i32 s3, s3, 1
|
||||
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: s_mul_i32 s3, ttmp9, s3
|
||||
; GFX1250-NEXT: s_and_b32 s5, s6, 0xffff
|
||||
; GFX1250-NEXT: s_cmp_eq_u32 s7, 0
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v3, v2, v1
|
||||
; GFX1250-NEXT: s_cselect_b32 s4, ttmp9, s4
|
||||
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX1250-NEXT: v_mad_u32 v0, s4, s5, v0
|
||||
; GFX1250-NEXT: v_add_nc_u32_e32 v1, v3, v1
|
||||
; GFX1250-NEXT: s_add_co_i32 s4, s4, s3
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v1, v1, v2
|
||||
; GFX1250-NEXT: v_add_nc_u32_e32 v2, 1, v3
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_and_b32 s2, s2, 0xffff
|
||||
; GFX1250-NEXT: s_cmp_eq_u32 s5, 0
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v3, v1, v2
|
||||
; GFX1250-NEXT: s_cselect_b32 s3, ttmp9, s4
|
||||
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX1250-NEXT: v_mad_u32 v0, s3, s2, v0
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_add_nc_u32_e32 v2, v3, v2
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_mul_lo_u32 v2, v2, v1
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX1250-NEXT: v_add_nc_u64_e32 v[0:1], s[0:1], v[0:1]
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
|
||||
; GFX1250-NEXT: v_add_nc_u64_e32 v[0:1], s[2:3], v[0:1]
|
||||
; GFX1250-NEXT: v_mad_u32 v3, v2, v3, v2
|
||||
; GFX1250-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[8:9]
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX1250-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
|
||||
; GFX1250-NEXT: v_mad_u32 v2, v3, v2, v3
|
||||
; GFX1250-NEXT: global_store_b32 v[0:1], v2, off
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user