[AMDGPU][True16] Use COPY instead of V_MOV for non-imm operand in movePackToVALU lowering (#185754)

We should use COPY here for non-immediate operands to reduce the number of
v_mov instructions generated in the ISA. However, the issue discussed in
https://github.com/llvm/llvm-project/pull/162389#discussion_r2430459341
previously blocked me from doing so.

With https://github.com/llvm/llvm-project/pull/185751, this should work
now.
This commit is contained in:
Brox Chen 2026-03-12 17:12:45 -04:00 committed by GitHub
parent 1fa7051d82
commit 4456f31a8f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 1175 additions and 1765 deletions

View File

@ -9327,14 +9327,18 @@ void SIInstrInfo::movePackToVALU(SIInstrWorklist &Worklist,
Register SrcReg0, SrcReg1;
if (!Src0.isReg() || !RI.isVGPR(MRI, Src0.getReg())) {
SrcReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), SrcReg0).add(Src0);
BuildMI(*MBB, Inst, DL,
get(Src0.isImm() ? AMDGPU::V_MOV_B32_e32 : AMDGPU::COPY), SrcReg0)
.add(Src0);
} else {
SrcReg0 = Src0.getReg();
}
if (!Src1.isReg() || !RI.isVGPR(MRI, Src1.getReg())) {
SrcReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), SrcReg1).add(Src1);
BuildMI(*MBB, Inst, DL,
get(Src1.isImm() ? AMDGPU::V_MOV_B32_e32 : AMDGPU::COPY), SrcReg1)
.add(Src1);
} else {
SrcReg1 = Src1.getReg();
}

View File

@ -148048,10 +148048,9 @@ define inreg <64 x bfloat> @bitcast_v128i8_to_v64bf16_scalar(<128 x i8> inreg %a
; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v1
; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s9, v0
; GFX11-TRUE16-NEXT: s_mov_b32 s75, 0
; GFX11-TRUE16-NEXT: s_clause 0x2 ; 12-byte Folded Spill
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:328
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:324
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:320
; GFX11-TRUE16-NEXT: s_clause 0x1 ; 8-byte Folded Spill
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:324
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:320
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7)
; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v31
; GFX11-TRUE16-NEXT: s_and_b32 s76, vcc_lo, exec_lo
@ -148059,10 +148058,10 @@ define inreg <64 x bfloat> @bitcast_v128i8_to_v64bf16_scalar(<128 x i8> inreg %a
; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, 0xc0c0004 :: v_dual_lshlrev_b32 v13, 8, v86
; GFX11-TRUE16-NEXT: s_and_b32 s77, s43, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s78, s11, 8
; GFX11-TRUE16-NEXT: s_and_b32 s76, s63, 0xff
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: s_and_b32 s76, s43, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s77, s11, 8
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 8, v37
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_perm_b32 v3, s2, s3, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v5, s22, s23, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v6, s26, s27, v8
@ -148076,12 +148075,12 @@ define inreg <64 x bfloat> @bitcast_v128i8_to_v64bf16_scalar(<128 x i8> inreg %a
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v6.l
; GFX11-TRUE16-NEXT: v_perm_b32 v6, s40, s8, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v7, s14, s7, v8
; GFX11-TRUE16-NEXT: s_or_b32 s77, s77, s78
; GFX11-TRUE16-NEXT: s_or_b32 s76, s76, s77
; GFX11-TRUE16-NEXT: v_perm_b32 v1, s16, s17, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v9, s12, s6, v8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v4.l
; GFX11-TRUE16-NEXT: v_perm_b32 v4, s28, s29, v8
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s77 :: v_dual_lshlrev_b32 v15, 8, v70
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s76 :: v_dual_lshlrev_b32 v15, 8, v70
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v5.l
; GFX11-TRUE16-NEXT: v_perm_b32 v5, s61, s45, v8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v6.l
@ -148089,130 +148088,124 @@ define inreg <64 x bfloat> @bitcast_v128i8_to_v64bf16_scalar(<128 x i8> inreg %a
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v7.l
; GFX11-TRUE16-NEXT: v_perm_b32 v7, s72, s58, v8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v9.l
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s57, 8
; GFX11-TRUE16-NEXT: v_perm_b32 v9, s59, s44, v8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v68
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 8, v37
; GFX11-TRUE16-NEXT: s_or_b32 s76, s76, s79
; GFX11-TRUE16-NEXT: s_and_b32 s79, s41, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s88, s10, 8
; GFX11-TRUE16-NEXT: s_and_b32 s88, s41, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s89, s10, 8
; GFX11-TRUE16-NEXT: v_perm_b32 v11, s73, s42, v8
; GFX11-TRUE16-NEXT: s_or_b32 s79, s79, s88
; GFX11-TRUE16-NEXT: s_or_b32 s88, s88, s89
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v12
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, s79
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, s88
; GFX11-TRUE16-NEXT: s_and_b32 s88, s74, 0xff
; GFX11-TRUE16-NEXT: s_and_b32 s77, s63, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s76, s57, 8
; GFX11-TRUE16-NEXT: s_and_b32 s78, s56, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s5, 8
; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v114
; GFX11-TRUE16-NEXT: s_and_b32 s79, s74, 0xff
; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xff, v83
; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, s79, v13
; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, s88, v13
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v50
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v13, 8, v33
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v38
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v13, 8, v32
; GFX11-TRUE16-NEXT: s_or_b32 s76, s77, s76
; GFX11-TRUE16-NEXT: s_or_b32 s77, s78, s79
; GFX11-TRUE16-NEXT: s_and_b32 s78, s62, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s47, 8
; GFX11-TRUE16-NEXT: s_and_b32 s88, s46, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s89, s13, 8
; GFX11-TRUE16-NEXT: s_or_b32 s78, s78, s79
; GFX11-TRUE16-NEXT: s_or_b32 s79, s88, s89
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v8.l
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v14, v15
; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v38
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v15, 8, v32
; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v83
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v16, 8, v33
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s76, s76, s77
; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xff, v50
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s77, s78, s79
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 8, v39
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v13
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, v8 :: v_dual_lshlrev_b32 v17, 8, v39
; GFX11-TRUE16-NEXT: v_perm_b32 v13, v134, v96, 0xc0c0004
; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v14, v15
; GFX11-TRUE16-NEXT: v_perm_b32 v18, v116, v67, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v19, v130, v80, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v8.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, v10
; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v16, v17
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v15.l
; GFX11-TRUE16-NEXT: v_perm_b32 v15, v99, v65, 0xc0c0004
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v15, v16
; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v14, v17
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v10.l
; GFX11-TRUE16-NEXT: v_perm_b32 v10, v53, v34, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v15, v99, v65, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v8.l
; GFX11-TRUE16-NEXT: v_perm_b32 v8, v66, v35, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v16, v97, v48, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v17, v103, v54, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v10.l
; GFX11-TRUE16-NEXT: v_perm_b32 v10, v71, v51, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v17, v103, v54, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v8.l
; GFX11-TRUE16-NEXT: v_perm_b32 v8, v69, v36, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v20, v145, v112, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v21, v135, v81, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v18, v116, v67, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v10.l
; GFX11-TRUE16-NEXT: v_perm_b32 v10, v84, v49, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v19, v130, v80, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v8.l
; GFX11-TRUE16-NEXT: v_perm_b32 v8, v98, v52, 0xc0c0004
; GFX11-TRUE16-NEXT: s_and_b32 s78, s56, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s77, s5, 8
; GFX11-TRUE16-NEXT: v_perm_b32 v20, v145, v112, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v10.l
; GFX11-TRUE16-NEXT: v_perm_b32 v10, v113, v55, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v21, v135, v81, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v8.l
; GFX11-TRUE16-NEXT: v_perm_b32 v8, v118, v85, 0xc0c0004
; GFX11-TRUE16-NEXT: s_or_b32 s77, s78, s77
; GFX11-TRUE16-NEXT: v_perm_b32 v22, v147, v100, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v23, v151, v115, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v8, v118, v85, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v117
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v24, 8, v64
; GFX11-TRUE16-NEXT: v_perm_b32 v23, v151, v115, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v8.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v164
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 8, v119
; GFX11-TRUE16-NEXT: v_and_b32_e32 v26, 0xff, v131
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s76, s76, s77
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 8, v82
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v24
; GFX11-TRUE16-NEXT: v_and_b32_e32 v28, 0xff, v149
; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v8, v25
; GFX11-TRUE16-NEXT: v_and_b32_e32 v28, 0xff, v167
; GFX11-TRUE16-NEXT: s_and_b32 s78, s62, 0xff
; GFX11-TRUE16-NEXT: v_perm_b32 v25, v176, v148, 0xc0c0004
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v26, v27
; GFX11-TRUE16-NEXT: v_and_b32_e32 v26, 0xff, v149
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 8, v101
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v144
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, v8
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s47, 8
; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v26, v27
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 8, v87
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v26, 8, v128
; GFX11-TRUE16-NEXT: s_and_b32 s88, s46, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s89, s13, 8
; GFX11-TRUE16-NEXT: s_or_b32 s78, s78, s79
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v25
; GFX11-TRUE16-NEXT: v_perm_b32 v25, v176, v148, 0xc0c0004
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v26, 8, v87
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 8, v128
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v8.l
; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v28, v26
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, v27
; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v167
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 8, v101
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v26
; GFX11-TRUE16-NEXT: v_and_b32_e32 v30, 0xff, v160
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v31, 8, v102
; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v8, v27
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v28, v29
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v162
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v28, 8, v132
; GFX11-TRUE16-NEXT: s_or_b32 s79, s88, s89
; GFX11-TRUE16-NEXT: v_perm_b32 v27, v177, v146, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v8.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v179
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s77, s78, s79
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 8, v150
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v28
; GFX11-TRUE16-NEXT: v_and_b32_e32 v30, 0xff, v160
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v31, 8, v102
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v183, 8, v129
; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v30, v31
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v31, 8, v133
; GFX11-TRUE16-NEXT: v_and_b32_e32 v40, 0xff, v182
; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v8, v29
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v29, 0xff, v166
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v30, v31
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v30, 8, v133
; GFX11-TRUE16-NEXT: v_and_b32_e32 v31, 0xff, v165
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s76 :: v_dual_and_b32 v41, 0xff, v182
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, v10
; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v29, v30
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v31, v183
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v183, 8, v178
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v165
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v10, 8, v129
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_or_b32_e32 v41, v29, v31
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v31, 8, v178
; GFX11-TRUE16-NEXT: v_perm_b32 v29, v181, v163, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v10, s77
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v42, v30
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v40.l
; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v41, v183
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.h, v31.l
; GFX11-TRUE16-NEXT: v_or_b32_e32 v183, v8, v10
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, s76
; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v40, v31
; GFX11-TRUE16-NEXT: v_perm_b32 v31, v180, v161, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.h, v42.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v10, s77
; GFX11-TRUE16-NEXT: v_mov_b16_e64 v30.h, v183.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.h, v41.l
; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s75
; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB89_3
; GFX11-TRUE16-NEXT: .LBB89_2: ; %cmp.true
@ -148445,10 +148438,9 @@ define inreg <64 x bfloat> @bitcast_v128i8_to_v64bf16_scalar(<128 x i8> inreg %a
; GFX11-TRUE16-NEXT: v_mov_b16_e64 v30.h, v129.l
; GFX11-TRUE16-NEXT: v_mov_b16_e64 v31.h, v133.l
; GFX11-TRUE16-NEXT: .LBB89_3: ; %end
; GFX11-TRUE16-NEXT: s_clause 0x2 ; 12-byte Folded Reload
; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:320
; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:324
; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:328
; GFX11-TRUE16-NEXT: s_clause 0x1 ; 8-byte Folded Reload
; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:320
; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:324
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-TRUE16-NEXT: .LBB89_4:
@ -174590,10 +174582,9 @@ define inreg <64 x half> @bitcast_v128i8_to_v64f16_scalar(<128 x i8> inreg %a, i
; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v1
; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s9, v0
; GFX11-TRUE16-NEXT: s_mov_b32 s75, 0
; GFX11-TRUE16-NEXT: s_clause 0x2 ; 12-byte Folded Spill
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:328
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:324
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:320
; GFX11-TRUE16-NEXT: s_clause 0x1 ; 8-byte Folded Spill
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:324
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:320
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7)
; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v31
; GFX11-TRUE16-NEXT: s_and_b32 s76, vcc_lo, exec_lo
@ -174601,10 +174592,10 @@ define inreg <64 x half> @bitcast_v128i8_to_v64f16_scalar(<128 x i8> inreg %a, i
; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, 0xc0c0004 :: v_dual_lshlrev_b32 v13, 8, v86
; GFX11-TRUE16-NEXT: s_and_b32 s77, s43, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s78, s11, 8
; GFX11-TRUE16-NEXT: s_and_b32 s76, s63, 0xff
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: s_and_b32 s76, s43, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s77, s11, 8
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 8, v37
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_perm_b32 v3, s2, s3, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v5, s22, s23, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v6, s26, s27, v8
@ -174618,12 +174609,12 @@ define inreg <64 x half> @bitcast_v128i8_to_v64f16_scalar(<128 x i8> inreg %a, i
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v6.l
; GFX11-TRUE16-NEXT: v_perm_b32 v6, s40, s8, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v7, s14, s7, v8
; GFX11-TRUE16-NEXT: s_or_b32 s77, s77, s78
; GFX11-TRUE16-NEXT: s_or_b32 s76, s76, s77
; GFX11-TRUE16-NEXT: v_perm_b32 v1, s16, s17, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v9, s12, s6, v8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v4.l
; GFX11-TRUE16-NEXT: v_perm_b32 v4, s28, s29, v8
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s77 :: v_dual_lshlrev_b32 v15, 8, v70
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s76 :: v_dual_lshlrev_b32 v15, 8, v70
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v5.l
; GFX11-TRUE16-NEXT: v_perm_b32 v5, s61, s45, v8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v6.l
@ -174631,130 +174622,124 @@ define inreg <64 x half> @bitcast_v128i8_to_v64f16_scalar(<128 x i8> inreg %a, i
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v7.l
; GFX11-TRUE16-NEXT: v_perm_b32 v7, s72, s58, v8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v9.l
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s57, 8
; GFX11-TRUE16-NEXT: v_perm_b32 v9, s59, s44, v8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v68
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 8, v37
; GFX11-TRUE16-NEXT: s_or_b32 s76, s76, s79
; GFX11-TRUE16-NEXT: s_and_b32 s79, s41, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s88, s10, 8
; GFX11-TRUE16-NEXT: s_and_b32 s88, s41, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s89, s10, 8
; GFX11-TRUE16-NEXT: v_perm_b32 v11, s73, s42, v8
; GFX11-TRUE16-NEXT: s_or_b32 s79, s79, s88
; GFX11-TRUE16-NEXT: s_or_b32 s88, s88, s89
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v12
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, s79
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, s88
; GFX11-TRUE16-NEXT: s_and_b32 s88, s74, 0xff
; GFX11-TRUE16-NEXT: s_and_b32 s77, s63, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s76, s57, 8
; GFX11-TRUE16-NEXT: s_and_b32 s78, s56, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s5, 8
; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v114
; GFX11-TRUE16-NEXT: s_and_b32 s79, s74, 0xff
; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xff, v83
; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, s79, v13
; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, s88, v13
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v50
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v13, 8, v33
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v38
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v13, 8, v32
; GFX11-TRUE16-NEXT: s_or_b32 s76, s77, s76
; GFX11-TRUE16-NEXT: s_or_b32 s77, s78, s79
; GFX11-TRUE16-NEXT: s_and_b32 s78, s62, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s47, 8
; GFX11-TRUE16-NEXT: s_and_b32 s88, s46, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s89, s13, 8
; GFX11-TRUE16-NEXT: s_or_b32 s78, s78, s79
; GFX11-TRUE16-NEXT: s_or_b32 s79, s88, s89
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v8.l
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v14, v15
; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v38
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v15, 8, v32
; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v83
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v16, 8, v33
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s76, s76, s77
; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xff, v50
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s77, s78, s79
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 8, v39
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v13
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, v8 :: v_dual_lshlrev_b32 v17, 8, v39
; GFX11-TRUE16-NEXT: v_perm_b32 v13, v134, v96, 0xc0c0004
; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v14, v15
; GFX11-TRUE16-NEXT: v_perm_b32 v18, v116, v67, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v19, v130, v80, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v8.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, v10
; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v16, v17
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v15.l
; GFX11-TRUE16-NEXT: v_perm_b32 v15, v99, v65, 0xc0c0004
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v15, v16
; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v14, v17
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v10.l
; GFX11-TRUE16-NEXT: v_perm_b32 v10, v53, v34, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v15, v99, v65, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v8.l
; GFX11-TRUE16-NEXT: v_perm_b32 v8, v66, v35, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v16, v97, v48, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v17, v103, v54, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v10.l
; GFX11-TRUE16-NEXT: v_perm_b32 v10, v71, v51, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v17, v103, v54, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v8.l
; GFX11-TRUE16-NEXT: v_perm_b32 v8, v69, v36, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v20, v145, v112, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v21, v135, v81, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v18, v116, v67, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v10.l
; GFX11-TRUE16-NEXT: v_perm_b32 v10, v84, v49, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v19, v130, v80, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v8.l
; GFX11-TRUE16-NEXT: v_perm_b32 v8, v98, v52, 0xc0c0004
; GFX11-TRUE16-NEXT: s_and_b32 s78, s56, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s77, s5, 8
; GFX11-TRUE16-NEXT: v_perm_b32 v20, v145, v112, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v10.l
; GFX11-TRUE16-NEXT: v_perm_b32 v10, v113, v55, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v21, v135, v81, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v8.l
; GFX11-TRUE16-NEXT: v_perm_b32 v8, v118, v85, 0xc0c0004
; GFX11-TRUE16-NEXT: s_or_b32 s77, s78, s77
; GFX11-TRUE16-NEXT: v_perm_b32 v22, v147, v100, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v23, v151, v115, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v8, v118, v85, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v117
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v24, 8, v64
; GFX11-TRUE16-NEXT: v_perm_b32 v23, v151, v115, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v8.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v164
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 8, v119
; GFX11-TRUE16-NEXT: v_and_b32_e32 v26, 0xff, v131
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s76, s76, s77
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 8, v82
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v24
; GFX11-TRUE16-NEXT: v_and_b32_e32 v28, 0xff, v149
; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v8, v25
; GFX11-TRUE16-NEXT: v_and_b32_e32 v28, 0xff, v167
; GFX11-TRUE16-NEXT: s_and_b32 s78, s62, 0xff
; GFX11-TRUE16-NEXT: v_perm_b32 v25, v176, v148, 0xc0c0004
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v26, v27
; GFX11-TRUE16-NEXT: v_and_b32_e32 v26, 0xff, v149
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 8, v101
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v144
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, v8
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s47, 8
; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v26, v27
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 8, v87
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v26, 8, v128
; GFX11-TRUE16-NEXT: s_and_b32 s88, s46, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s89, s13, 8
; GFX11-TRUE16-NEXT: s_or_b32 s78, s78, s79
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v25
; GFX11-TRUE16-NEXT: v_perm_b32 v25, v176, v148, 0xc0c0004
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v26, 8, v87
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 8, v128
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v8.l
; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v28, v26
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, v27
; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v167
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 8, v101
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v26
; GFX11-TRUE16-NEXT: v_and_b32_e32 v30, 0xff, v160
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v31, 8, v102
; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v8, v27
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v28, v29
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v162
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v28, 8, v132
; GFX11-TRUE16-NEXT: s_or_b32 s79, s88, s89
; GFX11-TRUE16-NEXT: v_perm_b32 v27, v177, v146, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v8.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v179
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s77, s78, s79
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 8, v150
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v28
; GFX11-TRUE16-NEXT: v_and_b32_e32 v30, 0xff, v160
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v31, 8, v102
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v183, 8, v129
; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v30, v31
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v31, 8, v133
; GFX11-TRUE16-NEXT: v_and_b32_e32 v40, 0xff, v182
; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v8, v29
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v29, 0xff, v166
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v30, v31
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v30, 8, v133
; GFX11-TRUE16-NEXT: v_and_b32_e32 v31, 0xff, v165
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s76 :: v_dual_and_b32 v41, 0xff, v182
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, v10
; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v29, v30
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v31, v183
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v183, 8, v178
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v165
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v10, 8, v129
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_or_b32_e32 v41, v29, v31
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v31, 8, v178
; GFX11-TRUE16-NEXT: v_perm_b32 v29, v181, v163, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v10, s77
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v42, v30
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v40.l
; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v41, v183
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.h, v31.l
; GFX11-TRUE16-NEXT: v_or_b32_e32 v183, v8, v10
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, s76
; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v40, v31
; GFX11-TRUE16-NEXT: v_perm_b32 v31, v180, v161, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.h, v42.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v10, s77
; GFX11-TRUE16-NEXT: v_mov_b16_e64 v30.h, v183.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.h, v41.l
; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s75
; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB93_3
; GFX11-TRUE16-NEXT: .LBB93_2: ; %cmp.true
@ -174987,10 +174972,9 @@ define inreg <64 x half> @bitcast_v128i8_to_v64f16_scalar(<128 x i8> inreg %a, i
; GFX11-TRUE16-NEXT: v_mov_b16_e64 v30.h, v129.l
; GFX11-TRUE16-NEXT: v_mov_b16_e64 v31.h, v133.l
; GFX11-TRUE16-NEXT: .LBB93_3: ; %end
; GFX11-TRUE16-NEXT: s_clause 0x2 ; 12-byte Folded Reload
; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:320
; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:324
; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:328
; GFX11-TRUE16-NEXT: s_clause 0x1 ; 8-byte Folded Reload
; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:320
; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:324
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-TRUE16-NEXT: .LBB93_4:
@ -195225,10 +195209,9 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3
; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v1
; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s9, v0
; GFX11-TRUE16-NEXT: s_mov_b32 s75, 0
; GFX11-TRUE16-NEXT: s_clause 0x2 ; 12-byte Folded Spill
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:328
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:324
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:320
; GFX11-TRUE16-NEXT: s_clause 0x1 ; 8-byte Folded Spill
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:324
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:320
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7)
; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v31
; GFX11-TRUE16-NEXT: s_and_b32 s76, vcc_lo, exec_lo
@ -195236,10 +195219,10 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3
; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, 0xc0c0004 :: v_dual_lshlrev_b32 v13, 8, v86
; GFX11-TRUE16-NEXT: s_and_b32 s77, s43, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s78, s11, 8
; GFX11-TRUE16-NEXT: s_and_b32 s76, s63, 0xff
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: s_and_b32 s76, s43, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s77, s11, 8
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 8, v37
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_perm_b32 v3, s2, s3, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v5, s22, s23, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v6, s26, s27, v8
@ -195253,12 +195236,12 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v6.l
; GFX11-TRUE16-NEXT: v_perm_b32 v6, s40, s8, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v7, s14, s7, v8
; GFX11-TRUE16-NEXT: s_or_b32 s77, s77, s78
; GFX11-TRUE16-NEXT: s_or_b32 s76, s76, s77
; GFX11-TRUE16-NEXT: v_perm_b32 v1, s16, s17, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v9, s12, s6, v8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v4.l
; GFX11-TRUE16-NEXT: v_perm_b32 v4, s28, s29, v8
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s77 :: v_dual_lshlrev_b32 v15, 8, v70
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s76 :: v_dual_lshlrev_b32 v15, 8, v70
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v5.l
; GFX11-TRUE16-NEXT: v_perm_b32 v5, s61, s45, v8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v6.l
@ -195266,130 +195249,124 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v7.l
; GFX11-TRUE16-NEXT: v_perm_b32 v7, s72, s58, v8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v9.l
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s57, 8
; GFX11-TRUE16-NEXT: v_perm_b32 v9, s59, s44, v8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v68
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 8, v37
; GFX11-TRUE16-NEXT: s_or_b32 s76, s76, s79
; GFX11-TRUE16-NEXT: s_and_b32 s79, s41, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s88, s10, 8
; GFX11-TRUE16-NEXT: s_and_b32 s88, s41, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s89, s10, 8
; GFX11-TRUE16-NEXT: v_perm_b32 v11, s73, s42, v8
; GFX11-TRUE16-NEXT: s_or_b32 s79, s79, s88
; GFX11-TRUE16-NEXT: s_or_b32 s88, s88, s89
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v12
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, s79
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, s88
; GFX11-TRUE16-NEXT: s_and_b32 s88, s74, 0xff
; GFX11-TRUE16-NEXT: s_and_b32 s77, s63, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s76, s57, 8
; GFX11-TRUE16-NEXT: s_and_b32 s78, s56, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s5, 8
; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v114
; GFX11-TRUE16-NEXT: s_and_b32 s79, s74, 0xff
; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xff, v83
; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, s79, v13
; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, s88, v13
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v50
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v13, 8, v33
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v38
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v13, 8, v32
; GFX11-TRUE16-NEXT: s_or_b32 s76, s77, s76
; GFX11-TRUE16-NEXT: s_or_b32 s77, s78, s79
; GFX11-TRUE16-NEXT: s_and_b32 s78, s62, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s47, 8
; GFX11-TRUE16-NEXT: s_and_b32 s88, s46, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s89, s13, 8
; GFX11-TRUE16-NEXT: s_or_b32 s78, s78, s79
; GFX11-TRUE16-NEXT: s_or_b32 s79, s88, s89
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v8.l
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v14, v15
; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v38
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v15, 8, v32
; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v83
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v16, 8, v33
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s76, s76, s77
; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xff, v50
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s77, s78, s79
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 8, v39
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v13
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, v8 :: v_dual_lshlrev_b32 v17, 8, v39
; GFX11-TRUE16-NEXT: v_perm_b32 v13, v134, v96, 0xc0c0004
; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v14, v15
; GFX11-TRUE16-NEXT: v_perm_b32 v18, v116, v67, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v19, v130, v80, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v8.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, v10
; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v16, v17
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v15.l
; GFX11-TRUE16-NEXT: v_perm_b32 v15, v99, v65, 0xc0c0004
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v15, v16
; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v14, v17
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v10.l
; GFX11-TRUE16-NEXT: v_perm_b32 v10, v53, v34, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v15, v99, v65, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v8.l
; GFX11-TRUE16-NEXT: v_perm_b32 v8, v66, v35, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v16, v97, v48, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v17, v103, v54, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v10.l
; GFX11-TRUE16-NEXT: v_perm_b32 v10, v71, v51, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v17, v103, v54, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v8.l
; GFX11-TRUE16-NEXT: v_perm_b32 v8, v69, v36, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v20, v145, v112, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v21, v135, v81, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v18, v116, v67, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v10.l
; GFX11-TRUE16-NEXT: v_perm_b32 v10, v84, v49, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v19, v130, v80, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v8.l
; GFX11-TRUE16-NEXT: v_perm_b32 v8, v98, v52, 0xc0c0004
; GFX11-TRUE16-NEXT: s_and_b32 s78, s56, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s77, s5, 8
; GFX11-TRUE16-NEXT: v_perm_b32 v20, v145, v112, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v10.l
; GFX11-TRUE16-NEXT: v_perm_b32 v10, v113, v55, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v21, v135, v81, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v8.l
; GFX11-TRUE16-NEXT: v_perm_b32 v8, v118, v85, 0xc0c0004
; GFX11-TRUE16-NEXT: s_or_b32 s77, s78, s77
; GFX11-TRUE16-NEXT: v_perm_b32 v22, v147, v100, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v23, v151, v115, 0xc0c0004
; GFX11-TRUE16-NEXT: v_perm_b32 v8, v118, v85, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v117
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v24, 8, v64
; GFX11-TRUE16-NEXT: v_perm_b32 v23, v151, v115, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v8.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v164
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 8, v119
; GFX11-TRUE16-NEXT: v_and_b32_e32 v26, 0xff, v131
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s76, s76, s77
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 8, v82
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v24
; GFX11-TRUE16-NEXT: v_and_b32_e32 v28, 0xff, v149
; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v8, v25
; GFX11-TRUE16-NEXT: v_and_b32_e32 v28, 0xff, v167
; GFX11-TRUE16-NEXT: s_and_b32 s78, s62, 0xff
; GFX11-TRUE16-NEXT: v_perm_b32 v25, v176, v148, 0xc0c0004
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v26, v27
; GFX11-TRUE16-NEXT: v_and_b32_e32 v26, 0xff, v149
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 8, v101
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v144
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, v8
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s47, 8
; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v26, v27
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 8, v87
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v26, 8, v128
; GFX11-TRUE16-NEXT: s_and_b32 s88, s46, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s89, s13, 8
; GFX11-TRUE16-NEXT: s_or_b32 s78, s78, s79
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v25
; GFX11-TRUE16-NEXT: v_perm_b32 v25, v176, v148, 0xc0c0004
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v26, 8, v87
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 8, v128
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v8.l
; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v28, v26
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, v27
; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v167
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 8, v101
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v26
; GFX11-TRUE16-NEXT: v_and_b32_e32 v30, 0xff, v160
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v31, 8, v102
; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v8, v27
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v28, v29
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v162
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v28, 8, v132
; GFX11-TRUE16-NEXT: s_or_b32 s79, s88, s89
; GFX11-TRUE16-NEXT: v_perm_b32 v27, v177, v146, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v8.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v179
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s77, s78, s79
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 8, v150
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v28
; GFX11-TRUE16-NEXT: v_and_b32_e32 v30, 0xff, v160
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v31, 8, v102
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v183, 8, v129
; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v30, v31
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v31, 8, v133
; GFX11-TRUE16-NEXT: v_and_b32_e32 v40, 0xff, v182
; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v8, v29
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v29, 0xff, v166
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v30, v31
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v30, 8, v133
; GFX11-TRUE16-NEXT: v_and_b32_e32 v31, 0xff, v165
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s76 :: v_dual_and_b32 v41, 0xff, v182
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, v10
; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v29, v30
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v31, v183
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v183, 8, v178
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v165
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v10, 8, v129
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_or_b32_e32 v41, v29, v31
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v31, 8, v178
; GFX11-TRUE16-NEXT: v_perm_b32 v29, v181, v163, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v10, s77
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v42, v30
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v40.l
; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v41, v183
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.h, v31.l
; GFX11-TRUE16-NEXT: v_or_b32_e32 v183, v8, v10
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, s76
; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v40, v31
; GFX11-TRUE16-NEXT: v_perm_b32 v31, v180, v161, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.h, v42.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v10, s77
; GFX11-TRUE16-NEXT: v_mov_b16_e64 v30.h, v183.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.h, v41.l
; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s75
; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB97_3
; GFX11-TRUE16-NEXT: .LBB97_2: ; %cmp.true
@ -195622,10 +195599,9 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3
; GFX11-TRUE16-NEXT: v_mov_b16_e64 v30.h, v129.l
; GFX11-TRUE16-NEXT: v_mov_b16_e64 v31.h, v133.l
; GFX11-TRUE16-NEXT: .LBB97_3: ; %end
; GFX11-TRUE16-NEXT: s_clause 0x2 ; 12-byte Folded Reload
; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:320
; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:324
; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:328
; GFX11-TRUE16-NEXT: s_clause 0x1 ; 8-byte Folded Reload
; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:320
; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:324
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-TRUE16-NEXT: .LBB97_4:

View File

@ -71126,86 +71126,83 @@ define inreg <32 x i16> @bitcast_v64i8_to_v32i16_scalar(<64 x i8> inreg %a, i32
; GFX11-TRUE16-NEXT: s_and_b32 s76, vcc_lo, exec_lo
; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB99_4
; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, 0xc0c0004 :: v_dual_lshlrev_b32 v13, 8, v34
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, 0xc0c0004 :: v_dual_lshlrev_b32 v13, 8, v50
; GFX11-TRUE16-NEXT: s_and_b32 s76, s62, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s77, s44, 8
; GFX11-TRUE16-NEXT: s_lshl_b32 s78, s8, 8
; GFX11-TRUE16-NEXT: s_and_b32 s78, s43, 0xff
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_perm_b32 v3, s2, s3, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v5, s22, s23, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v6, s26, s27, v8
; GFX11-TRUE16-NEXT: s_or_b32 s76, s76, s77
; GFX11-TRUE16-NEXT: s_and_b32 s77, s14, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s9, 8
; GFX11-TRUE16-NEXT: v_perm_b32 v0, s0, s1, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v4, s18, s19, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v2, s20, s21, v8
; GFX11-TRUE16-NEXT: s_and_b32 s79, s43, 0xff
; GFX11-TRUE16-NEXT: s_or_b32 s77, s77, s78
; GFX11-TRUE16-NEXT: s_lshl_b32 s78, s9, 8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v3.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v5.l
; GFX11-TRUE16-NEXT: v_perm_b32 v3, s24, s25, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v5, s10, s4, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v3, s24, s25, v8
; GFX11-TRUE16-NEXT: s_or_b32 s76, s76, s77
; GFX11-TRUE16-NEXT: s_or_b32 s77, s78, s79
; GFX11-TRUE16-NEXT: s_and_b32 s78, s14, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s8, 8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v6.l
; GFX11-TRUE16-NEXT: v_perm_b32 v6, s41, s7, v8
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s77 :: v_dual_lshlrev_b32 v15, 8, v48
; GFX11-TRUE16-NEXT: s_or_b32 s77, s79, s78
; GFX11-TRUE16-NEXT: v_perm_b32 v7, s15, s6, v8
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s76, s76, s77
; GFX11-TRUE16-NEXT: s_or_b32 s77, s78, s79
; GFX11-TRUE16-NEXT: v_perm_b32 v1, s16, s17, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v9, s13, s5, v8
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s76, s76, s77
; GFX11-TRUE16-NEXT: s_and_b32 s77, s73, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s78, s57, 8
; GFX11-TRUE16-NEXT: s_and_b32 s79, s56, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s88, s12, 8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v4.l
; GFX11-TRUE16-NEXT: v_perm_b32 v4, s28, s29, v8
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s77 :: v_dual_lshlrev_b32 v15, 8, v48
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v5.l
; GFX11-TRUE16-NEXT: v_perm_b32 v5, s60, s45, v8
; GFX11-TRUE16-NEXT: s_or_b32 s77, s77, s78
; GFX11-TRUE16-NEXT: s_or_b32 s78, s79, s88
; GFX11-TRUE16-NEXT: s_and_b32 s79, s46, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s88, s11, 8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v6.l
; GFX11-TRUE16-NEXT: v_perm_b32 v6, s59, s40, v8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v7.l
; GFX11-TRUE16-NEXT: v_perm_b32 v7, s63, s58, v8
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s77, s77, s78
; GFX11-TRUE16-NEXT: s_or_b32 s78, s79, s88
; GFX11-TRUE16-NEXT: s_and_b32 s77, s73, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s78, s57, 8
; GFX11-TRUE16-NEXT: s_and_b32 s79, s56, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s88, s12, 8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v9.l
; GFX11-TRUE16-NEXT: v_perm_b32 v9, s61, s42, v8
; GFX11-TRUE16-NEXT: s_or_b32 s77, s77, s78
; GFX11-TRUE16-NEXT: s_or_b32 s78, s79, s88
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v38
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 8, v34
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s77, s77, s78
; GFX11-TRUE16-NEXT: s_and_b32 s78, s46, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s11, 8
; GFX11-TRUE16-NEXT: v_perm_b32 v11, s72, s47, v8
; GFX11-TRUE16-NEXT: s_or_b32 s78, s78, s79
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v12
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, s78
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v10, 8, v50
; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v38
; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v52
; GFX11-TRUE16-NEXT: s_and_b32 s78, s74, 0xff
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v8.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xff, v49
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v12, v13
; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, s78, v10
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v14, v15
; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xff, v36
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v14, 8, v32
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v8.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v37
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, v10 :: v_dual_lshlrev_b32 v15, 8, v31
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v13, v14
; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, s78, v13
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v37
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v13, 8, v31
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v8.l
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v14, v15
; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v36
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v15, 8, v32
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 8, v35
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v13
; GFX11-TRUE16-NEXT: v_perm_b32 v13, v53, v51, 0xc0c0004
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v8, v15
; GFX11-TRUE16-NEXT: v_perm_b32 v15, v39, v33, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v18, v14
; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v16, v17
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v10.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v8.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v8.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, s76
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v18.l
; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v14, v15
; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v16, v17
; GFX11-TRUE16-NEXT: v_perm_b32 v15, v39, v33, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v10.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v10, s77
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v18.l
; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s75
; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB99_3
; GFX11-TRUE16-NEXT: .LBB99_2: ; %cmp.true
@ -83887,86 +83884,83 @@ define inreg <32 x half> @bitcast_v64i8_to_v32f16_scalar(<64 x i8> inreg %a, i32
; GFX11-TRUE16-NEXT: s_and_b32 s76, vcc_lo, exec_lo
; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB107_4
; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, 0xc0c0004 :: v_dual_lshlrev_b32 v13, 8, v34
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, 0xc0c0004 :: v_dual_lshlrev_b32 v13, 8, v50
; GFX11-TRUE16-NEXT: s_and_b32 s76, s62, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s77, s44, 8
; GFX11-TRUE16-NEXT: s_lshl_b32 s78, s8, 8
; GFX11-TRUE16-NEXT: s_and_b32 s78, s43, 0xff
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_perm_b32 v3, s2, s3, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v5, s22, s23, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v6, s26, s27, v8
; GFX11-TRUE16-NEXT: s_or_b32 s76, s76, s77
; GFX11-TRUE16-NEXT: s_and_b32 s77, s14, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s9, 8
; GFX11-TRUE16-NEXT: v_perm_b32 v0, s0, s1, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v4, s18, s19, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v2, s20, s21, v8
; GFX11-TRUE16-NEXT: s_and_b32 s79, s43, 0xff
; GFX11-TRUE16-NEXT: s_or_b32 s77, s77, s78
; GFX11-TRUE16-NEXT: s_lshl_b32 s78, s9, 8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v3.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v5.l
; GFX11-TRUE16-NEXT: v_perm_b32 v3, s24, s25, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v5, s10, s4, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v3, s24, s25, v8
; GFX11-TRUE16-NEXT: s_or_b32 s76, s76, s77
; GFX11-TRUE16-NEXT: s_or_b32 s77, s78, s79
; GFX11-TRUE16-NEXT: s_and_b32 s78, s14, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s8, 8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v6.l
; GFX11-TRUE16-NEXT: v_perm_b32 v6, s41, s7, v8
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s77 :: v_dual_lshlrev_b32 v15, 8, v48
; GFX11-TRUE16-NEXT: s_or_b32 s77, s79, s78
; GFX11-TRUE16-NEXT: v_perm_b32 v7, s15, s6, v8
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s76, s76, s77
; GFX11-TRUE16-NEXT: s_or_b32 s77, s78, s79
; GFX11-TRUE16-NEXT: v_perm_b32 v1, s16, s17, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v9, s13, s5, v8
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s76, s76, s77
; GFX11-TRUE16-NEXT: s_and_b32 s77, s73, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s78, s57, 8
; GFX11-TRUE16-NEXT: s_and_b32 s79, s56, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s88, s12, 8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v4.l
; GFX11-TRUE16-NEXT: v_perm_b32 v4, s28, s29, v8
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s77 :: v_dual_lshlrev_b32 v15, 8, v48
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v5.l
; GFX11-TRUE16-NEXT: v_perm_b32 v5, s60, s45, v8
; GFX11-TRUE16-NEXT: s_or_b32 s77, s77, s78
; GFX11-TRUE16-NEXT: s_or_b32 s78, s79, s88
; GFX11-TRUE16-NEXT: s_and_b32 s79, s46, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s88, s11, 8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v6.l
; GFX11-TRUE16-NEXT: v_perm_b32 v6, s59, s40, v8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v7.l
; GFX11-TRUE16-NEXT: v_perm_b32 v7, s63, s58, v8
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s77, s77, s78
; GFX11-TRUE16-NEXT: s_or_b32 s78, s79, s88
; GFX11-TRUE16-NEXT: s_and_b32 s77, s73, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s78, s57, 8
; GFX11-TRUE16-NEXT: s_and_b32 s79, s56, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s88, s12, 8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v9.l
; GFX11-TRUE16-NEXT: v_perm_b32 v9, s61, s42, v8
; GFX11-TRUE16-NEXT: s_or_b32 s77, s77, s78
; GFX11-TRUE16-NEXT: s_or_b32 s78, s79, s88
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v38
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 8, v34
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s77, s77, s78
; GFX11-TRUE16-NEXT: s_and_b32 s78, s46, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s11, 8
; GFX11-TRUE16-NEXT: v_perm_b32 v11, s72, s47, v8
; GFX11-TRUE16-NEXT: s_or_b32 s78, s78, s79
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v12
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, s78
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v10, 8, v50
; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v38
; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v52
; GFX11-TRUE16-NEXT: s_and_b32 s78, s74, 0xff
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v8.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xff, v49
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v12, v13
; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, s78, v10
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v14, v15
; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xff, v36
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v14, 8, v32
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v8.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v37
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, v10 :: v_dual_lshlrev_b32 v15, 8, v31
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v13, v14
; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, s78, v13
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v37
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v13, 8, v31
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v8.l
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v14, v15
; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v36
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v15, 8, v32
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 8, v35
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v13
; GFX11-TRUE16-NEXT: v_perm_b32 v13, v53, v51, 0xc0c0004
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v8, v15
; GFX11-TRUE16-NEXT: v_perm_b32 v15, v39, v33, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v18, v14
; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v16, v17
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v10.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v8.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v8.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, s76
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v18.l
; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v14, v15
; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v16, v17
; GFX11-TRUE16-NEXT: v_perm_b32 v15, v39, v33, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v10.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v10, s77
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v18.l
; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s75
; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB107_3
; GFX11-TRUE16-NEXT: .LBB107_2: ; %cmp.true
@ -94593,86 +94587,83 @@ define inreg <32 x bfloat> @bitcast_v64i8_to_v32bf16_scalar(<64 x i8> inreg %a,
; GFX11-TRUE16-NEXT: s_and_b32 s76, vcc_lo, exec_lo
; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB111_4
; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, 0xc0c0004 :: v_dual_lshlrev_b32 v13, 8, v34
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, 0xc0c0004 :: v_dual_lshlrev_b32 v13, 8, v50
; GFX11-TRUE16-NEXT: s_and_b32 s76, s62, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s77, s44, 8
; GFX11-TRUE16-NEXT: s_lshl_b32 s78, s8, 8
; GFX11-TRUE16-NEXT: s_and_b32 s78, s43, 0xff
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_perm_b32 v3, s2, s3, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v5, s22, s23, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v6, s26, s27, v8
; GFX11-TRUE16-NEXT: s_or_b32 s76, s76, s77
; GFX11-TRUE16-NEXT: s_and_b32 s77, s14, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s9, 8
; GFX11-TRUE16-NEXT: v_perm_b32 v0, s0, s1, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v4, s18, s19, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v2, s20, s21, v8
; GFX11-TRUE16-NEXT: s_and_b32 s79, s43, 0xff
; GFX11-TRUE16-NEXT: s_or_b32 s77, s77, s78
; GFX11-TRUE16-NEXT: s_lshl_b32 s78, s9, 8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v3.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v5.l
; GFX11-TRUE16-NEXT: v_perm_b32 v3, s24, s25, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v5, s10, s4, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v3, s24, s25, v8
; GFX11-TRUE16-NEXT: s_or_b32 s76, s76, s77
; GFX11-TRUE16-NEXT: s_or_b32 s77, s78, s79
; GFX11-TRUE16-NEXT: s_and_b32 s78, s14, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s8, 8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v6.l
; GFX11-TRUE16-NEXT: v_perm_b32 v6, s41, s7, v8
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s77 :: v_dual_lshlrev_b32 v15, 8, v48
; GFX11-TRUE16-NEXT: s_or_b32 s77, s79, s78
; GFX11-TRUE16-NEXT: v_perm_b32 v7, s15, s6, v8
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s76, s76, s77
; GFX11-TRUE16-NEXT: s_or_b32 s77, s78, s79
; GFX11-TRUE16-NEXT: v_perm_b32 v1, s16, s17, v8
; GFX11-TRUE16-NEXT: v_perm_b32 v9, s13, s5, v8
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s76, s76, s77
; GFX11-TRUE16-NEXT: s_and_b32 s77, s73, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s78, s57, 8
; GFX11-TRUE16-NEXT: s_and_b32 s79, s56, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s88, s12, 8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v4.l
; GFX11-TRUE16-NEXT: v_perm_b32 v4, s28, s29, v8
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s77 :: v_dual_lshlrev_b32 v15, 8, v48
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v5.l
; GFX11-TRUE16-NEXT: v_perm_b32 v5, s60, s45, v8
; GFX11-TRUE16-NEXT: s_or_b32 s77, s77, s78
; GFX11-TRUE16-NEXT: s_or_b32 s78, s79, s88
; GFX11-TRUE16-NEXT: s_and_b32 s79, s46, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s88, s11, 8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v6.l
; GFX11-TRUE16-NEXT: v_perm_b32 v6, s59, s40, v8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v7.l
; GFX11-TRUE16-NEXT: v_perm_b32 v7, s63, s58, v8
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s77, s77, s78
; GFX11-TRUE16-NEXT: s_or_b32 s78, s79, s88
; GFX11-TRUE16-NEXT: s_and_b32 s77, s73, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s78, s57, 8
; GFX11-TRUE16-NEXT: s_and_b32 s79, s56, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s88, s12, 8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v9.l
; GFX11-TRUE16-NEXT: v_perm_b32 v9, s61, s42, v8
; GFX11-TRUE16-NEXT: s_or_b32 s77, s77, s78
; GFX11-TRUE16-NEXT: s_or_b32 s78, s79, s88
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v38
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 8, v34
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s77, s77, s78
; GFX11-TRUE16-NEXT: s_and_b32 s78, s46, 0xff
; GFX11-TRUE16-NEXT: s_lshl_b32 s79, s11, 8
; GFX11-TRUE16-NEXT: v_perm_b32 v11, s72, s47, v8
; GFX11-TRUE16-NEXT: s_or_b32 s78, s78, s79
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v12
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, s78
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v10, 8, v50
; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v38
; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v52
; GFX11-TRUE16-NEXT: s_and_b32 s78, s74, 0xff
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v8.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xff, v49
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v12, v13
; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, s78, v10
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v14, v15
; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xff, v36
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v14, 8, v32
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v8.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v37
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, v10 :: v_dual_lshlrev_b32 v15, 8, v31
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v13, v14
; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, s78, v13
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v10.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v37
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v13, 8, v31
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v8.l
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v14, v15
; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v36
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v15, 8, v32
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 8, v35
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v13
; GFX11-TRUE16-NEXT: v_perm_b32 v13, v53, v51, 0xc0c0004
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v8, v15
; GFX11-TRUE16-NEXT: v_perm_b32 v15, v39, v33, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v18, v14
; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v16, v17
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v10.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v8.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v8.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, s76
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v18.l
; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v14, v15
; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v16, v17
; GFX11-TRUE16-NEXT: v_perm_b32 v15, v39, v33, 0xc0c0004
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v10.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v10, s77
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v18.l
; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s75
; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB111_3
; GFX11-TRUE16-NEXT: .LBB111_2: ; %cmp.true

View File

@ -11029,16 +11029,7 @@ define inreg <36 x i16> @bitcast_v18f32_to_v36i16_scalar(<18 x float> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, s8 :: v_dual_mov_b32 v20, s7
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, s6 :: v_dual_mov_b32 v18, s5
; GFX11-TRUE16-NEXT: .LBB29_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v27 :: v_dual_mov_b32 v26, v26
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v25 :: v_dual_mov_b32 v24, v24
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v23 :: v_dual_mov_b32 v22, v22
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v21 :: v_dual_mov_b32 v20, v20
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v19 :: v_dual_mov_b32 v18, v18
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v33.l
@ -13803,16 +13794,7 @@ define inreg <36 x half> @bitcast_v18f32_to_v36f16_scalar(<18 x float> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, s8 :: v_dual_mov_b32 v20, s7
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, s6 :: v_dual_mov_b32 v18, s5
; GFX11-TRUE16-NEXT: .LBB33_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v27 :: v_dual_mov_b32 v26, v26
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v25 :: v_dual_mov_b32 v24, v24
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v23 :: v_dual_mov_b32 v22, v22
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v21 :: v_dual_mov_b32 v20, v20
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v19 :: v_dual_mov_b32 v18, v18
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v33.l
@ -22786,24 +22768,24 @@ define inreg <36 x i16> @bitcast_v9f64_to_v36i16_scalar(<9 x double> inreg %a, i
; GFX11-TRUE16-NEXT: v_add_f64 v[4:5], s[16:17], 1.0
; GFX11-TRUE16-NEXT: v_add_f64 v[2:3], s[2:3], 1.0
; GFX11-TRUE16-NEXT: v_add_f64 v[0:1], s[0:1], 1.0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v19, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v20, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v18, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v21, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v20, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v23, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v22, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v3
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v22, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v19, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v21, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v23, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v3
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v1
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v0
; GFX11-TRUE16-NEXT: s_branch .LBB49_5
; GFX11-TRUE16-NEXT: .LBB49_3:
; GFX11-TRUE16-NEXT: ; implicit-def: $sgpr46
@ -22835,47 +22817,35 @@ define inreg <36 x i16> @bitcast_v9f64_to_v36i16_scalar(<9 x double> inreg %a, i
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s46 :: v_dual_mov_b32 v35, s13
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s45 :: v_dual_mov_b32 v33, s12
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s44 :: v_dual_mov_b32 v31, s11
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, s43 :: v_dual_mov_b32 v29, s10
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, s42 :: v_dual_mov_b32 v27, s9
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, s41 :: v_dual_mov_b32 v25, s8
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v22, s40 :: v_dual_mov_b32 v23, s7
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s15 :: v_dual_mov_b32 v21, s6
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, s14 :: v_dual_mov_b32 v19, s5
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, s46 :: v_dual_mov_b32 v18, s14
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, s45 :: v_dual_mov_b32 v34, s12
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, s44 :: v_dual_mov_b32 v32, s11
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, s43 :: v_dual_mov_b32 v30, s10
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, s42 :: v_dual_mov_b32 v28, s9
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, s41 :: v_dual_mov_b32 v26, s8
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, s40 :: v_dual_mov_b32 v24, s7
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, s15 :: v_dual_mov_b32 v22, s6
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, s13 :: v_dual_mov_b32 v20, s5
; GFX11-TRUE16-NEXT: .LBB49_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v32 :: v_dual_mov_b32 v31, v31
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v22, v22
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v34 :: v_dual_mov_b32 v35, v35
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v32.l
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v33 :: v_dual_mov_b32 v27, v27
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v30 :: v_dual_mov_b32 v29, v29
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v28 :: v_dual_mov_b32 v25, v25
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v26 :: v_dual_mov_b32 v21, v21
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, v24 :: v_dual_mov_b32 v19, v19
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v22.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v22, v23
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v20, v20
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v18, v18
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v34.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v31.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v28.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v29.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v26.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v27.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v24.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v25.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v22.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v20.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v21.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v31.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v29.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v27.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v25.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v28.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v23.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v26.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v21.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v24.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v19.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v22.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v18.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v19.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v20.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: bitcast_v9f64_to_v36i16_scalar:
@ -25491,24 +25461,24 @@ define inreg <36 x half> @bitcast_v9f64_to_v36f16_scalar(<9 x double> inreg %a,
; GFX11-TRUE16-NEXT: v_add_f64 v[4:5], s[16:17], 1.0
; GFX11-TRUE16-NEXT: v_add_f64 v[2:3], s[2:3], 1.0
; GFX11-TRUE16-NEXT: v_add_f64 v[0:1], s[0:1], 1.0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v19, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v20, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v18, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v21, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v20, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v23, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v22, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v3
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v22, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v19, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v21, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v23, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v3
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v1
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v0
; GFX11-TRUE16-NEXT: s_branch .LBB53_5
; GFX11-TRUE16-NEXT: .LBB53_3:
; GFX11-TRUE16-NEXT: ; implicit-def: $sgpr46
@ -25540,47 +25510,35 @@ define inreg <36 x half> @bitcast_v9f64_to_v36f16_scalar(<9 x double> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s46 :: v_dual_mov_b32 v35, s13
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s45 :: v_dual_mov_b32 v33, s12
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s44 :: v_dual_mov_b32 v31, s11
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, s43 :: v_dual_mov_b32 v29, s10
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, s42 :: v_dual_mov_b32 v27, s9
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, s41 :: v_dual_mov_b32 v25, s8
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v22, s40 :: v_dual_mov_b32 v23, s7
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s15 :: v_dual_mov_b32 v21, s6
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, s14 :: v_dual_mov_b32 v19, s5
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, s46 :: v_dual_mov_b32 v18, s14
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, s45 :: v_dual_mov_b32 v34, s12
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, s44 :: v_dual_mov_b32 v32, s11
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, s43 :: v_dual_mov_b32 v30, s10
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, s42 :: v_dual_mov_b32 v28, s9
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, s41 :: v_dual_mov_b32 v26, s8
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, s40 :: v_dual_mov_b32 v24, s7
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, s15 :: v_dual_mov_b32 v22, s6
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, s13 :: v_dual_mov_b32 v20, s5
; GFX11-TRUE16-NEXT: .LBB53_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v32 :: v_dual_mov_b32 v31, v31
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v22, v22
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v34 :: v_dual_mov_b32 v35, v35
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v32.l
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v33 :: v_dual_mov_b32 v27, v27
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v30 :: v_dual_mov_b32 v29, v29
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v28 :: v_dual_mov_b32 v25, v25
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v26 :: v_dual_mov_b32 v21, v21
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, v24 :: v_dual_mov_b32 v19, v19
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v22.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v22, v23
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v20, v20
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v18, v18
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v34.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v31.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v28.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v29.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v26.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v27.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v24.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v25.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v22.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v20.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v21.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v31.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v29.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v27.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v25.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v28.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v23.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v26.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v21.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v24.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v19.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v22.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v18.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v19.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v20.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: bitcast_v9f64_to_v36f16_scalar:
@ -28848,18 +28806,10 @@ define inreg <36 x half> @bitcast_v36i16_to_v36f16_scalar(<36 x i16> inreg %a, i
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s8 :: v_dual_mov_b32 v33, s7
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s4 :: v_dual_mov_b32 v35, s5
; GFX11-TRUE16-NEXT: .LBB57_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v27 :: v_dual_mov_b32 v26, v26
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v25 :: v_dual_mov_b32 v24, v24
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v23 :: v_dual_mov_b32 v22, v22
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v21 :: v_dual_mov_b32 v20, v20
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v19 :: v_dual_mov_b32 v18, v18
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v34.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v31.l
@ -30356,18 +30306,10 @@ define inreg <36 x i16> @bitcast_v36f16_to_v36i16_scalar(<36 x half> inreg %a, i
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s8 :: v_dual_mov_b32 v33, s7
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s4 :: v_dual_mov_b32 v35, s5
; GFX11-TRUE16-NEXT: .LBB59_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v27 :: v_dual_mov_b32 v26, v26
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v25 :: v_dual_mov_b32 v24, v24
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v23 :: v_dual_mov_b32 v22, v22
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v21 :: v_dual_mov_b32 v20, v20
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v19 :: v_dual_mov_b32 v18, v18
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v34.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v31.l

View File

@ -12070,17 +12070,7 @@ define inreg <40 x i16> @bitcast_v20f32_to_v40i16_scalar(<20 x float> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, s10 :: v_dual_mov_b32 v22, s9
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, s8 :: v_dual_mov_b32 v20, s7
; GFX11-TRUE16-NEXT: .LBB29_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v27 :: v_dual_mov_b32 v26, v26
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v25 :: v_dual_mov_b32 v24, v24
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v23 :: v_dual_mov_b32 v22, v22
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v21 :: v_dual_mov_b32 v20, v20
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v37.l
@ -15156,17 +15146,7 @@ define inreg <40 x half> @bitcast_v20f32_to_v40f16_scalar(<20 x float> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, s10 :: v_dual_mov_b32 v22, s9
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, s8 :: v_dual_mov_b32 v20, s7
; GFX11-TRUE16-NEXT: .LBB33_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v27 :: v_dual_mov_b32 v26, v26
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v25 :: v_dual_mov_b32 v24, v24
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v23 :: v_dual_mov_b32 v22, v22
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v21 :: v_dual_mov_b32 v20, v20
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v37.l
@ -25103,26 +25083,26 @@ define inreg <40 x i16> @bitcast_v10f64_to_v40i16_scalar(<10 x double> inreg %a,
; GFX11-TRUE16-NEXT: v_add_f64 v[4:5], s[16:17], 1.0
; GFX11-TRUE16-NEXT: v_add_f64 v[2:3], s[2:3], 1.0
; GFX11-TRUE16-NEXT: v_add_f64 v[0:1], s[0:1], 1.0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v21, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v22, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v20, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v23, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v22, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v3
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v21, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v23, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v3
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v1
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v0
; GFX11-TRUE16-NEXT: s_branch .LBB49_5
; GFX11-TRUE16-NEXT: .LBB49_3:
; GFX11-TRUE16-NEXT: ; implicit-def: $sgpr58
@ -25157,51 +25137,38 @@ define inreg <40 x i16> @bitcast_v10f64_to_v40i16_scalar(<10 x double> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, s4 :: v_dual_mov_b32 v19, s5
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s58 :: v_dual_mov_b32 v39, s40
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s57 :: v_dual_mov_b32 v37, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s56 :: v_dual_mov_b32 v35, s14
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s47 :: v_dual_mov_b32 v33, s13
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s46 :: v_dual_mov_b32 v31, s12
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, s45 :: v_dual_mov_b32 v29, s11
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, s44 :: v_dual_mov_b32 v27, s10
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, s43 :: v_dual_mov_b32 v25, s9
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v22, s42 :: v_dual_mov_b32 v23, s8
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s41 :: v_dual_mov_b32 v21, s7
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, s58 :: v_dual_mov_b32 v20, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, s57 :: v_dual_mov_b32 v38, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, s56 :: v_dual_mov_b32 v36, s14
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, s47 :: v_dual_mov_b32 v34, s13
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, s46 :: v_dual_mov_b32 v32, s12
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, s45 :: v_dual_mov_b32 v30, s11
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, s44 :: v_dual_mov_b32 v28, s10
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, s43 :: v_dual_mov_b32 v26, s9
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, s42 :: v_dual_mov_b32 v24, s8
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, s40 :: v_dual_mov_b32 v22, s7
; GFX11-TRUE16-NEXT: .LBB49_5: ; %end
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v34 :: v_dual_mov_b32 v33, v33
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v24, v24
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v38 :: v_dual_mov_b32 v39, v39
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v36 :: v_dual_mov_b32 v37, v37
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v34.l
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v35 :: v_dual_mov_b32 v29, v29
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v32 :: v_dual_mov_b32 v31, v31
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v30 :: v_dual_mov_b32 v27, v27
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v28 :: v_dual_mov_b32 v23, v23
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v26 :: v_dual_mov_b32 v21, v21
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v24.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v24, v25
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v22, v22
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v20, v20
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v38.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v31.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v28.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v29.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v26.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v27.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v24.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v22.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v23.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v31.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v29.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v27.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v25.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v28.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v23.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v26.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v21.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v24.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v20.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v21.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v22.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: bitcast_v10f64_to_v40i16_scalar:
@ -28112,26 +28079,26 @@ define inreg <40 x half> @bitcast_v10f64_to_v40f16_scalar(<10 x double> inreg %a
; GFX11-TRUE16-NEXT: v_add_f64 v[4:5], s[16:17], 1.0
; GFX11-TRUE16-NEXT: v_add_f64 v[2:3], s[2:3], 1.0
; GFX11-TRUE16-NEXT: v_add_f64 v[0:1], s[0:1], 1.0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v21, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v22, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v20, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v23, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v22, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v3
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v21, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v23, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v3
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v1
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v0
; GFX11-TRUE16-NEXT: s_branch .LBB53_5
; GFX11-TRUE16-NEXT: .LBB53_3:
; GFX11-TRUE16-NEXT: ; implicit-def: $sgpr58
@ -28166,51 +28133,38 @@ define inreg <40 x half> @bitcast_v10f64_to_v40f16_scalar(<10 x double> inreg %a
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, s4 :: v_dual_mov_b32 v19, s5
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s58 :: v_dual_mov_b32 v39, s40
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s57 :: v_dual_mov_b32 v37, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s56 :: v_dual_mov_b32 v35, s14
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s47 :: v_dual_mov_b32 v33, s13
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s46 :: v_dual_mov_b32 v31, s12
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, s45 :: v_dual_mov_b32 v29, s11
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, s44 :: v_dual_mov_b32 v27, s10
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, s43 :: v_dual_mov_b32 v25, s9
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v22, s42 :: v_dual_mov_b32 v23, s8
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s41 :: v_dual_mov_b32 v21, s7
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, s58 :: v_dual_mov_b32 v20, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, s57 :: v_dual_mov_b32 v38, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, s56 :: v_dual_mov_b32 v36, s14
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, s47 :: v_dual_mov_b32 v34, s13
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, s46 :: v_dual_mov_b32 v32, s12
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, s45 :: v_dual_mov_b32 v30, s11
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, s44 :: v_dual_mov_b32 v28, s10
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, s43 :: v_dual_mov_b32 v26, s9
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, s42 :: v_dual_mov_b32 v24, s8
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, s40 :: v_dual_mov_b32 v22, s7
; GFX11-TRUE16-NEXT: .LBB53_5: ; %end
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v34 :: v_dual_mov_b32 v33, v33
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v24, v24
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v38 :: v_dual_mov_b32 v39, v39
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v36 :: v_dual_mov_b32 v37, v37
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v34.l
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v35 :: v_dual_mov_b32 v29, v29
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v32 :: v_dual_mov_b32 v31, v31
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v30 :: v_dual_mov_b32 v27, v27
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v28 :: v_dual_mov_b32 v23, v23
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v26 :: v_dual_mov_b32 v21, v21
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v24.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v24, v25
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v22, v22
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v20, v20
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v38.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v31.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v28.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v29.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v26.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v27.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v24.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v22.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v23.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v31.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v29.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v27.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v25.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v28.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v23.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v26.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v21.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v24.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v20.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v21.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v22.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: bitcast_v10f64_to_v40f16_scalar:
@ -31893,19 +31847,10 @@ define inreg <40 x half> @bitcast_v40i16_to_v40f16_scalar(<40 x i16> inreg %a, i
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s6 :: v_dual_mov_b32 v37, s4
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s5 :: v_dual_mov_b32 v39, s7
; GFX11-TRUE16-NEXT: .LBB57_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v27 :: v_dual_mov_b32 v26, v26
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v25 :: v_dual_mov_b32 v24, v24
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v23 :: v_dual_mov_b32 v22, v22
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v21 :: v_dual_mov_b32 v20, v20
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v38.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v35.l
@ -33566,19 +33511,10 @@ define inreg <40 x i16> @bitcast_v40f16_to_v40i16_scalar(<40 x half> inreg %a, i
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s6 :: v_dual_mov_b32 v37, s4
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s5 :: v_dual_mov_b32 v39, s7
; GFX11-TRUE16-NEXT: .LBB59_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v27 :: v_dual_mov_b32 v26, v26
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v25 :: v_dual_mov_b32 v24, v24
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v23 :: v_dual_mov_b32 v22, v22
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v21 :: v_dual_mov_b32 v20, v20
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v38.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v35.l

View File

@ -13030,18 +13030,7 @@ define inreg <44 x i16> @bitcast_v22f32_to_v44i16_scalar(<22 x float> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, s12 :: v_dual_mov_b32 v24, s11
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, s10 :: v_dual_mov_b32 v22, s9
; GFX11-TRUE16-NEXT: .LBB29_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v51 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v49 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v27 :: v_dual_mov_b32 v26, v26
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v25 :: v_dual_mov_b32 v24, v24
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v23 :: v_dual_mov_b32 v22, v22
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v50.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v49.l
@ -16425,18 +16414,7 @@ define inreg <44 x half> @bitcast_v22f32_to_v44f16_scalar(<22 x float> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, s12 :: v_dual_mov_b32 v24, s11
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, s10 :: v_dual_mov_b32 v22, s9
; GFX11-TRUE16-NEXT: .LBB33_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v51 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v49 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v27 :: v_dual_mov_b32 v26, v26
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v25 :: v_dual_mov_b32 v24, v24
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v23 :: v_dual_mov_b32 v22, v22
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v50.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v49.l
@ -27322,26 +27300,26 @@ define inreg <44 x i16> @bitcast_v11f64_to_v44i16_scalar(<11 x double> inreg %a,
; GFX11-TRUE16-NEXT: v_add_f64 v[4:5], s[16:17], 1.0
; GFX11-TRUE16-NEXT: v_add_f64 v[2:3], s[2:3], 1.0
; GFX11-TRUE16-NEXT: v_add_f64 v[0:1], s[0:1], 1.0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v23, 16, v21
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v21
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v22, 16, v20
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v23, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v3
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v1
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v0
; GFX11-TRUE16-NEXT: s_branch .LBB49_5
@ -27381,54 +27359,41 @@ define inreg <44 x i16> @bitcast_v11f64_to_v44i16_scalar(<11 x double> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, s6 :: v_dual_mov_b32 v19, s7
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s4 :: v_dual_mov_b32 v21, s5
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, s62 :: v_dual_mov_b32 v48, s61
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s60 :: v_dual_mov_b32 v33, s58
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s59 :: v_dual_mov_b32 v29, s56
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s57 :: v_dual_mov_b32 v51, s43
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, s47 :: v_dual_mov_b32 v39, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, s46 :: v_dual_mov_b32 v37, s40
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, s45 :: v_dual_mov_b32 v35, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v22, s44 :: v_dual_mov_b32 v31, s13
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s42 :: v_dual_mov_b32 v27, s11
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s14 :: v_dual_mov_b32 v25, s10
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s12 :: v_dual_mov_b32 v23, s9
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, s62 :: v_dual_mov_b32 v22, s44
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, s61 :: v_dual_mov_b32 v50, s42
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, s60 :: v_dual_mov_b32 v48, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, s59 :: v_dual_mov_b32 v38, s40
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, s58 :: v_dual_mov_b32 v36, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, s57 :: v_dual_mov_b32 v34, s14
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, s56 :: v_dual_mov_b32 v32, s13
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, s47 :: v_dual_mov_b32 v30, s12
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, s46 :: v_dual_mov_b32 v28, s11
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, s45 :: v_dual_mov_b32 v26, s10
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, s43 :: v_dual_mov_b32 v24, s9
; GFX11-TRUE16-NEXT: .LBB49_5: ; %end
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v36 :: v_dual_mov_b32 v33, v33
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v26 :: v_dual_mov_b32 v49, v49
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, v48 :: v_dual_mov_b32 v51, v51
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v38 :: v_dual_mov_b32 v39, v39
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v36.l
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v37 :: v_dual_mov_b32 v35, v35
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v32 :: v_dual_mov_b32 v29, v29
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v34 :: v_dual_mov_b32 v31, v31
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v28 :: v_dual_mov_b32 v25, v25
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v30 :: v_dual_mov_b32 v23, v23
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v26.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v26, v27
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v24, v24
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v22, v22
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v49.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v48.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v50.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v48.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v31.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v29.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v31.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v28.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v27.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v26.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v24.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v25.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v25.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v28.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v23.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v26.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v22.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v23.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v24.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: bitcast_v11f64_to_v44i16_scalar:
@ -30631,26 +30596,26 @@ define inreg <44 x half> @bitcast_v11f64_to_v44f16_scalar(<11 x double> inreg %a
; GFX11-TRUE16-NEXT: v_add_f64 v[4:5], s[16:17], 1.0
; GFX11-TRUE16-NEXT: v_add_f64 v[2:3], s[2:3], 1.0
; GFX11-TRUE16-NEXT: v_add_f64 v[0:1], s[0:1], 1.0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v23, 16, v21
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v21
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v22, 16, v20
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v23, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v3
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v1
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v0
; GFX11-TRUE16-NEXT: s_branch .LBB53_5
@ -30690,54 +30655,41 @@ define inreg <44 x half> @bitcast_v11f64_to_v44f16_scalar(<11 x double> inreg %a
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, s6 :: v_dual_mov_b32 v19, s7
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s4 :: v_dual_mov_b32 v21, s5
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, s62 :: v_dual_mov_b32 v48, s61
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s60 :: v_dual_mov_b32 v33, s58
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s59 :: v_dual_mov_b32 v29, s56
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s57 :: v_dual_mov_b32 v51, s43
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, s47 :: v_dual_mov_b32 v39, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, s46 :: v_dual_mov_b32 v37, s40
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, s45 :: v_dual_mov_b32 v35, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v22, s44 :: v_dual_mov_b32 v31, s13
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s42 :: v_dual_mov_b32 v27, s11
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s14 :: v_dual_mov_b32 v25, s10
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s12 :: v_dual_mov_b32 v23, s9
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, s62 :: v_dual_mov_b32 v22, s44
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, s61 :: v_dual_mov_b32 v50, s42
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, s60 :: v_dual_mov_b32 v48, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, s59 :: v_dual_mov_b32 v38, s40
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, s58 :: v_dual_mov_b32 v36, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, s57 :: v_dual_mov_b32 v34, s14
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, s56 :: v_dual_mov_b32 v32, s13
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, s47 :: v_dual_mov_b32 v30, s12
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, s46 :: v_dual_mov_b32 v28, s11
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, s45 :: v_dual_mov_b32 v26, s10
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, s43 :: v_dual_mov_b32 v24, s9
; GFX11-TRUE16-NEXT: .LBB53_5: ; %end
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v36 :: v_dual_mov_b32 v33, v33
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v26 :: v_dual_mov_b32 v49, v49
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, v48 :: v_dual_mov_b32 v51, v51
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v38 :: v_dual_mov_b32 v39, v39
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v36.l
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v37 :: v_dual_mov_b32 v35, v35
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v32 :: v_dual_mov_b32 v29, v29
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v34 :: v_dual_mov_b32 v31, v31
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v28 :: v_dual_mov_b32 v25, v25
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v30 :: v_dual_mov_b32 v23, v23
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v26.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v26, v27
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v24, v24
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v22, v22
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v49.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v48.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v50.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v48.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v31.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v29.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v31.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v28.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v27.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v26.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v24.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v25.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v25.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v28.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v23.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v26.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v22.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v23.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v24.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: bitcast_v11f64_to_v44f16_scalar:
@ -34831,20 +34783,10 @@ define inreg <44 x half> @bitcast_v44i16_to_v44f16_scalar(<44 x i16> inreg %a, i
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s7 :: v_dual_mov_b32 v49, s6
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s4 :: v_dual_mov_b32 v51, s5
; GFX11-TRUE16-NEXT: .LBB57_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v51 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v49 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v27 :: v_dual_mov_b32 v26, v26
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v25 :: v_dual_mov_b32 v24, v24
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v23 :: v_dual_mov_b32 v22, v22
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v50.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v49.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v48.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v39.l
@ -36673,20 +36615,10 @@ define inreg <44 x i16> @bitcast_v44f16_to_v44i16_scalar(<44 x half> inreg %a, i
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s7 :: v_dual_mov_b32 v49, s6
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s4 :: v_dual_mov_b32 v51, s5
; GFX11-TRUE16-NEXT: .LBB59_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v51 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v49 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v27 :: v_dual_mov_b32 v26, v26
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v25 :: v_dual_mov_b32 v24, v24
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v23 :: v_dual_mov_b32 v22, v22
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v50.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v49.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v48.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v39.l

View File

@ -14033,19 +14033,7 @@ define inreg <48 x i16> @bitcast_v24f32_to_v48i16_scalar(<24 x float> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, s14 :: v_dual_mov_b32 v26, s13
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, s12 :: v_dual_mov_b32 v24, s11
; GFX11-TRUE16-NEXT: .LBB29_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, v55 :: v_dual_mov_b32 v54, v54
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v53 :: v_dual_mov_b32 v52, v52
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v51 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v49 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v27 :: v_dual_mov_b32 v26, v26
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v25 :: v_dual_mov_b32 v24, v24
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v55.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v54.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v53.l
@ -17752,19 +17740,7 @@ define inreg <48 x half> @bitcast_v24f32_to_v48f16_scalar(<24 x float> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, s14 :: v_dual_mov_b32 v26, s13
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, s12 :: v_dual_mov_b32 v24, s11
; GFX11-TRUE16-NEXT: .LBB33_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, v55 :: v_dual_mov_b32 v54, v54
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v53 :: v_dual_mov_b32 v52, v52
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v51 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v49 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v27 :: v_dual_mov_b32 v26, v26
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v25 :: v_dual_mov_b32 v24, v24
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v55.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v54.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v53.l
@ -29639,26 +29615,26 @@ define inreg <48 x i16> @bitcast_v12f64_to_v48i16_scalar(<12 x double> inreg %a,
; GFX11-TRUE16-NEXT: v_add_f64 v[0:1], s[0:1], 1.0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v23
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v22
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v21
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v21
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v20
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v54, 16, v3
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v55, 16, v1
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v53, 16, v0
; GFX11-TRUE16-NEXT: s_branch .LBB49_5
@ -29701,56 +29677,42 @@ define inreg <48 x i16> @bitcast_v12f64_to_v48i16_scalar(<12 x double> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, s8 :: v_dual_mov_b32 v19, s9
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s4 :: v_dual_mov_b32 v21, s5
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v22, s6 :: v_dual_mov_b32 v23, s7
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, s74 :: v_dual_mov_b32 v52, s73
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, s72 :: v_dual_mov_b32 v48, s63
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s62 :: v_dual_mov_b32 v35, s61
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s60 :: v_dual_mov_b32 v31, s59
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s58 :: v_dual_mov_b32 v25, s56
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, s57 :: v_dual_mov_b32 v55, s46
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, s47 :: v_dual_mov_b32 v51, s44
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s45 :: v_dual_mov_b32 v39, s42
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s43 :: v_dual_mov_b32 v37, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s40 :: v_dual_mov_b32 v33, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s14 :: v_dual_mov_b32 v29, s13
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, s12 :: v_dual_mov_b32 v26, s11
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, s74 :: v_dual_mov_b32 v24, s47
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, s73 :: v_dual_mov_b32 v54, s45
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, s72 :: v_dual_mov_b32 v52, s44
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, s63 :: v_dual_mov_b32 v50, s43
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, s62 :: v_dual_mov_b32 v48, s42
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, s61 :: v_dual_mov_b32 v38, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, s60 :: v_dual_mov_b32 v36, s40
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, s59 :: v_dual_mov_b32 v34, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, s58 :: v_dual_mov_b32 v32, s14
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, s57 :: v_dual_mov_b32 v30, s13
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, s56 :: v_dual_mov_b32 v28, s12
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, s46 :: v_dual_mov_b32 v26, s11
; GFX11-TRUE16-NEXT: .LBB49_5: ; %end
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v38 :: v_dual_mov_b32 v35, v35
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v28 :: v_dual_mov_b32 v53, v53
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, v52 :: v_dual_mov_b32 v55, v55
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, v54 :: v_dual_mov_b32 v49, v49
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, v48 :: v_dual_mov_b32 v51, v51
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v50, v50
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v38.l
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v39 :: v_dual_mov_b32 v37, v37
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v34 :: v_dual_mov_b32 v31, v31
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v36 :: v_dual_mov_b32 v33, v33
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v30 :: v_dual_mov_b32 v25, v25
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v32 :: v_dual_mov_b32 v27, v27
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v28.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v28, v29
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v24, v24
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v26, v26
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v53.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v55.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v52.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v54.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v49.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v48.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v52.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v50.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v48.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v31.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v29.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v28.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v27.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v25.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v27.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v28.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v24.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v26.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
@ -33264,26 +33226,26 @@ define inreg <48 x half> @bitcast_v12f64_to_v48f16_scalar(<12 x double> inreg %a
; GFX11-TRUE16-NEXT: v_add_f64 v[0:1], s[0:1], 1.0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v23
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v22
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v21
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v21
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v20
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v54, 16, v3
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v55, 16, v1
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v53, 16, v0
; GFX11-TRUE16-NEXT: s_branch .LBB53_5
@ -33326,56 +33288,42 @@ define inreg <48 x half> @bitcast_v12f64_to_v48f16_scalar(<12 x double> inreg %a
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, s8 :: v_dual_mov_b32 v19, s9
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s4 :: v_dual_mov_b32 v21, s5
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v22, s6 :: v_dual_mov_b32 v23, s7
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, s74 :: v_dual_mov_b32 v52, s73
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, s72 :: v_dual_mov_b32 v48, s63
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s62 :: v_dual_mov_b32 v35, s61
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s60 :: v_dual_mov_b32 v31, s59
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s58 :: v_dual_mov_b32 v25, s56
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, s57 :: v_dual_mov_b32 v55, s46
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, s47 :: v_dual_mov_b32 v51, s44
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s45 :: v_dual_mov_b32 v39, s42
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s43 :: v_dual_mov_b32 v37, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s40 :: v_dual_mov_b32 v33, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s14 :: v_dual_mov_b32 v29, s13
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, s12 :: v_dual_mov_b32 v26, s11
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, s74 :: v_dual_mov_b32 v24, s47
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, s73 :: v_dual_mov_b32 v54, s45
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, s72 :: v_dual_mov_b32 v52, s44
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, s63 :: v_dual_mov_b32 v50, s43
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, s62 :: v_dual_mov_b32 v48, s42
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, s61 :: v_dual_mov_b32 v38, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, s60 :: v_dual_mov_b32 v36, s40
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, s59 :: v_dual_mov_b32 v34, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, s58 :: v_dual_mov_b32 v32, s14
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, s57 :: v_dual_mov_b32 v30, s13
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, s56 :: v_dual_mov_b32 v28, s12
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, s46 :: v_dual_mov_b32 v26, s11
; GFX11-TRUE16-NEXT: .LBB53_5: ; %end
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v38 :: v_dual_mov_b32 v35, v35
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v28 :: v_dual_mov_b32 v53, v53
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, v52 :: v_dual_mov_b32 v55, v55
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, v54 :: v_dual_mov_b32 v49, v49
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, v48 :: v_dual_mov_b32 v51, v51
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v50, v50
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v38.l
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v39 :: v_dual_mov_b32 v37, v37
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v34 :: v_dual_mov_b32 v31, v31
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v36 :: v_dual_mov_b32 v33, v33
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v30 :: v_dual_mov_b32 v25, v25
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v32 :: v_dual_mov_b32 v27, v27
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v28.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v28, v29
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v24, v24
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v26, v26
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v53.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v55.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v52.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v54.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v49.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v48.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v52.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v50.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v48.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v31.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v29.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v28.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v27.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v25.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v27.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v28.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v24.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v26.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
@ -37883,21 +37831,10 @@ define inreg <48 x half> @bitcast_v48i16_to_v48f16_scalar(<48 x i16> inreg %a, i
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, s6 :: v_dual_mov_b32 v53, s8
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s4 :: v_dual_mov_b32 v55, s5
; GFX11-TRUE16-NEXT: .LBB57_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, v55 :: v_dual_mov_b32 v54, v54
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v53 :: v_dual_mov_b32 v52, v52
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v51 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v49 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v27 :: v_dual_mov_b32 v26, v26
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v25 :: v_dual_mov_b32 v24, v24
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v55.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v54.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v53.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v52.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v51.l
@ -39897,21 +39834,10 @@ define inreg <48 x i16> @bitcast_v48f16_to_v48i16_scalar(<48 x half> inreg %a, i
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, s6 :: v_dual_mov_b32 v53, s8
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s4 :: v_dual_mov_b32 v55, s5
; GFX11-TRUE16-NEXT: .LBB59_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, v55 :: v_dual_mov_b32 v54, v54
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v53 :: v_dual_mov_b32 v52, v52
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v51 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v49 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v27 :: v_dual_mov_b32 v26, v26
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v25 :: v_dual_mov_b32 v24, v24
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v55.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v54.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v53.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v52.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v51.l

View File

@ -15164,20 +15164,7 @@ define inreg <52 x i16> @bitcast_v26f32_to_v52i16_scalar(<26 x float> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, s40 :: v_dual_mov_b32 v28, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, s14 :: v_dual_mov_b32 v26, s13
; GFX11-TRUE16-NEXT: .LBB29_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, v67 :: v_dual_mov_b32 v66, v66
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, v65 :: v_dual_mov_b32 v64, v64
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, v55 :: v_dual_mov_b32 v54, v54
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v53 :: v_dual_mov_b32 v52, v52
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v51 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v49 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v27 :: v_dual_mov_b32 v26, v26
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v67.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v66.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v65.l
@ -19274,20 +19261,7 @@ define inreg <52 x half> @bitcast_v26f32_to_v52f16_scalar(<26 x float> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, s40 :: v_dual_mov_b32 v28, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, s14 :: v_dual_mov_b32 v26, s13
; GFX11-TRUE16-NEXT: .LBB33_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, v67 :: v_dual_mov_b32 v66, v66
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, v65 :: v_dual_mov_b32 v64, v64
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, v55 :: v_dual_mov_b32 v54, v54
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v53 :: v_dual_mov_b32 v52, v52
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v51 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v49 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v27 :: v_dual_mov_b32 v26, v26
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v67.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v66.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v65.l
@ -32283,30 +32257,30 @@ define inreg <52 x i16> @bitcast_v13f64_to_v52i16_scalar(<13 x double> inreg %a,
; GFX11-TRUE16-NEXT: v_add_f64 v[0:1], s[0:1], 1.0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v25
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v24
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v23
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v23
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v22
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v21
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v20
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v53, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v64, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v54, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v21
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v20
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v53, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v55, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v65, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v66, 16, v3
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v55, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v54, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v67, 16, v1
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v65, 16, v0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v64, 16, v0
; GFX11-TRUE16-NEXT: s_branch .LBB49_5
; GFX11-TRUE16-NEXT: .LBB49_3:
; GFX11-TRUE16-NEXT: ; implicit-def: $sgpr78
@ -32350,60 +32324,45 @@ define inreg <52 x i16> @bitcast_v13f64_to_v52i16_scalar(<13 x double> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s4 :: v_dual_mov_b32 v21, s5
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v22, s6 :: v_dual_mov_b32 v23, s7
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, s8 :: v_dual_mov_b32 v25, s9
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, s78 :: v_dual_mov_b32 v54, s76
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, s77 :: v_dual_mov_b32 v50, s74
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, s75 :: v_dual_mov_b32 v48, s73
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, s72 :: v_dual_mov_b32 v36, s63
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, s62 :: v_dual_mov_b32 v32, s61
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s60 :: v_dual_mov_b32 v27, s59
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, s58 :: v_dual_mov_b32 v67, s57
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s56 :: v_dual_mov_b32 v53, s46
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, s47 :: v_dual_mov_b32 v49, s44
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, s45 :: v_dual_mov_b32 v39, s43
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s42 :: v_dual_mov_b32 v35, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s40 :: v_dual_mov_b32 v31, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, s14 :: v_dual_mov_b32 v28, s13
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, s78 :: v_dual_mov_b32 v29, s60
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s77 :: v_dual_mov_b32 v27, s59
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, s76 :: v_dual_mov_b32 v67, s57
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s75 :: v_dual_mov_b32 v65, s47
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s74 :: v_dual_mov_b32 v55, s46
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s73 :: v_dual_mov_b32 v53, s45
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s72 :: v_dual_mov_b32 v51, s44
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s63 :: v_dual_mov_b32 v49, s43
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s62 :: v_dual_mov_b32 v39, s42
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s61 :: v_dual_mov_b32 v37, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, s58 :: v_dual_mov_b32 v35, s40
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s56 :: v_dual_mov_b32 v33, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, s14 :: v_dual_mov_b32 v28, s13
; GFX11-TRUE16-NEXT: .LBB49_5: ; %end
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, v48 :: v_dual_mov_b32 v37, v37
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v30 :: v_dual_mov_b32 v65, v65
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, v66 :: v_dual_mov_b32 v67, v67
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, v54 :: v_dual_mov_b32 v55, v55
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, v64 :: v_dual_mov_b32 v51, v51
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, v50 :: v_dual_mov_b32 v53, v53
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v52, v52
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v48.l
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, v49 :: v_dual_mov_b32 v39, v39
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v36 :: v_dual_mov_b32 v33, v33
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v38 :: v_dual_mov_b32 v35, v35
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v32 :: v_dual_mov_b32 v27, v27
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v34 :: v_dual_mov_b32 v29, v29
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v30, v31
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v26, v26
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v28, v28
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v65.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v64.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v67.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v55.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v54.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v66.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v54.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v64.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v53.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v50.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v52.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v48.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v52.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v65.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v50.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v55.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v48.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v53.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v49.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v29.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v27.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v29.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v31.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v26.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v28.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
@ -36295,30 +36254,30 @@ define inreg <52 x half> @bitcast_v13f64_to_v52f16_scalar(<13 x double> inreg %a
; GFX11-TRUE16-NEXT: v_add_f64 v[0:1], s[0:1], 1.0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v25
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v24
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v23
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v23
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v22
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v21
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v20
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v53, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v64, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v54, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v21
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v20
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v53, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v55, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v65, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v66, 16, v3
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v55, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v54, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v67, 16, v1
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v65, 16, v0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v64, 16, v0
; GFX11-TRUE16-NEXT: s_branch .LBB53_5
; GFX11-TRUE16-NEXT: .LBB53_3:
; GFX11-TRUE16-NEXT: ; implicit-def: $sgpr78
@ -36362,60 +36321,45 @@ define inreg <52 x half> @bitcast_v13f64_to_v52f16_scalar(<13 x double> inreg %a
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s4 :: v_dual_mov_b32 v21, s5
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v22, s6 :: v_dual_mov_b32 v23, s7
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, s8 :: v_dual_mov_b32 v25, s9
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, s78 :: v_dual_mov_b32 v54, s76
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, s77 :: v_dual_mov_b32 v50, s74
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, s75 :: v_dual_mov_b32 v48, s73
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, s72 :: v_dual_mov_b32 v36, s63
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, s62 :: v_dual_mov_b32 v32, s61
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s60 :: v_dual_mov_b32 v27, s59
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, s58 :: v_dual_mov_b32 v67, s57
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s56 :: v_dual_mov_b32 v53, s46
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, s47 :: v_dual_mov_b32 v49, s44
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, s45 :: v_dual_mov_b32 v39, s43
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s42 :: v_dual_mov_b32 v35, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s40 :: v_dual_mov_b32 v31, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, s14 :: v_dual_mov_b32 v28, s13
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, s78 :: v_dual_mov_b32 v29, s60
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s77 :: v_dual_mov_b32 v27, s59
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, s76 :: v_dual_mov_b32 v67, s57
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s75 :: v_dual_mov_b32 v65, s47
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s74 :: v_dual_mov_b32 v55, s46
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s73 :: v_dual_mov_b32 v53, s45
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s72 :: v_dual_mov_b32 v51, s44
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s63 :: v_dual_mov_b32 v49, s43
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s62 :: v_dual_mov_b32 v39, s42
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s61 :: v_dual_mov_b32 v37, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, s58 :: v_dual_mov_b32 v35, s40
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s56 :: v_dual_mov_b32 v33, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, s14 :: v_dual_mov_b32 v28, s13
; GFX11-TRUE16-NEXT: .LBB53_5: ; %end
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, v48 :: v_dual_mov_b32 v37, v37
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v30 :: v_dual_mov_b32 v65, v65
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, v66 :: v_dual_mov_b32 v67, v67
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, v54 :: v_dual_mov_b32 v55, v55
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, v64 :: v_dual_mov_b32 v51, v51
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, v50 :: v_dual_mov_b32 v53, v53
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v52, v52
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v48.l
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, v49 :: v_dual_mov_b32 v39, v39
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v36 :: v_dual_mov_b32 v33, v33
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v38 :: v_dual_mov_b32 v35, v35
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v32 :: v_dual_mov_b32 v27, v27
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v34 :: v_dual_mov_b32 v29, v29
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v30, v31
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v26, v26
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v28, v28
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v65.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v64.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v67.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v55.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v54.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v66.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v54.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v64.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v53.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v50.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v52.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v48.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v52.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v65.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v50.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v55.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v48.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v53.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v49.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v29.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v27.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v29.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v31.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v26.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v28.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
@ -41361,22 +41305,10 @@ define inreg <52 x half> @bitcast_v52i16_to_v52f16_scalar(<52 x i16> inreg %a, i
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, s6 :: v_dual_mov_b32 v65, s5
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s4 :: v_dual_mov_b32 v67, s7
; GFX11-TRUE16-NEXT: .LBB57_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, v67 :: v_dual_mov_b32 v66, v66
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, v65 :: v_dual_mov_b32 v64, v64
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, v55 :: v_dual_mov_b32 v54, v54
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v53 :: v_dual_mov_b32 v52, v52
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v51 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v49 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v27 :: v_dual_mov_b32 v26, v26
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v67.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v66.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v65.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v64.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v55.l
@ -43584,22 +43516,10 @@ define inreg <52 x i16> @bitcast_v52f16_to_v52i16_scalar(<52 x half> inreg %a, i
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, s6 :: v_dual_mov_b32 v65, s5
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s4 :: v_dual_mov_b32 v67, s7
; GFX11-TRUE16-NEXT: .LBB59_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, v67 :: v_dual_mov_b32 v66, v66
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, v65 :: v_dual_mov_b32 v64, v64
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, v55 :: v_dual_mov_b32 v54, v54
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v53 :: v_dual_mov_b32 v52, v52
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v51 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v49 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v27 :: v_dual_mov_b32 v26, v26
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v67.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v66.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v65.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v64.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v55.l

View File

@ -16404,21 +16404,7 @@ define inreg <56 x i16> @bitcast_v28f32_to_v56i16_scalar(<28 x float> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, s42 :: v_dual_mov_b32 v30, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, s40 :: v_dual_mov_b32 v28, s15
; GFX11-TRUE16-NEXT: .LBB29_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v71, v71 :: v_dual_mov_b32 v70, v70
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v69, v69 :: v_dual_mov_b32 v68, v68
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, v67 :: v_dual_mov_b32 v66, v66
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, v65 :: v_dual_mov_b32 v64, v64
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, v55 :: v_dual_mov_b32 v54, v54
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v53 :: v_dual_mov_b32 v52, v52
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v51 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v49 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v71.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v70.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v69.l
@ -20982,21 +20968,7 @@ define inreg <56 x half> @bitcast_v28f32_to_v56f16_scalar(<28 x float> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, s42 :: v_dual_mov_b32 v30, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, s40 :: v_dual_mov_b32 v28, s15
; GFX11-TRUE16-NEXT: .LBB33_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v71, v71 :: v_dual_mov_b32 v70, v70
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v69, v69 :: v_dual_mov_b32 v68, v68
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, v67 :: v_dual_mov_b32 v66, v66
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, v65 :: v_dual_mov_b32 v64, v64
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, v55 :: v_dual_mov_b32 v54, v54
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v53 :: v_dual_mov_b32 v52, v52
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v51 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v49 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v71.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v70.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v69.l
@ -35231,34 +35203,34 @@ define inreg <56 x i16> @bitcast_v14f64_to_v56i16_scalar(<14 x double> inreg %a,
; GFX11-TRUE16-NEXT: v_add_f64 v[4:5], s[16:17], 1.0
; GFX11-TRUE16-NEXT: v_add_f64 v[2:3], s[2:3], 1.0
; GFX11-TRUE16-NEXT: v_add_f64 v[0:1], s[0:1], 1.0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v27
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v27
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v26
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v25
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v25
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v24
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v23
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v22
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v21
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v20
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v54, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v55, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v53, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v66, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v64, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v68, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v65, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v23
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v22
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v21
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v20
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v53, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v55, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v65, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v67, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v54, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v69, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v64, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v70, 16, v3
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v67, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v66, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v71, 16, v1
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v69, 16, v0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v68, 16, v0
; GFX11-TRUE16-NEXT: s_branch .LBB49_5
; GFX11-TRUE16-NEXT: .LBB49_3:
; GFX11-TRUE16-NEXT: ; implicit-def: $sgpr90
@ -35305,65 +35277,51 @@ define inreg <56 x i16> @bitcast_v14f64_to_v56i16_scalar(<14 x double> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v22, s6 :: v_dual_mov_b32 v23, s7
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, s8 :: v_dual_mov_b32 v25, s9
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, s10 :: v_dual_mov_b32 v27, s11
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v69, s90 :: v_dual_mov_b32 v64, s79
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, s89 :: v_dual_mov_b32 v52, s77
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, s88 :: v_dual_mov_b32 v50, s76
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, s78 :: v_dual_mov_b32 v38, s74
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, s75 :: v_dual_mov_b32 v34, s72
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, s73 :: v_dual_mov_b32 v32, s63
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, s62 :: v_dual_mov_b32 v28, s61
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v71, s60 :: v_dual_mov_b32 v70, s59
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v68, s58 :: v_dual_mov_b32 v55, s56
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s57 :: v_dual_mov_b32 v51, s46
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s47 :: v_dual_mov_b32 v49, s45
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s44 :: v_dual_mov_b32 v37, s43
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s42 :: v_dual_mov_b32 v33, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, s40 :: v_dual_mov_b32 v30, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v68, s90 :: v_dual_mov_b32 v29, s62
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s89 :: v_dual_mov_b32 v71, s60
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, s88 :: v_dual_mov_b32 v69, s58
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s79 :: v_dual_mov_b32 v67, s57
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, s78 :: v_dual_mov_b32 v65, s56
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s77 :: v_dual_mov_b32 v55, s47
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s76 :: v_dual_mov_b32 v53, s46
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s75 :: v_dual_mov_b32 v51, s45
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s74 :: v_dual_mov_b32 v49, s44
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s73 :: v_dual_mov_b32 v39, s43
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s72 :: v_dual_mov_b32 v37, s42
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s63 :: v_dual_mov_b32 v35, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, s61 :: v_dual_mov_b32 v33, s40
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v70, s59 :: v_dual_mov_b32 v31, s15
; GFX11-TRUE16-NEXT: .LBB49_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, v67 :: v_dual_mov_b32 v66, v66
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v53 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v32 :: v_dual_mov_b32 v29, v29
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v69, v69 :: v_dual_mov_b32 v68, v68
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v71, v71 :: v_dual_mov_b32 v64, v64
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v67.l
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, v70 :: v_dual_mov_b32 v52, v52
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, v65 :: v_dual_mov_b32 v54, v54
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, v55 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v50.l
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, v51 :: v_dual_mov_b32 v49, v49
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v32.l
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v33 :: v_dual_mov_b32 v31, v31
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v28, v28
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v30, v30
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v69.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v68.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v71.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v67.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v65.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v68.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v64.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v66.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v53.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v55.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v52.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v54.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v50.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v49.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v48.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v66.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v70.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v64.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v69.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v54.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v67.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v52.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v65.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v50.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v55.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v48.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v53.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v49.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v29.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v31.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v28.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v31.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: bitcast_v14f64_to_v56i16_scalar:
@ -39704,34 +39662,34 @@ define inreg <56 x half> @bitcast_v14f64_to_v56f16_scalar(<14 x double> inreg %a
; GFX11-TRUE16-NEXT: v_add_f64 v[4:5], s[16:17], 1.0
; GFX11-TRUE16-NEXT: v_add_f64 v[2:3], s[2:3], 1.0
; GFX11-TRUE16-NEXT: v_add_f64 v[0:1], s[0:1], 1.0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v27
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v27
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v26
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v25
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v25
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v24
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v23
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v22
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v21
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v20
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v54, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v55, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v53, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v66, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v64, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v68, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v65, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v23
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v22
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v21
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v20
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v53, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v55, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v65, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v67, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v54, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v69, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v64, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v70, 16, v3
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v67, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v66, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v71, 16, v1
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v69, 16, v0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v68, 16, v0
; GFX11-TRUE16-NEXT: s_branch .LBB53_5
; GFX11-TRUE16-NEXT: .LBB53_3:
; GFX11-TRUE16-NEXT: ; implicit-def: $sgpr90
@ -39778,65 +39736,51 @@ define inreg <56 x half> @bitcast_v14f64_to_v56f16_scalar(<14 x double> inreg %a
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v22, s6 :: v_dual_mov_b32 v23, s7
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, s8 :: v_dual_mov_b32 v25, s9
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, s10 :: v_dual_mov_b32 v27, s11
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v69, s90 :: v_dual_mov_b32 v64, s79
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, s89 :: v_dual_mov_b32 v52, s77
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, s88 :: v_dual_mov_b32 v50, s76
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, s78 :: v_dual_mov_b32 v38, s74
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, s75 :: v_dual_mov_b32 v34, s72
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, s73 :: v_dual_mov_b32 v32, s63
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, s62 :: v_dual_mov_b32 v28, s61
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v71, s60 :: v_dual_mov_b32 v70, s59
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v68, s58 :: v_dual_mov_b32 v55, s56
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s57 :: v_dual_mov_b32 v51, s46
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s47 :: v_dual_mov_b32 v49, s45
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s44 :: v_dual_mov_b32 v37, s43
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s42 :: v_dual_mov_b32 v33, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, s40 :: v_dual_mov_b32 v30, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v68, s90 :: v_dual_mov_b32 v29, s62
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s89 :: v_dual_mov_b32 v71, s60
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, s88 :: v_dual_mov_b32 v69, s58
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s79 :: v_dual_mov_b32 v67, s57
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, s78 :: v_dual_mov_b32 v65, s56
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s77 :: v_dual_mov_b32 v55, s47
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s76 :: v_dual_mov_b32 v53, s46
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s75 :: v_dual_mov_b32 v51, s45
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s74 :: v_dual_mov_b32 v49, s44
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s73 :: v_dual_mov_b32 v39, s43
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s72 :: v_dual_mov_b32 v37, s42
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s63 :: v_dual_mov_b32 v35, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, s61 :: v_dual_mov_b32 v33, s40
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v70, s59 :: v_dual_mov_b32 v31, s15
; GFX11-TRUE16-NEXT: .LBB53_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, v67 :: v_dual_mov_b32 v66, v66
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v53 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v32 :: v_dual_mov_b32 v29, v29
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v69, v69 :: v_dual_mov_b32 v68, v68
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v71, v71 :: v_dual_mov_b32 v64, v64
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v67.l
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, v70 :: v_dual_mov_b32 v52, v52
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, v65 :: v_dual_mov_b32 v54, v54
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, v55 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v50.l
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, v51 :: v_dual_mov_b32 v49, v49
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v32.l
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v33 :: v_dual_mov_b32 v31, v31
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v28, v28
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v30, v30
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v69.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v68.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v71.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v67.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v65.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v68.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v64.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v66.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v53.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v55.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v52.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v54.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v50.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v49.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v48.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v66.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v70.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v64.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v69.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v54.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v67.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v52.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v65.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v50.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v55.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v48.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v53.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v49.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v29.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v31.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v28.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v31.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: bitcast_v14f64_to_v56f16_scalar:
@ -45285,23 +45229,10 @@ define inreg <56 x half> @bitcast_v56i16_to_v56f16_scalar(<56 x i16> inreg %a, i
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v68, s8 :: v_dual_mov_b32 v69, s7
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v70, s4 :: v_dual_mov_b32 v71, s5
; GFX11-TRUE16-NEXT: .LBB57_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v71, v71 :: v_dual_mov_b32 v70, v70
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v69, v69 :: v_dual_mov_b32 v68, v68
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, v67 :: v_dual_mov_b32 v66, v66
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, v65 :: v_dual_mov_b32 v64, v64
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, v55 :: v_dual_mov_b32 v54, v54
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v53 :: v_dual_mov_b32 v52, v52
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v51 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v49 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v71.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v70.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v69.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v68.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v67.l
@ -47722,23 +47653,10 @@ define inreg <56 x i16> @bitcast_v56f16_to_v56i16_scalar(<56 x half> inreg %a, i
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v68, s8 :: v_dual_mov_b32 v69, s7
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v70, s4 :: v_dual_mov_b32 v71, s5
; GFX11-TRUE16-NEXT: .LBB59_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v71, v71 :: v_dual_mov_b32 v70, v70
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v69, v69 :: v_dual_mov_b32 v68, v68
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, v67 :: v_dual_mov_b32 v66, v66
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, v65 :: v_dual_mov_b32 v64, v64
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, v55 :: v_dual_mov_b32 v54, v54
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v53 :: v_dual_mov_b32 v52, v52
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v51 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v49 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v29 :: v_dual_mov_b32 v28, v28
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v71.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v70.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v69.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v68.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v67.l

View File

@ -17545,22 +17545,7 @@ define inreg <60 x i16> @bitcast_v30f32_to_v60i16_scalar(<30 x float> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, s44 :: v_dual_mov_b32 v32, s43
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, s42 :: v_dual_mov_b32 v30, s41
; GFX11-TRUE16-NEXT: .LBB29_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v83, v83 :: v_dual_mov_b32 v82, v82
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v81, v81 :: v_dual_mov_b32 v80, v80
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v71, v71 :: v_dual_mov_b32 v70, v70
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v69, v69 :: v_dual_mov_b32 v68, v68
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, v67 :: v_dual_mov_b32 v66, v66
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, v65 :: v_dual_mov_b32 v64, v64
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, v55 :: v_dual_mov_b32 v54, v54
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v53 :: v_dual_mov_b32 v52, v52
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v51 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v49 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v83.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v82.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v81.l
@ -22434,22 +22419,7 @@ define inreg <60 x half> @bitcast_v30f32_to_v60f16_scalar(<30 x float> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, s44 :: v_dual_mov_b32 v32, s43
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, s42 :: v_dual_mov_b32 v30, s41
; GFX11-TRUE16-NEXT: .LBB33_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v83, v83 :: v_dual_mov_b32 v82, v82
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v81, v81 :: v_dual_mov_b32 v80, v80
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v71, v71 :: v_dual_mov_b32 v70, v70
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v69, v69 :: v_dual_mov_b32 v68, v68
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, v67 :: v_dual_mov_b32 v66, v66
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, v65 :: v_dual_mov_b32 v64, v64
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, v55 :: v_dual_mov_b32 v54, v54
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v53 :: v_dual_mov_b32 v52, v52
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v51 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v49 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v83.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v82.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v81.l
@ -37743,36 +37713,36 @@ define inreg <60 x i16> @bitcast_v15f64_to_v60i16_scalar(<15 x double> inreg %a,
; GFX11-TRUE16-NEXT: v_add_f64 v[4:5], s[16:17], 1.0
; GFX11-TRUE16-NEXT: v_add_f64 v[2:3], s[2:3], 1.0
; GFX11-TRUE16-NEXT: v_add_f64 v[0:1], s[0:1], 1.0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v29
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v29
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v28
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v27
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v27
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v26
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v25
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v24
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v23
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v22
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v21
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v20
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v54, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v64, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v53, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v66, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v55, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v68, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v65, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v70, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v67, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v80, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v69, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v25
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v24
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v23
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v22
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v21
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v20
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v53, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v55, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v65, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v67, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v54, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v69, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v64, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v71, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v66, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v81, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v68, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v82, 16, v3
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v71, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v70, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v83, 16, v1
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v81, 16, v0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v80, 16, v0
; GFX11-TRUE16-NEXT: s_branch .LBB49_5
; GFX11-TRUE16-NEXT: .LBB49_3:
; GFX11-TRUE16-NEXT: ; implicit-def: $sgpr94
@ -37822,72 +37792,54 @@ define inreg <60 x i16> @bitcast_v15f64_to_v60i16_scalar(<15 x double> inreg %a,
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, s8 :: v_dual_mov_b32 v25, s9
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, s10 :: v_dual_mov_b32 v27, s11
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, s14 :: v_dual_mov_b32 v29, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v81, s94 :: v_dual_mov_b32 v30, s72
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v71, s93 :: v_dual_mov_b32 v82, s62
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v69, s92 :: v_dual_mov_b32 v80, s61
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, s91 :: v_dual_mov_b32 v70, s60
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, s90 :: v_dual_mov_b32 v68, s59
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, s89 :: v_dual_mov_b32 v66, s58
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, s88 :: v_dual_mov_b32 v64, s57
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, s79 :: v_dual_mov_b32 v54, s56
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, s78 :: v_dual_mov_b32 v52, s47
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, s77 :: v_dual_mov_b32 v50, s46
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, s76 :: v_dual_mov_b32 v48, s45
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, s75 :: v_dual_mov_b32 v38, s44
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, s74 :: v_dual_mov_b32 v36, s43
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, s73 :: v_dual_mov_b32 v34, s42
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v83, s63 :: v_dual_mov_b32 v32, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v80, s94 :: v_dual_mov_b32 v31, s73
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v70, s93 :: v_dual_mov_b32 v83, s63
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v68, s92 :: v_dual_mov_b32 v81, s61
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s91 :: v_dual_mov_b32 v71, s60
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, s90 :: v_dual_mov_b32 v69, s59
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s89 :: v_dual_mov_b32 v67, s58
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, s88 :: v_dual_mov_b32 v65, s57
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s79 :: v_dual_mov_b32 v55, s56
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s78 :: v_dual_mov_b32 v53, s47
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s77 :: v_dual_mov_b32 v51, s46
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s76 :: v_dual_mov_b32 v49, s45
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s75 :: v_dual_mov_b32 v39, s44
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s74 :: v_dual_mov_b32 v37, s43
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s72 :: v_dual_mov_b32 v35, s42
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v82, s62 :: v_dual_mov_b32 v33, s41
; GFX11-TRUE16-NEXT: .LBB49_5: ; %end
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v69, v69 :: v_dual_mov_b32 v64, v64
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, v65 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v51, v51
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v39, v39
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v33, v33
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v81, v81 :: v_dual_mov_b32 v82, v82
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v83, v83 :: v_dual_mov_b32 v70, v70
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v71, v71 :: v_dual_mov_b32 v68, v68
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v69.l
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v69, v80 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, v67 :: v_dual_mov_b32 v52, v52
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v65.l
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, v55 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, v66 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v53 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v51, v54
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v49, v49
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v37, v37
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v39, v48
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v35, v35
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v33, v36
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v31, v31
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v81.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v80.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v83.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v71.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v70.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v82.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v69.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v67.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v70.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v68.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v55.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v65.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v53.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v64.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v49.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v52.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v50.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v68.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v81.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v66.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v71.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v64.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v69.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v54.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v67.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v52.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v65.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v50.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v55.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v48.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v53.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v49.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v31.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v33.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: bitcast_v15f64_to_v60i16_scalar:
@ -42522,36 +42474,36 @@ define inreg <60 x half> @bitcast_v15f64_to_v60f16_scalar(<15 x double> inreg %a
; GFX11-TRUE16-NEXT: v_add_f64 v[4:5], s[16:17], 1.0
; GFX11-TRUE16-NEXT: v_add_f64 v[2:3], s[2:3], 1.0
; GFX11-TRUE16-NEXT: v_add_f64 v[0:1], s[0:1], 1.0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v29
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v29
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v28
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v27
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v27
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v26
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v25
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v24
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v23
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v22
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v21
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v20
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v54, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v64, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v53, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v66, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v55, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v68, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v65, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v70, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v67, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v80, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v69, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v25
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v24
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v23
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v22
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v21
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v20
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v19
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v18
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v53, 16, v17
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v16
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v55, 16, v15
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v65, 16, v13
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v12
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v67, 16, v11
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v54, 16, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v69, 16, v9
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v64, 16, v8
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v71, 16, v7
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v66, 16, v6
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v81, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v68, 16, v4
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v82, 16, v3
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v71, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v70, 16, v2
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v83, 16, v1
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v81, 16, v0
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v80, 16, v0
; GFX11-TRUE16-NEXT: s_branch .LBB53_5
; GFX11-TRUE16-NEXT: .LBB53_3:
; GFX11-TRUE16-NEXT: ; implicit-def: $sgpr94
@ -42601,72 +42553,54 @@ define inreg <60 x half> @bitcast_v15f64_to_v60f16_scalar(<15 x double> inreg %a
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, s8 :: v_dual_mov_b32 v25, s9
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, s10 :: v_dual_mov_b32 v27, s11
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, s14 :: v_dual_mov_b32 v29, s15
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v81, s94 :: v_dual_mov_b32 v30, s72
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v71, s93 :: v_dual_mov_b32 v82, s62
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v69, s92 :: v_dual_mov_b32 v80, s61
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, s91 :: v_dual_mov_b32 v70, s60
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, s90 :: v_dual_mov_b32 v68, s59
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, s89 :: v_dual_mov_b32 v66, s58
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, s88 :: v_dual_mov_b32 v64, s57
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, s79 :: v_dual_mov_b32 v54, s56
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, s78 :: v_dual_mov_b32 v52, s47
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, s77 :: v_dual_mov_b32 v50, s46
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, s76 :: v_dual_mov_b32 v48, s45
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, s75 :: v_dual_mov_b32 v38, s44
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, s74 :: v_dual_mov_b32 v36, s43
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, s73 :: v_dual_mov_b32 v34, s42
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v83, s63 :: v_dual_mov_b32 v32, s41
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v80, s94 :: v_dual_mov_b32 v31, s73
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v70, s93 :: v_dual_mov_b32 v83, s63
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v68, s92 :: v_dual_mov_b32 v81, s61
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s91 :: v_dual_mov_b32 v71, s60
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, s90 :: v_dual_mov_b32 v69, s59
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s89 :: v_dual_mov_b32 v67, s58
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, s88 :: v_dual_mov_b32 v65, s57
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s79 :: v_dual_mov_b32 v55, s56
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s78 :: v_dual_mov_b32 v53, s47
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s77 :: v_dual_mov_b32 v51, s46
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s76 :: v_dual_mov_b32 v49, s45
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s75 :: v_dual_mov_b32 v39, s44
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s74 :: v_dual_mov_b32 v37, s43
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s72 :: v_dual_mov_b32 v35, s42
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v82, s62 :: v_dual_mov_b32 v33, s41
; GFX11-TRUE16-NEXT: .LBB53_5: ; %end
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v69, v69 :: v_dual_mov_b32 v64, v64
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, v65 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v51, v51
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v39, v39
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v33, v33
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v81, v81 :: v_dual_mov_b32 v82, v82
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v83, v83 :: v_dual_mov_b32 v70, v70
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v71, v71 :: v_dual_mov_b32 v68, v68
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v69.l
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v69, v80 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, v67 :: v_dual_mov_b32 v52, v52
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v65.l
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, v55 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, v66 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v53 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v51, v54
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v49, v49
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v37, v37
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v39, v48
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v35, v35
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v33, v36
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v31, v31
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v81.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v80.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v83.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v71.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v70.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v82.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v69.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v67.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v70.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v68.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v55.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v65.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v53.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v64.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v49.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v52.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v50.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v33.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v68.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v81.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v66.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v71.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v64.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v69.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v54.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v67.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v52.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v65.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v50.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v55.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v48.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v53.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v38.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v51.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v36.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v49.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v39.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v37.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v31.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v34.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v35.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v30.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v32.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v33.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: bitcast_v15f64_to_v60f16_scalar:
@ -48633,24 +48567,10 @@ define inreg <60 x half> @bitcast_v60i16_to_v60f16_scalar(<60 x i16> inreg %a, i
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v80, s6 :: v_dual_mov_b32 v81, s4
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v82, s5 :: v_dual_mov_b32 v83, s7
; GFX11-TRUE16-NEXT: .LBB57_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v83, v83 :: v_dual_mov_b32 v82, v82
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v81, v81 :: v_dual_mov_b32 v80, v80
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v71, v71 :: v_dual_mov_b32 v70, v70
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v69, v69 :: v_dual_mov_b32 v68, v68
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, v67 :: v_dual_mov_b32 v66, v66
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, v65 :: v_dual_mov_b32 v64, v64
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, v55 :: v_dual_mov_b32 v54, v54
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v53 :: v_dual_mov_b32 v52, v52
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v51 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v49 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v83.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v82.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v81.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v80.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v71.l
@ -51320,24 +51240,10 @@ define inreg <60 x i16> @bitcast_v60f16_to_v60i16_scalar(<60 x half> inreg %a, i
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v80, s6 :: v_dual_mov_b32 v81, s4
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v82, s5 :: v_dual_mov_b32 v83, s7
; GFX11-TRUE16-NEXT: .LBB59_5: ; %end
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v83, v83 :: v_dual_mov_b32 v82, v82
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v81, v81 :: v_dual_mov_b32 v80, v80
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v71, v71 :: v_dual_mov_b32 v70, v70
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v69, v69 :: v_dual_mov_b32 v68, v68
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v67, v67 :: v_dual_mov_b32 v66, v66
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v65, v65 :: v_dual_mov_b32 v64, v64
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v55, v55 :: v_dual_mov_b32 v54, v54
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v53 :: v_dual_mov_b32 v52, v52
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v51 :: v_dual_mov_b32 v50, v50
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v49 :: v_dual_mov_b32 v48, v48
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, v39 :: v_dual_mov_b32 v38, v38
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v37 :: v_dual_mov_b32 v36, v36
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v35 :: v_dual_mov_b32 v34, v34
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v33, v33 :: v_dual_mov_b32 v32, v32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v31 :: v_dual_mov_b32 v30, v30
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v83.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v82.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v81.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v80.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v71.l

View File

@ -272,8 +272,8 @@ body: |
; GCN-LABEL: name: s_pack_ll_b32_b16
; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[DEF]], implicit $exec
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_MOV_B32_e32_]].lo16, %subreg.lo16, [[DEF1]].lo16, %subreg.hi16
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[COPY]].lo16, %subreg.lo16, [[DEF1]].lo16, %subreg.hi16
%0:sreg_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:sreg_32 = COPY %1:vgpr_32
@ -287,8 +287,8 @@ body: |
; GCN-LABEL: name: s_pack_lh_b32_b16
; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[DEF]], implicit $exec
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_MOV_B32_e32_]].lo16, %subreg.lo16, [[DEF1]].hi16, %subreg.hi16
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[COPY]].lo16, %subreg.lo16, [[DEF1]].hi16, %subreg.hi16
%0:sreg_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:sreg_32 = COPY %1:vgpr_32
@ -302,8 +302,8 @@ body: |
; GCN-LABEL: name: s_pack_hl_b32_b16
; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[DEF]], implicit $exec
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_MOV_B32_e32_]].hi16, %subreg.lo16, [[DEF1]].lo16, %subreg.hi16
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[COPY]].hi16, %subreg.lo16, [[DEF1]].lo16, %subreg.hi16
%0:sreg_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:sreg_32 = COPY %1:vgpr_32
@ -317,8 +317,8 @@ body: |
; GCN-LABEL: name: s_pack_hh_b32_b16
; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[DEF]], implicit $exec
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_MOV_B32_e32_]].hi16, %subreg.lo16, [[DEF1]].hi16, %subreg.hi16
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[COPY]].hi16, %subreg.lo16, [[DEF1]].hi16, %subreg.hi16
%0:sreg_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:sreg_32 = COPY %1:vgpr_32

View File

@ -258,17 +258,15 @@ define amdgpu_kernel void @rint_v2f16(
; GFX12-TRUE16-NEXT: s_mov_b32 s8, s2
; GFX12-TRUE16-NEXT: s_mov_b32 s9, s3
; GFX12-TRUE16-NEXT: s_mov_b32 s4, s0
; GFX12-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], null
; GFX12-TRUE16-NEXT: buffer_load_b32 v1, off, s[8:11], null
; GFX12-TRUE16-NEXT: s_mov_b32 s5, s1
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX12-TRUE16-NEXT: v_rndne_f16_e32 v0.l, v0.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GFX12-TRUE16-NEXT: v_rndne_f16_e32 v1.l, v1.l
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v1, v1
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
; GFX12-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_rndne_f16_e32 v0.l, v0.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
; GFX12-TRUE16-NEXT: buffer_store_b32 v1, off, s[4:7], null
; GFX12-TRUE16-NEXT: s_endpgm
;
; GFX12-FAKE16-LABEL: rint_v2f16:

View File

@ -344,11 +344,8 @@ define amdgpu_kernel void @sqrt_v2f16(
; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX12-TRUE16-SDAG-NEXT: v_sqrt_f16_e32 v0.l, v0.l
; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_1)
; GFX12-TRUE16-SDAG-NEXT: v_sqrt_f16_e32 v1.l, v1.l
; GFX12-TRUE16-SDAG-NEXT: v_mov_b32_e32 v1, v1
; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-SDAG-NEXT: v_mov_b16_e32 v0.h, v1.l
; GFX12-TRUE16-SDAG-NEXT: v_sqrt_f16_e32 v0.h, v1.l
; GFX12-TRUE16-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], null
; GFX12-TRUE16-SDAG-NEXT: s_endpgm
;

View File

@ -237,17 +237,15 @@ define amdgpu_kernel void @trunc_v2f16(
; GFX12-TRUE16-NEXT: s_mov_b32 s8, s2
; GFX12-TRUE16-NEXT: s_mov_b32 s9, s3
; GFX12-TRUE16-NEXT: s_mov_b32 s4, s0
; GFX12-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], null
; GFX12-TRUE16-NEXT: buffer_load_b32 v1, off, s[8:11], null
; GFX12-TRUE16-NEXT: s_mov_b32 s5, s1
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX12-TRUE16-NEXT: v_trunc_f16_e32 v0.l, v0.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GFX12-TRUE16-NEXT: v_trunc_f16_e32 v1.l, v1.l
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v1, v1
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
; GFX12-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_trunc_f16_e32 v0.l, v0.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
; GFX12-TRUE16-NEXT: buffer_store_b32 v1, off, s[4:7], null
; GFX12-TRUE16-NEXT: s_endpgm
;
; GFX12-FAKE16-LABEL: trunc_v2f16:

View File

@ -9680,8 +9680,6 @@ define amdgpu_kernel void @constant_zextload_v2i8_to_v2i16(ptr addrspace(1) %out
; GFX12-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v2
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v2, v2
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
; GFX12-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-TRUE16-NEXT: s_endpgm
@ -9814,8 +9812,6 @@ define amdgpu_kernel void @constant_sextload_v2i8_to_v2i16(ptr addrspace(1) %out
; GFX12-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v2
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v2, v2
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
; GFX12-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-TRUE16-NEXT: s_endpgm

View File

@ -1003,11 +1003,9 @@ define i16 @test_vector_reduce_and_v2i16(<2 x i16> %v) {
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_and_v2i16:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, s0
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, v0, v2
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, v0, v1
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_and_v2i16:
@ -1033,11 +1031,9 @@ define i16 @test_vector_reduce_and_v2i16(<2 x i16> %v) {
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
; GFX12-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, s0
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, v0, v2
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, v0, v1
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-FAKE16-LABEL: test_vector_reduce_and_v2i16:

View File

@ -1008,11 +1008,9 @@ define i16 @test_vector_reduce_mul_v2i16(<2 x i16> %v) {
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_mul_v2i16:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, s0
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_mul_v2i16:
@ -1038,11 +1036,9 @@ define i16 @test_vector_reduce_mul_v2i16(<2 x i16> %v) {
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, s0
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-FAKE16-LABEL: test_vector_reduce_mul_v2i16:
@ -1268,12 +1264,9 @@ define i16 @test_vector_reduce_mul_v4i16(<4 x i16> %v) {
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, s0
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_mul_v4i16:
@ -1302,12 +1295,9 @@ define i16 @test_vector_reduce_mul_v4i16(<4 x i16> %v) {
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX12-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, s0
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX12-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-FAKE16-LABEL: test_vector_reduce_mul_v4i16:
@ -1448,13 +1438,11 @@ define i16 @test_vector_reduce_mul_v8i16(<8 x i16> %v) {
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, s0
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_mul_v8i16:
@ -1490,13 +1478,11 @@ define i16 @test_vector_reduce_mul_v8i16(<8 x i16> %v) {
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX12-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX12-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, s0
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-FAKE16-LABEL: test_vector_reduce_mul_v8i16:
@ -1704,13 +1690,11 @@ define i16 @test_vector_reduce_mul_v16i16(<16 x i16> %v) {
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, s0
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_mul_v16i16:
@ -1761,13 +1745,11 @@ define i16 @test_vector_reduce_mul_v16i16(<16 x i16> %v) {
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX12-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX12-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, s0
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-FAKE16-LABEL: test_vector_reduce_mul_v16i16:

View File

@ -1034,11 +1034,9 @@ define i16 @test_vector_reduce_or_v2i16(<2 x i16> %v) {
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_or_v2i16:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, s0
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v0, v0, v2
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v0, v0, v1
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_or_v2i16:
@ -1064,11 +1062,9 @@ define i16 @test_vector_reduce_or_v2i16(<2 x i16> %v) {
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, s0
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_or_b32_e32 v0, v0, v2
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_or_b32_e32 v0, v0, v1
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-FAKE16-LABEL: test_vector_reduce_or_v2i16:

View File

@ -980,11 +980,9 @@ define i16 @test_vector_reduce_xor_v2i16(<2 x i16> %v) {
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_xor_v2i16:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, s0
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_xor_b32_e32 v0, v0, v2
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_xor_v2i16:
@ -1010,11 +1008,9 @@ define i16 @test_vector_reduce_xor_v2i16(<2 x i16> %v) {
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, s0
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_xor_b32_e32 v0, v0, v2
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-FAKE16-LABEL: test_vector_reduce_xor_v2i16: