[AMDGPU][True16] add true16 pattern for cvt_pk_fp32_f8 (#180096)

This commit is contained in:
Guo Chen 2026-03-24 15:03:00 -04:00 committed by GitHub
parent 7712249363
commit e71da01f0f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 299 additions and 136 deletions

View File

@ -821,12 +821,25 @@ class Cvt_PK_F32_F8_Pat_OpSel<SDPatternOperator node, int index,
(inst_e32 $src))
>;
// Selection pattern that maps a packed 8-bit-float -> v2f32 conversion node
// onto a VOP3 (e64) pseudo that takes a 16-bit source register.
//
//   node     - the SDPatternOperator being matched; it is applied to an i32
//              source plus the immediate `index` and yields a v2f32.
//   index    - immediate selecting which half of the 32-bit source is used:
//              a non-zero value picks the hi16 sub-register, zero picks lo16.
//   inst_e64 - the VOP3 pseudo emitted for the match.
//
// The i32 source is first copied into the VGPR_32 register class so that a
// 16-bit sub-register can be extracted from it.
// NOTE(review): the leading and trailing `0` operands are presumably the
// source-modifier and a trailing modifier (e.g. op_sel/clamp) immediates of
// the instruction profile -- confirm against the definition of inst_e64.
class Cvt_PK_F32_F8_Pat_t16<SDPatternOperator node, int index,
VOP3_Pseudo inst_e64> : GCNPat<
(v2f32 (node i32:$src, index)),
(inst_e64 0, (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS $src, VGPR_32)),
!if(index, hi16, lo16)), 0)
>;
// Instantiate the cvt_pk_f32_fp8 / cvt_pk_f32_bf8 selection patterns for both
// index values (0 and -1). Everything in this block is guarded by
// isGFX11Plus and HasFP8ConversionInsts.
// NOTE(review): -1 is presumably how the intrinsic's `i1 true` word-select
// immediate appears in the DAG -- confirm.
let SubtargetPredicate = isGFX11Plus, OtherPredicates = [HasFP8ConversionInsts] in {
foreach Index = [0, -1] in {
// NOTE(review): the next two defs carry no True16Predicate and repeat the
// fake16 defs further down. In this diff rendering they look like the
// pre-change lines that the commit replaces -- confirm before relying on them.
def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_fp8, Index,
V_CVT_PK_F32_FP8_fake16_e32, V_CVT_PK_F32_FP8_fake16_e64>;
def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_bf8, Index,
V_CVT_PK_F32_BF8_fake16_e32, V_CVT_PK_F32_BF8_fake16_e64>;
// Under UseFakeTrue16Insts: OpSel pattern with the fake16 e32/e64 pseudos.
let True16Predicate = UseFakeTrue16Insts in {
def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_fp8, Index,
V_CVT_PK_F32_FP8_fake16_e32, V_CVT_PK_F32_FP8_fake16_e64>;
def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_bf8, Index,
V_CVT_PK_F32_BF8_fake16_e32, V_CVT_PK_F32_BF8_fake16_e64>;
}
// Under UseRealTrue16Insts: t16 pattern, which takes only the t16 e64 pseudo
// and selects the source half via a 16-bit sub-register.
let True16Predicate = UseRealTrue16Insts in {
def : Cvt_PK_F32_F8_Pat_t16<int_amdgcn_cvt_pk_f32_fp8, Index, V_CVT_PK_F32_FP8_t16_e64>;
def : Cvt_PK_F32_F8_Pat_t16<int_amdgcn_cvt_pk_f32_bf8, Index, V_CVT_PK_F32_BF8_t16_e64>;
}
}
}

View File

@ -305,28 +305,51 @@ define <2 x float> @test_cvt_pk_f32_bf8_word0(i32 %a) {
; GFX9X-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0
; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_cvt_pk_f32_bf8_word0:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
; GFX1170-TRUE16-LABEL: test_cvt_pk_f32_bf8_word0:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0.l
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_pk_f32_bf8_word0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX1170-FAKE16-LABEL: test_cvt_pk_f32_bf8_word0:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: test_cvt_pk_f32_bf8_word0:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
; GFX12-TRUE16-LABEL: test_cvt_pk_f32_bf8_word0:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0.l
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: test_cvt_pk_f32_bf8_word0:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-TRUE16-LABEL: test_cvt_pk_f32_bf8_word0:
; GFX1250-TRUE16: ; %bb.0:
; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-TRUE16-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0.l
; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-FAKE16-LABEL: test_cvt_pk_f32_bf8_word0:
; GFX1250-FAKE16: ; %bb.0:
; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-FAKE16-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0
; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 false)
ret <2 x float> %ret
}
@ -338,28 +361,51 @@ define <2 x float> @test_cvt_pk_f32_bf8_word1(i32 %a) {
; GFX9X-NEXT: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_cvt_pk_f32_bf8_word1:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
; GFX1170-TRUE16-LABEL: test_cvt_pk_f32_bf8_word1:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0.h
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_pk_f32_bf8_word1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX1170-FAKE16-LABEL: test_cvt_pk_f32_bf8_word1:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: test_cvt_pk_f32_bf8_word1:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
; GFX12-TRUE16-LABEL: test_cvt_pk_f32_bf8_word1:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0.h
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: test_cvt_pk_f32_bf8_word1:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-TRUE16-LABEL: test_cvt_pk_f32_bf8_word1:
; GFX1250-TRUE16: ; %bb.0:
; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-TRUE16-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0.h
; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-FAKE16-LABEL: test_cvt_pk_f32_bf8_word1:
; GFX1250-FAKE16: ; %bb.0:
; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-FAKE16-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 true)
ret <2 x float> %ret
}
@ -371,28 +417,51 @@ define <2 x float> @test_cvt_pk_f32_fp8_word0(i32 %a) {
; GFX9X-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_cvt_pk_f32_fp8_word0:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
; GFX1170-TRUE16-LABEL: test_cvt_pk_f32_fp8_word0:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0.l
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_pk_f32_fp8_word0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX1170-FAKE16-LABEL: test_cvt_pk_f32_fp8_word0:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: test_cvt_pk_f32_fp8_word0:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
; GFX12-TRUE16-LABEL: test_cvt_pk_f32_fp8_word0:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0.l
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: test_cvt_pk_f32_fp8_word0:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-TRUE16-LABEL: test_cvt_pk_f32_fp8_word0:
; GFX1250-TRUE16: ; %bb.0:
; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-TRUE16-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0.l
; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-FAKE16-LABEL: test_cvt_pk_f32_fp8_word0:
; GFX1250-FAKE16: ; %bb.0:
; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-FAKE16-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 false)
ret <2 x float> %ret
}
@ -404,28 +473,51 @@ define <2 x float> @test_cvt_pk_f32_fp8_word1(i32 %a) {
; GFX9X-NEXT: v_cvt_pk_f32_fp8_sdwa v[0:1], v0 src0_sel:WORD_1
; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_cvt_pk_f32_fp8_word1:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_cvt_pk_f32_fp8_e64 v[0:1], v0 op_sel:[1,0]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
; GFX1170-TRUE16-LABEL: test_cvt_pk_f32_fp8_word1:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0.h
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_cvt_pk_f32_fp8_word1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_cvt_pk_f32_fp8_e64 v[0:1], v0 op_sel:[1,0]
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX1170-FAKE16-LABEL: test_cvt_pk_f32_fp8_word1:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_cvt_pk_f32_fp8_e64 v[0:1], v0 op_sel:[1,0]
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: test_cvt_pk_f32_fp8_word1:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_cvt_pk_f32_fp8_e64 v[0:1], v0 op_sel:[1,0]
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
; GFX12-TRUE16-LABEL: test_cvt_pk_f32_fp8_word1:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0.h
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: test_cvt_pk_f32_fp8_word1:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_cvt_pk_f32_fp8_e64 v[0:1], v0 op_sel:[1,0]
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-TRUE16-LABEL: test_cvt_pk_f32_fp8_word1:
; GFX1250-TRUE16: ; %bb.0:
; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-TRUE16-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0.h
; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-FAKE16-LABEL: test_cvt_pk_f32_fp8_word1:
; GFX1250-FAKE16: ; %bb.0:
; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-FAKE16-NEXT: v_cvt_pk_f32_fp8_e64 v[0:1], v0 op_sel:[1,0]
; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 true)
ret <2 x float> %ret
}
@ -1124,34 +1216,63 @@ define <2 x float> @test_sext_cvt_pk_f32_bf8_word1(i16 %a) {
; GFX9X-NEXT: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_sext_cvt_pk_f32_bf8_word1:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
; GFX1170-TRUE16-LABEL: test_sext_cvt_pk_f32_bf8_word1:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-TRUE16-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0.h
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_sext_cvt_pk_f32_bf8_word1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX1170-FAKE16-LABEL: test_sext_cvt_pk_f32_bf8_word1:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-FAKE16-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: test_sext_cvt_pk_f32_bf8_word1:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
; GFX12-TRUE16-LABEL: test_sext_cvt_pk_f32_bf8_word1:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0.h
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: test_sext_cvt_pk_f32_bf8_word1:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-TRUE16-LABEL: test_sext_cvt_pk_f32_bf8_word1:
; GFX1250-TRUE16: ; %bb.0:
; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-TRUE16-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0.h
; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-FAKE16-LABEL: test_sext_cvt_pk_f32_bf8_word1:
; GFX1250-FAKE16: ; %bb.0:
; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-FAKE16-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%a.sext = sext i16 %a to i32
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a.sext, i1 true)
ret <2 x float> %ret
@ -1165,34 +1286,63 @@ define <2 x float> @test_sext_cvt_pk_f32_fp8_word0(i16 %a) {
; GFX9X-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_sext_cvt_pk_f32_fp8_word0:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
; GFX1170-TRUE16-LABEL: test_sext_cvt_pk_f32_fp8_word0:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-TRUE16-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0.l
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_sext_cvt_pk_f32_fp8_word0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX1170-FAKE16-LABEL: test_sext_cvt_pk_f32_fp8_word0:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-FAKE16-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: test_sext_cvt_pk_f32_fp8_word0:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
; GFX12-TRUE16-LABEL: test_sext_cvt_pk_f32_fp8_word0:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0.l
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: test_sext_cvt_pk_f32_fp8_word0:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-TRUE16-LABEL: test_sext_cvt_pk_f32_fp8_word0:
; GFX1250-TRUE16: ; %bb.0:
; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-TRUE16-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0.l
; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-FAKE16-LABEL: test_sext_cvt_pk_f32_fp8_word0:
; GFX1250-FAKE16: ; %bb.0:
; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-FAKE16-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%a.sext = sext i16 %a to i32
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a.sext, i1 false)
ret <2 x float> %ret