[AMDGPU] Add support for v_cvt_f32_bf8 on gfx1250 (#147600)

This PR doesn't really need to change anything else, since the
instruction is
already supported, but just not tested.

Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin@amd.com>
This commit is contained in:
Shilei Tian 2025-07-08 20:40:17 -04:00 committed by GitHub
parent 026307958b
commit d64faec940
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 199 additions and 0 deletions

View File

@ -1,11 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250 %s
define amdgpu_cs float @test_cvt_f32_bf8_byte0(i32 %a) {
; GFX12-LABEL: test_cvt_f32_bf8_byte0:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cvt_f32_bf8_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX12-NEXT: ; return to shader part epilog
;
; GFX1250-LABEL: test_cvt_f32_bf8_byte0:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: v_cvt_f32_bf8_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX1250-NEXT: ; return to shader part epilog
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %tmp0, i32 0)
ret float %ret
@ -16,6 +22,11 @@ define amdgpu_cs float @test_cvt_f32_bf8_byte1(i32 %a) {
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX12-NEXT: ; return to shader part epilog
;
; GFX1250-LABEL: test_cvt_f32_bf8_byte1:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX1250-NEXT: ; return to shader part epilog
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %tmp0, i32 1)
ret float %ret
@ -26,6 +37,11 @@ define amdgpu_cs float @test_cvt_f32_bf8_byte2(i32 %a) {
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX12-NEXT: ; return to shader part epilog
;
; GFX1250-LABEL: test_cvt_f32_bf8_byte2:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX1250-NEXT: ; return to shader part epilog
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %tmp0, i32 2)
ret float %ret
@ -36,6 +52,11 @@ define amdgpu_cs float @test_cvt_f32_fp8_byte3(i32 %a) {
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cvt_f32_fp8_e64_dpp v0, v0 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX12-NEXT: ; return to shader part epilog
;
; GFX1250-LABEL: test_cvt_f32_fp8_byte3:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: v_cvt_f32_fp8_e64_dpp v0, v0 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX1250-NEXT: ; return to shader part epilog
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %tmp0, i32 3)
ret float %ret
@ -47,6 +68,13 @@ define amdgpu_cs void @test_cvt_pk_bf8_f32_word0(i32 %a, float %y, i32 %old, ptr
; GFX12-NEXT: v_cvt_pk_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX12-NEXT: global_store_b32 v[3:4], v2, off
; GFX12-NEXT: s_endpgm
;
; GFX1250-LABEL: test_cvt_pk_bf8_f32_word0:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-NEXT: v_cvt_pk_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX1250-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-NEXT: s_endpgm
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
%tmp1 = bitcast i32 %tmp0 to float
%ret = tail call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %tmp1, float %y, i32 %old, i1 false)
@ -62,6 +90,15 @@ define amdgpu_cs void @test_cvt_pk_fp8_f32_word1(i32 %a, float %y, i32 %old, ptr
; GFX12-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
; GFX12-NEXT: global_store_b32 v[3:4], v2, off
; GFX12-NEXT: s_endpgm
;
; GFX1250-LABEL: test_cvt_pk_fp8_f32_word1:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
; GFX1250-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-NEXT: s_endpgm
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
%tmp1 = bitcast i32 %tmp0 to float
%ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %tmp1, float %y, i32 %old, i1 true)
@ -75,6 +112,13 @@ define amdgpu_cs void @test_cvt_sr_bf8_f32_byte0(i32 %a, i32 %r, i32 %old, ptr a
; GFX12-NEXT: v_cvt_sr_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX12-NEXT: global_store_b32 v[3:4], v2, off
; GFX12-NEXT: s_endpgm
;
; GFX1250-LABEL: test_cvt_sr_bf8_f32_byte0:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-NEXT: v_cvt_sr_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX1250-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-NEXT: s_endpgm
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
%tmp1 = bitcast i32 %tmp0 to float
%ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %tmp1, i32 %r, i32 %old, i32 0)
@ -88,6 +132,13 @@ define amdgpu_cs void @test_cvt_sr_fp8_f32_byte1(i32 %a, i32 %r, i32 %old, ptr a
; GFX12-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX12-NEXT: global_store_b32 v[3:4], v2, off
; GFX12-NEXT: s_endpgm
;
; GFX1250-LABEL: test_cvt_sr_fp8_f32_byte1:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX1250-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-NEXT: s_endpgm
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
%tmp1 = bitcast i32 %tmp0 to float
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %tmp1, i32 %r, i32 %old, i32 1)
@ -101,6 +152,13 @@ define amdgpu_cs void @test_cvt_sr_fp8_f32_byte2(i32 %a, i32 %r, i32 %old, ptr a
; GFX12-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX12-NEXT: global_store_b32 v[3:4], v2, off
; GFX12-NEXT: s_endpgm
;
; GFX1250-LABEL: test_cvt_sr_fp8_f32_byte2:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX1250-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-NEXT: s_endpgm
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
%tmp1 = bitcast i32 %tmp0 to float
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %tmp1, i32 %r, i32 %old, i32 2)

View File

@ -88,6 +88,15 @@ v_cvt_pk_f16_fp8 v1, s2
v_cvt_pk_f16_fp8 v1, 100
// GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00]
v_cvt_f32_bf8_e32 v1, s3
// GFX1250: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xda,0x02,0x7e]
v_cvt_f32_bf8_e32 v1, 3
// GFX1250: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xda,0x02,0x7e]
v_cvt_f32_bf8_e32 v1, v3
// GFX1250: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xdb,0x02,0x7e]
v_cvt_f32_fp8_e32 v1, s3
// GFX1250: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xd8,0x02,0x7e]

View File

@ -97,6 +97,15 @@ v_cvt_pk_f16_fp8 v1, s2
v_cvt_pk_f16_fp8 v1, 100
// GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00]
v_cvt_f32_bf8_e32 v1, s3
// GFX1250: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xda,0x02,0x7e]
v_cvt_f32_bf8_e32 v1, 3
// GFX1250: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xda,0x02,0x7e]
v_cvt_f32_bf8_e32 v1, v3
// GFX1250: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xdb,0x02,0x7e]
v_cvt_f32_fp8_e32 v1, s3
// GFX1250: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xd8,0x02,0x7e]

View File

@ -1,6 +1,42 @@
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
v_cvt_f32_bf8_e64 v1, s3
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x00,0x00,0x00]
v_cvt_f32_bf8_e64 v1, s3 byte_sel:1
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x00,0x00,0x00]
v_cvt_f32_bf8_e64 v1, s3 byte_sel:2
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x00,0x00,0x00]
v_cvt_f32_bf8_e64 v1, s3 byte_sel:3
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x00,0x00,0x00]
v_cvt_f32_bf8_e64 v1, 3
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 ; encoding: [0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00]
v_cvt_f32_bf8_e64 v1, 3 byte_sel:1
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x83,0x00,0x00,0x00]
v_cvt_f32_bf8_e64 v1, 3 byte_sel:2
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x83,0x00,0x00,0x00]
v_cvt_f32_bf8_e64 v1, 3 byte_sel:3
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x83,0x00,0x00,0x00]
v_cvt_f32_bf8_e64 v1, v3
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x01,0x00,0x00]
v_cvt_f32_bf8_e64 v1, v3 byte_sel:1
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x01,0x00,0x00]
v_cvt_f32_bf8_e64 v1, v3 byte_sel:2
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x01,0x00,0x00]
v_cvt_f32_bf8_e64 v1, v3 byte_sel:3
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x01,0x00,0x00]
v_cvt_f32_fp8_e64 v1, s3
// GFX1250: v_cvt_f32_fp8_e64 v1, s3 ; encoding: [0x01,0x00,0xec,0xd5,0x03,0x00,0x00,0x00]

View File

@ -1,6 +1,42 @@
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
v_cvt_f32_bf8_e64 v1, s3
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x00,0x00,0x00]
v_cvt_f32_bf8_e64 v1, s3 byte_sel:1
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x00,0x00,0x00]
v_cvt_f32_bf8_e64 v1, s3 byte_sel:2
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x00,0x00,0x00]
v_cvt_f32_bf8_e64 v1, s3 byte_sel:3
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x00,0x00,0x00]
v_cvt_f32_bf8_e64 v1, 3
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 ; encoding: [0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00]
v_cvt_f32_bf8_e64 v1, 3 byte_sel:1
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x83,0x00,0x00,0x00]
v_cvt_f32_bf8_e64 v1, 3 byte_sel:2
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x83,0x00,0x00,0x00]
v_cvt_f32_bf8_e64 v1, 3 byte_sel:3
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x83,0x00,0x00,0x00]
v_cvt_f32_bf8_e64 v1, v3
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x01,0x00,0x00]
v_cvt_f32_bf8_e64 v1, v3 byte_sel:1
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x01,0x00,0x00]
v_cvt_f32_bf8_e64 v1, v3 byte_sel:2
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x01,0x00,0x00]
v_cvt_f32_bf8_e64 v1, v3 byte_sel:3
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x01,0x00,0x00]
v_cvt_f32_fp8_e64 v1, s3
// GFX1250: v_cvt_f32_fp8_e64 v1, s3 ; encoding: [0x01,0x00,0xec,0xd5,0x03,0x00,0x00,0x00]

View File

@ -108,6 +108,15 @@
# GFX1250-REAL16: v_cvt_pk_f16_fp8 v1, v2.l ; encoding: [0x02,0xeb,0x02,0x7e]
# GFX1250-FAKE16: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e]
0x03,0xda,0x02,0x7e
# GFX1250: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xda,0x02,0x7e]
0x83,0xda,0x02,0x7e
# GFX1250: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xda,0x02,0x7e]
0x03,0xdb,0x02,0x7e
# GFX1250: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xdb,0x02,0x7e]
0x03,0xd8,0x02,0x7e
# GFX1250: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xd8,0x02,0x7e]

View File

@ -75,6 +75,12 @@
0xfa,0xd8,0x02,0x7e,0x03,0x1b,0x00,0x2e
# GFX1250: v_cvt_f32_fp8_dpp v1, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xe ; encoding: [0xfa,0xd8,0x02,0x7e,0x03,0x1b,0x00,0x2e]
0xfa,0xda,0x02,0x7e,0x03,0xe4,0x00,0xac
# GFX1250: v_cvt_f32_bf8_dpp v1, v3 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xc ; encoding: [0xfa,0xda,0x02,0x7e,0x03,0xe4,0x00,0xac]
0xfa,0xda,0x02,0x7e,0x03,0x1b,0x00,0x2e
# GFX1250: v_cvt_f32_bf8_dpp v1, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xe ; encoding: [0xfa,0xda,0x02,0x7e,0x03,0x1b,0x00,0x2e]
0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff
# GFX1250-REAL16: v_cvt_pk_f16_bf8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff]
# GFX1250-FAKE16: v_cvt_pk_f16_bf8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff]

View File

@ -2,6 +2,42 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00
# GFX1250: v_cvt_f32_bf8_e64 v1, 3 ; encoding: [0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00]
0x01,0x10,0xed,0xd5,0x83,0x00,0x00,0x00
# GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x83,0x00,0x00,0x00]
0x01,0x08,0xed,0xd5,0x83,0x00,0x00,0x00
# GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x83,0x00,0x00,0x00]
0x01,0x18,0xed,0xd5,0x83,0x00,0x00,0x00
# GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x83,0x00,0x00,0x00]
0x01,0x00,0xed,0xd5,0x03,0x00,0x00,0x00
# GFX1250: v_cvt_f32_bf8_e64 v1, s3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x00,0x00,0x00]
0x01,0x10,0xed,0xd5,0x03,0x00,0x00,0x00
# GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x00,0x00,0x00]
0x01,0x08,0xed,0xd5,0x03,0x00,0x00,0x00
# GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x00,0x00,0x00]
0x01,0x18,0xed,0xd5,0x03,0x00,0x00,0x00
# GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x00,0x00,0x00]
0x01,0x00,0xed,0xd5,0x03,0x01,0x00,0x00
# GFX1250: v_cvt_f32_bf8_e64 v1, v3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x01,0x00,0x00]
0x01,0x10,0xed,0xd5,0x03,0x01,0x00,0x00
# GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x01,0x00,0x00]
0x01,0x08,0xed,0xd5,0x03,0x01,0x00,0x00
# GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x01,0x00,0x00]
0x01,0x18,0xed,0xd5,0x03,0x01,0x00,0x00
# GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x01,0x00,0x00]
0x01,0x00,0xec,0xd5,0x83,0x00,0x00,0x00
# GFX1250: v_cvt_f32_fp8_e64 v1, 3 ; encoding: [0x01,0x00,0xec,0xd5,0x83,0x00,0x00,0x00]