[AMDGPU] Add support for v_cvt_f32_bf8 on gfx1250 (#147600)
This PR does not need to change much beyond the tests: the instruction is already supported, it just was not tested. Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin@amd.com>
This commit is contained in:
parent
026307958b
commit
d64faec940
@ -1,11 +1,17 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250 %s
|
||||
|
||||
define amdgpu_cs float @test_cvt_f32_bf8_byte0(i32 %a) {
|
||||
; GFX12-LABEL: test_cvt_f32_bf8_byte0:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: v_cvt_f32_bf8_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX12-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX1250-LABEL: test_cvt_f32_bf8_byte0:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: v_cvt_f32_bf8_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX1250-NEXT: ; return to shader part epilog
|
||||
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
|
||||
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %tmp0, i32 0)
|
||||
ret float %ret
|
||||
@ -16,6 +22,11 @@ define amdgpu_cs float @test_cvt_f32_bf8_byte1(i32 %a) {
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX12-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX1250-LABEL: test_cvt_f32_bf8_byte1:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX1250-NEXT: ; return to shader part epilog
|
||||
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
|
||||
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %tmp0, i32 1)
|
||||
ret float %ret
|
||||
@ -26,6 +37,11 @@ define amdgpu_cs float @test_cvt_f32_bf8_byte2(i32 %a) {
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX12-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX1250-LABEL: test_cvt_f32_bf8_byte2:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX1250-NEXT: ; return to shader part epilog
|
||||
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
|
||||
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %tmp0, i32 2)
|
||||
ret float %ret
|
||||
@ -36,6 +52,11 @@ define amdgpu_cs float @test_cvt_f32_fp8_byte3(i32 %a) {
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: v_cvt_f32_fp8_e64_dpp v0, v0 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX12-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX1250-LABEL: test_cvt_f32_fp8_byte3:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: v_cvt_f32_fp8_e64_dpp v0, v0 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX1250-NEXT: ; return to shader part epilog
|
||||
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
|
||||
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %tmp0, i32 3)
|
||||
ret float %ret
|
||||
@ -47,6 +68,13 @@ define amdgpu_cs void @test_cvt_pk_bf8_f32_word0(i32 %a, float %y, i32 %old, ptr
|
||||
; GFX12-NEXT: v_cvt_pk_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX12-NEXT: global_store_b32 v[3:4], v2, off
|
||||
; GFX12-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: test_cvt_pk_bf8_f32_word0:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-NEXT: v_cvt_pk_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX1250-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
|
||||
%tmp1 = bitcast i32 %tmp0 to float
|
||||
%ret = tail call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %tmp1, float %y, i32 %old, i1 false)
|
||||
@ -62,6 +90,15 @@ define amdgpu_cs void @test_cvt_pk_fp8_f32_word1(i32 %a, float %y, i32 %old, ptr
|
||||
; GFX12-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
|
||||
; GFX12-NEXT: global_store_b32 v[3:4], v2, off
|
||||
; GFX12-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: test_cvt_pk_fp8_f32_word1:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX1250-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
|
||||
; GFX1250-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
|
||||
%tmp1 = bitcast i32 %tmp0 to float
|
||||
%ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %tmp1, float %y, i32 %old, i1 true)
|
||||
@ -75,6 +112,13 @@ define amdgpu_cs void @test_cvt_sr_bf8_f32_byte0(i32 %a, i32 %r, i32 %old, ptr a
|
||||
; GFX12-NEXT: v_cvt_sr_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX12-NEXT: global_store_b32 v[3:4], v2, off
|
||||
; GFX12-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: test_cvt_sr_bf8_f32_byte0:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-NEXT: v_cvt_sr_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX1250-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
|
||||
%tmp1 = bitcast i32 %tmp0 to float
|
||||
%ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %tmp1, i32 %r, i32 %old, i32 0)
|
||||
@ -88,6 +132,13 @@ define amdgpu_cs void @test_cvt_sr_fp8_f32_byte1(i32 %a, i32 %r, i32 %old, ptr a
|
||||
; GFX12-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX12-NEXT: global_store_b32 v[3:4], v2, off
|
||||
; GFX12-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: test_cvt_sr_fp8_f32_byte1:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX1250-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
|
||||
%tmp1 = bitcast i32 %tmp0 to float
|
||||
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %tmp1, i32 %r, i32 %old, i32 1)
|
||||
@ -101,6 +152,13 @@ define amdgpu_cs void @test_cvt_sr_fp8_f32_byte2(i32 %a, i32 %r, i32 %old, ptr a
|
||||
; GFX12-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX12-NEXT: global_store_b32 v[3:4], v2, off
|
||||
; GFX12-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: test_cvt_sr_fp8_f32_byte2:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX1250-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
|
||||
%tmp1 = bitcast i32 %tmp0 to float
|
||||
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %tmp1, i32 %r, i32 %old, i32 2)
|
||||
|
||||
@ -88,6 +88,15 @@ v_cvt_pk_f16_fp8 v1, s2
|
||||
v_cvt_pk_f16_fp8 v1, 100
|
||||
// GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e32 v1, s3
|
||||
// GFX1250: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xda,0x02,0x7e]
|
||||
|
||||
v_cvt_f32_bf8_e32 v1, 3
|
||||
// GFX1250: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xda,0x02,0x7e]
|
||||
|
||||
v_cvt_f32_bf8_e32 v1, v3
|
||||
// GFX1250: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xdb,0x02,0x7e]
|
||||
|
||||
v_cvt_f32_fp8_e32 v1, s3
|
||||
// GFX1250: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xd8,0x02,0x7e]
|
||||
|
||||
|
||||
@ -97,6 +97,15 @@ v_cvt_pk_f16_fp8 v1, s2
|
||||
v_cvt_pk_f16_fp8 v1, 100
|
||||
// GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e32 v1, s3
|
||||
// GFX1250: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xda,0x02,0x7e]
|
||||
|
||||
v_cvt_f32_bf8_e32 v1, 3
|
||||
// GFX1250: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xda,0x02,0x7e]
|
||||
|
||||
v_cvt_f32_bf8_e32 v1, v3
|
||||
// GFX1250: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xdb,0x02,0x7e]
|
||||
|
||||
v_cvt_f32_fp8_e32 v1, s3
|
||||
// GFX1250: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xd8,0x02,0x7e]
|
||||
|
||||
|
||||
@ -1,6 +1,42 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
|
||||
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, s3
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, s3 byte_sel:1
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, s3 byte_sel:2
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, s3 byte_sel:3
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, 3
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 ; encoding: [0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, 3 byte_sel:1
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x83,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, 3 byte_sel:2
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x83,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, 3 byte_sel:3
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x83,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, v3
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x01,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, v3 byte_sel:1
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x01,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, v3 byte_sel:2
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x01,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, v3 byte_sel:3
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x01,0x00,0x00]
|
||||
|
||||
v_cvt_f32_fp8_e64 v1, s3
|
||||
// GFX1250: v_cvt_f32_fp8_e64 v1, s3 ; encoding: [0x01,0x00,0xec,0xd5,0x03,0x00,0x00,0x00]
|
||||
|
||||
|
||||
@ -1,6 +1,42 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
|
||||
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, s3
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, s3 byte_sel:1
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, s3 byte_sel:2
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, s3 byte_sel:3
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, 3
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 ; encoding: [0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, 3 byte_sel:1
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x83,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, 3 byte_sel:2
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x83,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, 3 byte_sel:3
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x83,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, v3
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x01,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, v3 byte_sel:1
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x01,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, v3 byte_sel:2
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x01,0x00,0x00]
|
||||
|
||||
v_cvt_f32_bf8_e64 v1, v3 byte_sel:3
|
||||
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x01,0x00,0x00]
|
||||
|
||||
v_cvt_f32_fp8_e64 v1, s3
|
||||
// GFX1250: v_cvt_f32_fp8_e64 v1, s3 ; encoding: [0x01,0x00,0xec,0xd5,0x03,0x00,0x00,0x00]
|
||||
|
||||
|
||||
@ -108,6 +108,15 @@
|
||||
# GFX1250-REAL16: v_cvt_pk_f16_fp8 v1, v2.l ; encoding: [0x02,0xeb,0x02,0x7e]
|
||||
# GFX1250-FAKE16: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e]
|
||||
|
||||
0x03,0xda,0x02,0x7e
|
||||
# GFX1250: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xda,0x02,0x7e]
|
||||
|
||||
0x83,0xda,0x02,0x7e
|
||||
# GFX1250: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xda,0x02,0x7e]
|
||||
|
||||
0x03,0xdb,0x02,0x7e
|
||||
# GFX1250: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xdb,0x02,0x7e]
|
||||
|
||||
0x03,0xd8,0x02,0x7e
|
||||
# GFX1250: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xd8,0x02,0x7e]
|
||||
|
||||
|
||||
@ -75,6 +75,12 @@
|
||||
0xfa,0xd8,0x02,0x7e,0x03,0x1b,0x00,0x2e
|
||||
# GFX1250: v_cvt_f32_fp8_dpp v1, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xe ; encoding: [0xfa,0xd8,0x02,0x7e,0x03,0x1b,0x00,0x2e]
|
||||
|
||||
0xfa,0xda,0x02,0x7e,0x03,0xe4,0x00,0xac
|
||||
# GFX1250: v_cvt_f32_bf8_dpp v1, v3 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xc ; encoding: [0xfa,0xda,0x02,0x7e,0x03,0xe4,0x00,0xac]
|
||||
|
||||
0xfa,0xda,0x02,0x7e,0x03,0x1b,0x00,0x2e
|
||||
# GFX1250: v_cvt_f32_bf8_dpp v1, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xe ; encoding: [0xfa,0xda,0x02,0x7e,0x03,0x1b,0x00,0x2e]
|
||||
|
||||
0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff
|
||||
# GFX1250-REAL16: v_cvt_pk_f16_bf8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff]
|
||||
# GFX1250-FAKE16: v_cvt_pk_f16_bf8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff]
|
||||
|
||||
@ -2,6 +2,42 @@
|
||||
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s
|
||||
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
|
||||
|
||||
0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00
|
||||
# GFX1250: v_cvt_f32_bf8_e64 v1, 3 ; encoding: [0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00]
|
||||
|
||||
0x01,0x10,0xed,0xd5,0x83,0x00,0x00,0x00
|
||||
# GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x83,0x00,0x00,0x00]
|
||||
|
||||
0x01,0x08,0xed,0xd5,0x83,0x00,0x00,0x00
|
||||
# GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x83,0x00,0x00,0x00]
|
||||
|
||||
0x01,0x18,0xed,0xd5,0x83,0x00,0x00,0x00
|
||||
# GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x83,0x00,0x00,0x00]
|
||||
|
||||
0x01,0x00,0xed,0xd5,0x03,0x00,0x00,0x00
|
||||
# GFX1250: v_cvt_f32_bf8_e64 v1, s3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x00,0x00,0x00]
|
||||
|
||||
0x01,0x10,0xed,0xd5,0x03,0x00,0x00,0x00
|
||||
# GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x00,0x00,0x00]
|
||||
|
||||
0x01,0x08,0xed,0xd5,0x03,0x00,0x00,0x00
|
||||
# GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x00,0x00,0x00]
|
||||
|
||||
0x01,0x18,0xed,0xd5,0x03,0x00,0x00,0x00
|
||||
# GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x00,0x00,0x00]
|
||||
|
||||
0x01,0x00,0xed,0xd5,0x03,0x01,0x00,0x00
|
||||
# GFX1250: v_cvt_f32_bf8_e64 v1, v3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x01,0x00,0x00]
|
||||
|
||||
0x01,0x10,0xed,0xd5,0x03,0x01,0x00,0x00
|
||||
# GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x01,0x00,0x00]
|
||||
|
||||
0x01,0x08,0xed,0xd5,0x03,0x01,0x00,0x00
|
||||
# GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x01,0x00,0x00]
|
||||
|
||||
0x01,0x18,0xed,0xd5,0x03,0x01,0x00,0x00
|
||||
# GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x01,0x00,0x00]
|
||||
|
||||
0x01,0x00,0xec,0xd5,0x83,0x00,0x00,0x00
|
||||
# GFX1250: v_cvt_f32_fp8_e64 v1, 3 ; encoding: [0x01,0x00,0xec,0xd5,0x83,0x00,0x00,0x00]
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user