From d64faec94071e67c2318712bc3f61734e28f4e2a Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 8 Jul 2025 20:40:17 -0400 Subject: [PATCH] [AMDGPU] Add support for `v_cvt_f32_bf8` on gfx1250 (#147600) This PR doesn't really need to change anything else, since the instruction is already supported, but just not tested. Co-authored-by: Mekhanoshin, Stanislav --- .../CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll | 58 +++++++++++++++++++ llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s | 9 +++ llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s | 9 +++ .../gfx1250_asm_vop3_from_vop1-fake16.s | 36 ++++++++++++ .../MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s | 36 ++++++++++++ .../Disassembler/AMDGPU/gfx1250_dasm_vop1.txt | 9 +++ .../AMDGPU/gfx1250_dasm_vop1_dpp16.txt | 6 ++ .../AMDGPU/gfx1250_dasm_vop3_from_vop1.txt | 36 ++++++++++++ 8 files changed, 199 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll index 7f1c01a7a000..aaaa75107881 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll @@ -1,11 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250 %s define amdgpu_cs float @test_cvt_f32_bf8_byte0(i32 %a) { ; GFX12-LABEL: test_cvt_f32_bf8_byte0: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_cvt_f32_bf8_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX12-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: test_cvt_f32_bf8_byte0: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_cvt_f32_bf8_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1250-NEXT: ; return to shader part epilog %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1) %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %tmp0, i32 0) ret float %ret @@ -16,6 +22,11 @@ define amdgpu_cs float @test_cvt_f32_bf8_byte1(i32 %a) { ; GFX12: ; %bb.0: ; GFX12-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX12-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: test_cvt_f32_bf8_byte1: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1250-NEXT: ; return to shader part epilog %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1) %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %tmp0, i32 1) ret float %ret @@ -26,6 +37,11 @@ define amdgpu_cs float @test_cvt_f32_bf8_byte2(i32 %a) { ; GFX12: ; %bb.0: ; GFX12-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX12-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: test_cvt_f32_bf8_byte2: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1250-NEXT: ; return to shader part epilog %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1) %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %tmp0, i32 2) ret float %ret @@ -36,6 +52,11 @@ define amdgpu_cs float @test_cvt_f32_fp8_byte3(i32 %a) { ; GFX12: ; %bb.0: ; GFX12-NEXT: v_cvt_f32_fp8_e64_dpp v0, v0 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX12-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: test_cvt_f32_fp8_byte3: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_cvt_f32_fp8_e64_dpp v0, v0 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1250-NEXT: ; return to shader part epilog %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1) %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %tmp0, i32 3) ret float %ret @@ -47,6 +68,13 @@ define amdgpu_cs void @test_cvt_pk_bf8_f32_word0(i32 %a, float %y, i32 %old, ptr ; GFX12-NEXT: v_cvt_pk_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX12-NEXT: global_store_b32 v[3:4], v2, off ; GFX12-NEXT: s_endpgm +; +; GFX1250-LABEL: test_cvt_pk_bf8_f32_word0: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-NEXT: v_cvt_pk_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1250-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-NEXT: s_endpgm %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1) %tmp1 = bitcast i32 %tmp0 to float %ret = tail call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %tmp1, float %y, i32 %old, i1 false) @@ -62,6 +90,15 @@ define amdgpu_cs void @test_cvt_pk_fp8_f32_word1(i32 %a, float %y, i32 %old, ptr ; GFX12-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1] ; GFX12-NEXT: global_store_b32 v[3:4], v2, off ; GFX12-NEXT: s_endpgm +; +; GFX1250-LABEL: test_cvt_pk_fp8_f32_word1: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1250-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1] +; GFX1250-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-NEXT: s_endpgm %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1) %tmp1 = bitcast i32 %tmp0 to float %ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %tmp1, float %y, i32 %old, i1 true) @@ -75,6 +112,13 @@ define amdgpu_cs void @test_cvt_sr_bf8_f32_byte0(i32 %a, i32 %r, i32 %old, ptr a ; GFX12-NEXT: v_cvt_sr_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX12-NEXT: global_store_b32 v[3:4], v2, off ; GFX12-NEXT: s_endpgm +; +; GFX1250-LABEL: test_cvt_sr_bf8_f32_byte0: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-NEXT: v_cvt_sr_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1250-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-NEXT: s_endpgm %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1) %tmp1 = bitcast i32 %tmp0 to float %ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %tmp1, i32 %r, i32 %old, i32 0) @@ -88,6 +132,13 @@ define amdgpu_cs void @test_cvt_sr_fp8_f32_byte1(i32 %a, i32 %r, i32 %old, ptr a ; GFX12-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX12-NEXT: global_store_b32 v[3:4], v2, off ; GFX12-NEXT: s_endpgm +; +; GFX1250-LABEL: test_cvt_sr_fp8_f32_byte1: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1250-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-NEXT: s_endpgm %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1) %tmp1 = bitcast i32 %tmp0 to float %ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %tmp1, i32 %r, i32 %old, i32 1) @@ -101,6 +152,13 @@ define amdgpu_cs void @test_cvt_sr_fp8_f32_byte2(i32 %a, i32 %r, i32 %old, ptr a ; GFX12-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX12-NEXT: global_store_b32 v[3:4], v2, off ; GFX12-NEXT: s_endpgm +; +; GFX1250-LABEL: test_cvt_sr_fp8_f32_byte2: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1250-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-NEXT: s_endpgm %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1) %tmp1 = bitcast i32 %tmp0 to float %ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %tmp1, i32 %r, i32 %old, i32 2) diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s index 9d437c7e7909..591c590bf378 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s @@ -88,6 +88,15 @@ v_cvt_pk_f16_fp8 v1, s2 v_cvt_pk_f16_fp8 v1, 100 // GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00] +v_cvt_f32_bf8_e32 v1, s3 +// GFX1250: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xda,0x02,0x7e] + +v_cvt_f32_bf8_e32 v1, 3 +// GFX1250: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xda,0x02,0x7e] + +v_cvt_f32_bf8_e32 v1, v3 +// GFX1250: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xdb,0x02,0x7e] + v_cvt_f32_fp8_e32 v1, s3 // GFX1250: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xd8,0x02,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s index ff16e80c2d93..7b16c22b47ac 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s @@ -97,6 +97,15 @@ v_cvt_pk_f16_fp8 v1, s2 v_cvt_pk_f16_fp8 v1, 100 // GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00] +v_cvt_f32_bf8_e32 v1, s3 +// GFX1250: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xda,0x02,0x7e] + +v_cvt_f32_bf8_e32 v1, 3 +// GFX1250: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xda,0x02,0x7e] + +v_cvt_f32_bf8_e32 v1, v3 +// GFX1250: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xdb,0x02,0x7e] + v_cvt_f32_fp8_e32 v1, s3 // GFX1250: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xd8,0x02,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s index 5874bb76b36d..f23557df0d74 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s @@ -1,6 +1,42 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s +v_cvt_f32_bf8_e64 v1, s3 +// GFX1250: v_cvt_f32_bf8_e64 v1, s3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x00,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, s3 byte_sel:1 +// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x00,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, s3 byte_sel:2 +// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x00,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, s3 byte_sel:3 +// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x00,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, 3 +// GFX1250: v_cvt_f32_bf8_e64 v1, 3 ; encoding: [0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, 3 byte_sel:1 +// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x83,0x00,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, 3 byte_sel:2 +// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x83,0x00,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, 3 byte_sel:3 +// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x83,0x00,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, v3 +// GFX1250: v_cvt_f32_bf8_e64 v1, v3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x01,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, v3 byte_sel:1 +// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x01,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, v3 byte_sel:2 +// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x01,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, v3 byte_sel:3 +// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x01,0x00,0x00] + v_cvt_f32_fp8_e64 v1, s3 // GFX1250: v_cvt_f32_fp8_e64 v1, s3 ; encoding: [0x01,0x00,0xec,0xd5,0x03,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s index 16bfc10fb16a..5f2313227c4b 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s @@ -1,6 +1,42 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s +v_cvt_f32_bf8_e64 v1, s3 +// GFX1250: v_cvt_f32_bf8_e64 v1, s3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x00,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, s3 byte_sel:1 +// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x00,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, s3 byte_sel:2 +// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x00,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, s3 byte_sel:3 +// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x00,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, 3 +// GFX1250: v_cvt_f32_bf8_e64 v1, 3 ; encoding: [0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, 3 byte_sel:1 +// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x83,0x00,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, 3 byte_sel:2 +// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x83,0x00,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, 3 byte_sel:3 +// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x83,0x00,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, v3 +// GFX1250: v_cvt_f32_bf8_e64 v1, v3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x01,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, v3 byte_sel:1 +// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x01,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, v3 byte_sel:2 +// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x01,0x00,0x00] + +v_cvt_f32_bf8_e64 v1, v3 byte_sel:3 +// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x01,0x00,0x00] + v_cvt_f32_fp8_e64 v1, s3 // GFX1250: v_cvt_f32_fp8_e64 v1, s3 ; encoding: [0x01,0x00,0xec,0xd5,0x03,0x00,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt index dd472d58893f..af583fe9697e 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt @@ -108,6 +108,15 @@ # GFX1250-REAL16: v_cvt_pk_f16_fp8 v1, v2.l ; encoding: [0x02,0xeb,0x02,0x7e] # GFX1250-FAKE16: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e] +0x03,0xda,0x02,0x7e +# GFX1250: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xda,0x02,0x7e] + +0x83,0xda,0x02,0x7e +# GFX1250: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xda,0x02,0x7e] + +0x03,0xdb,0x02,0x7e +# GFX1250: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xdb,0x02,0x7e] + 0x03,0xd8,0x02,0x7e # GFX1250: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xd8,0x02,0x7e] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt index 9656dcdeddae..1475be10201a 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt @@ -75,6 +75,12 @@ 0xfa,0xd8,0x02,0x7e,0x03,0x1b,0x00,0x2e # GFX1250: v_cvt_f32_fp8_dpp v1, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xe ; encoding: [0xfa,0xd8,0x02,0x7e,0x03,0x1b,0x00,0x2e] +0xfa,0xda,0x02,0x7e,0x03,0xe4,0x00,0xac +# GFX1250: v_cvt_f32_bf8_dpp v1, v3 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xc ; encoding: [0xfa,0xda,0x02,0x7e,0x03,0xe4,0x00,0xac] + +0xfa,0xda,0x02,0x7e,0x03,0x1b,0x00,0x2e +# GFX1250: v_cvt_f32_bf8_dpp v1, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xe ; encoding: [0xfa,0xda,0x02,0x7e,0x03,0x1b,0x00,0x2e] + 0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff # GFX1250-REAL16: v_cvt_pk_f16_bf8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff] # GFX1250-FAKE16: v_cvt_pk_f16_bf8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt index ae98cf2e0ff5..e0acec3c1e3e 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt @@ -2,6 +2,42 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s +0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00 +# GFX1250: v_cvt_f32_bf8_e64 v1, 3 ; encoding: [0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00] + +0x01,0x10,0xed,0xd5,0x83,0x00,0x00,0x00 +# GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x83,0x00,0x00,0x00] + +0x01,0x08,0xed,0xd5,0x83,0x00,0x00,0x00 +# GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x83,0x00,0x00,0x00] + +0x01,0x18,0xed,0xd5,0x83,0x00,0x00,0x00 +# GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x83,0x00,0x00,0x00] + +0x01,0x00,0xed,0xd5,0x03,0x00,0x00,0x00 +# GFX1250: v_cvt_f32_bf8_e64 v1, s3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x00,0x00,0x00] + +0x01,0x10,0xed,0xd5,0x03,0x00,0x00,0x00 +# GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x00,0x00,0x00] + +0x01,0x08,0xed,0xd5,0x03,0x00,0x00,0x00 +# GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x00,0x00,0x00] + +0x01,0x18,0xed,0xd5,0x03,0x00,0x00,0x00 +# GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x00,0x00,0x00] + +0x01,0x00,0xed,0xd5,0x03,0x01,0x00,0x00 +# GFX1250: v_cvt_f32_bf8_e64 v1, v3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x01,0x00,0x00] + +0x01,0x10,0xed,0xd5,0x03,0x01,0x00,0x00 +# GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x01,0x00,0x00] + +0x01,0x08,0xed,0xd5,0x03,0x01,0x00,0x00 +# GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x01,0x00,0x00] + +0x01,0x18,0xed,0xd5,0x03,0x01,0x00,0x00 +# GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x01,0x00,0x00] + 0x01,0x00,0xec,0xd5,0x83,0x00,0x00,0x00 # GFX1250: v_cvt_f32_fp8_e64 v1, 3 ; encoding: [0x01,0x00,0xec,0xd5,0x83,0x00,0x00,0x00]