[AMDGPU] Add V_ASHR_PK_I8_I32 and V_ASHR_PK_U8_I32 on gfx1250 (#151389)
This commit is contained in:
parent
8377f90c21
commit
b3b36d3590
@ -2015,6 +2015,8 @@ let AssemblerPredicate = isGFX11Plus in {
|
||||
|
||||
// These instructions differ from GFX12 variant by supporting DPP:
|
||||
defm V_LSHL_ADD_U64 : VOP3Only_Realtriple_gfx1250<0x252>;
|
||||
defm V_ASHR_PK_I8_I32 : VOP3Only_Realtriple_gfx1250<0x290>;
|
||||
defm V_ASHR_PK_U8_I32 : VOP3Only_Realtriple_gfx1250<0x291>;
|
||||
defm V_CVT_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36d>;
|
||||
defm V_CVT_SR_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36e>;
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950 %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250 %s
|
||||
define amdgpu_kernel void @v_ashr_pk_i8_i32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
|
||||
; GFX950-LABEL: v_ashr_pk_i8_i32:
|
||||
; GFX950: ; %bb.0:
|
||||
@ -13,6 +14,20 @@ define amdgpu_kernel void @v_ashr_pk_i8_i32(ptr addrspace(1) %out, i32 %src0, i3
|
||||
; GFX950-NEXT: v_ashr_pk_i8_i32 v1, s0, v1, v2
|
||||
; GFX950-NEXT: global_store_short v0, v1, s[6:7]
|
||||
; GFX950-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: v_ashr_pk_i8_i32:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x2c
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_and_b32 s2, s2, 31
|
||||
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX1250-NEXT: v_ashr_pk_i8_i32 v0, s0, s1, v0
|
||||
; GFX1250-NEXT: global_store_b16 v1, v0, s[4:5]
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
%insert.0 = insertelement <2 x i32> poison, i32 %src0, i64 0
|
||||
%build_vector = insertelement <2 x i32> %insert.0, i32 %src1, i64 1
|
||||
%src2.clamp = and i32 %src2, 31
|
||||
@ -40,6 +55,20 @@ define amdgpu_kernel void @v_ashr_pk_u8_i32(ptr addrspace(1) %out, i32 %src0, i3
|
||||
; GFX950-NEXT: v_ashr_pk_u8_i32 v1, s0, v1, v2
|
||||
; GFX950-NEXT: global_store_short v0, v1, s[6:7]
|
||||
; GFX950-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: v_ashr_pk_u8_i32:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x2c
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_and_b32 s2, s2, 31
|
||||
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX1250-NEXT: v_ashr_pk_u8_i32 v0, s0, s1, v0
|
||||
; GFX1250-NEXT: global_store_b16 v1, v0, s[4:5]
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
%insert.0 = insertelement <2 x i32> poison, i32 %src0, i64 0
|
||||
%build_vector = insertelement <2 x i32> %insert.0, i32 %src1, i64 1
|
||||
%src2.clamp = and i32 %src2, 31
|
||||
|
@ -366,3 +366,39 @@ v_cvt_sr_pk_bf16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
|
||||
|
||||
v_cvt_sr_pk_bf16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
|
||||
// GFX1250: v_cvt_sr_pk_bf16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x6e,0xd7,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
|
||||
|
||||
v_ashr_pk_i8_i32 v2, s4, v7, v8
|
||||
// GFX1250: v_ashr_pk_i8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0e,0x22,0x04]
|
||||
|
||||
v_ashr_pk_i8_i32 v2, v4, 0, 1
|
||||
// GFX1250: v_ashr_pk_i8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x01,0x05,0x02]
|
||||
|
||||
v_ashr_pk_i8_i32 v2, v4, 3, s2
|
||||
// GFX1250: v_ashr_pk_i8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x07,0x09,0x00]
|
||||
|
||||
v_ashr_pk_i8_i32 v2, s4, 4, v2
|
||||
// GFX1250: v_ashr_pk_i8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x08,0x09,0x04]
|
||||
|
||||
v_ashr_pk_i8_i32 v2, v4, v7, 12345
|
||||
// GFX1250: v_ashr_pk_i8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
|
||||
|
||||
v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1]
|
||||
// GFX1250: v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x90,0xd6,0x02,0x07,0x12,0x04]
|
||||
|
||||
v_ashr_pk_u8_i32 v2, s4, v7, v8
|
||||
// GFX1250: v_ashr_pk_u8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0e,0x22,0x04]
|
||||
|
||||
v_ashr_pk_u8_i32 v2, v4, 0, 1
|
||||
// GFX1250: v_ashr_pk_u8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x01,0x05,0x02]
|
||||
|
||||
v_ashr_pk_u8_i32 v2, v4, 3, s2
|
||||
// GFX1250: v_ashr_pk_u8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x07,0x09,0x00]
|
||||
|
||||
v_ashr_pk_u8_i32 v2, s4, 4, v2
|
||||
// GFX1250: v_ashr_pk_u8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x08,0x09,0x04]
|
||||
|
||||
v_ashr_pk_u8_i32 v2, v4, v7, 12345
|
||||
// GFX1250: v_ashr_pk_u8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
|
||||
|
||||
v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1]
|
||||
// GFX1250: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04]
|
||||
|
@ -366,3 +366,39 @@ v_cvt_sr_pk_bf16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
|
||||
|
||||
v_cvt_sr_pk_bf16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
|
||||
// GFX1250: v_cvt_sr_pk_bf16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x6e,0xd7,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
|
||||
|
||||
v_ashr_pk_i8_i32 v2, s4, v7, v8
|
||||
// GFX1250: v_ashr_pk_i8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0e,0x22,0x04]
|
||||
|
||||
v_ashr_pk_i8_i32 v2, v4, 0, 1
|
||||
// GFX1250: v_ashr_pk_i8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x01,0x05,0x02]
|
||||
|
||||
v_ashr_pk_i8_i32 v2, v4, 3, s2
|
||||
// GFX1250: v_ashr_pk_i8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x07,0x09,0x00]
|
||||
|
||||
v_ashr_pk_i8_i32 v2, s4, 4, v2
|
||||
// GFX1250: v_ashr_pk_i8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x08,0x09,0x04]
|
||||
|
||||
v_ashr_pk_i8_i32 v2, v4, v7, 12345
|
||||
// GFX1250: v_ashr_pk_i8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
|
||||
|
||||
v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1]
|
||||
// GFX1250: v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x90,0xd6,0x02,0x07,0x12,0x04]
|
||||
|
||||
v_ashr_pk_u8_i32 v2, s4, v7, v8
|
||||
// GFX1250: v_ashr_pk_u8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0e,0x22,0x04]
|
||||
|
||||
v_ashr_pk_u8_i32 v2, v4, 0, 1
|
||||
// GFX1250: v_ashr_pk_u8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x01,0x05,0x02]
|
||||
|
||||
v_ashr_pk_u8_i32 v2, v4, 3, s2
|
||||
// GFX1250: v_ashr_pk_u8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x07,0x09,0x00]
|
||||
|
||||
v_ashr_pk_u8_i32 v2, s4, 4, v2
|
||||
// GFX1250: v_ashr_pk_u8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x08,0x09,0x04]
|
||||
|
||||
v_ashr_pk_u8_i32 v2, v4, v7, 12345
|
||||
// GFX1250: v_ashr_pk_u8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
|
||||
|
||||
v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1]
|
||||
// GFX1250: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04]
|
||||
|
@ -297,3 +297,35 @@ v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 row_xmask:0 row_mask:0x1 ban
|
||||
v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
|
||||
// GFX1250: v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x6e,0xd7,0xfa,0xfe,0xf7,0x7b,0xff,0x6f,0x05,0x30]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_i8_i32 v2, v4, v7, v8 quad_perm:[1,2,3,1]
|
||||
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_i8_i32 v2, v4, v7, v8 row_share:3 fi:1
|
||||
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_i8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
|
||||
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_i8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3
|
||||
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_u8_i32 v2, v4, v7, v8 quad_perm:[1,2,3,1]
|
||||
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_u8_i32 v2, v4, v7, v8 row_share:3 fi:1
|
||||
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_u8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
|
||||
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_u8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3
|
||||
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
@ -297,3 +297,35 @@ v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 row_xmask:0 row_mask:0x1 ban
|
||||
v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
|
||||
// GFX1250: v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x6e,0xd7,0xfa,0xfe,0xf7,0x7b,0xff,0x6f,0x05,0x30]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_i8_i32 v2, v4, v7, v8 quad_perm:[1,2,3,1]
|
||||
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_i8_i32 v2, v4, v7, v8 row_share:3 fi:1
|
||||
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_i8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
|
||||
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_i8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3
|
||||
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_u8_i32 v2, v4, v7, v8 quad_perm:[1,2,3,1]
|
||||
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_u8_i32 v2, v4, v7, v8 row_share:3 fi:1
|
||||
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_u8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
|
||||
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_u8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3
|
||||
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
@ -209,3 +209,19 @@ v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
|
||||
v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
|
||||
// GFX1250: v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x6e,0xd7,0xe9,0xfe,0xf7,0x7b,0xff,0x00,0x00,0x00]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_i8_i32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
|
||||
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x90,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_i8_i32 v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1
|
||||
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x40,0x90,0xd6,0xea,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_u8_i32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
|
||||
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x91,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_u8_i32 v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1
|
||||
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x40,0x91,0xd6,0xea,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
@ -209,3 +209,19 @@ v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
|
||||
v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
|
||||
// GFX1250: v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x6e,0xd7,0xe9,0xfe,0xf7,0x7b,0xff,0x00,0x00,0x00]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_i8_i32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
|
||||
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x90,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_i8_i32 v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1
|
||||
// GFX1250: v_ashr_pk_i8_i32_e64_dpp v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x40,0x90,0xd6,0xea,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_u8_i32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
|
||||
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x91,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_ashr_pk_u8_i32 v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1
|
||||
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x40,0x91,0xd6,0xea,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
@ -86,3 +86,13 @@ v_mad_nc_i64_i32 v[4:5], v2, v5, v[6:7] quad_perm:[3,2,1,0]
|
||||
// GFX1251-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share
|
||||
// GFX125X-ERR-NEXT:{{^}}v_mad_nc_i64_i32 v[4:5], v2, v5, v[6:7] quad_perm:[3,2,1,0]
|
||||
// GFX125X-ERR-NEXT:{{^}} ^
|
||||
|
||||
v_ashr_pk_i8_i32 v1, v2, v3, v4 clamp
|
||||
// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
|
||||
// GFX125X-ERR-NEXT:{{^}}v_ashr_pk_i8_i32 v1, v2, v3, v4 clamp
|
||||
// GFX125X-ERR-NEXT:{{^}} ^
|
||||
|
||||
v_ashr_pk_u8_i32 v1, v2, v3, v4 clamp
|
||||
// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
|
||||
// GFX125X-ERR-NEXT:{{^}}v_ashr_pk_u8_i32 v1, v2, v3, v4 clamp
|
||||
// GFX125X-ERR-NEXT:{{^}} ^
|
||||
|
@ -386,6 +386,42 @@
|
||||
0x05,0x01,0x6e,0xd7,0x7e,0x82,0xad,0x01
|
||||
# GFX1250: v_cvt_sr_pk_bf16_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x6e,0xd7,0x7e,0x82,0xad,0x01]
|
||||
|
||||
0x02,0x00,0x90,0xd6,0x04,0x08,0x09,0x04
|
||||
# GFX1250: v_ashr_pk_i8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x08,0x09,0x04]
|
||||
|
||||
0x02,0x00,0x90,0xd6,0x04,0x0e,0x22,0x04
|
||||
# GFX1250: v_ashr_pk_i8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0e,0x22,0x04]
|
||||
|
||||
0x02,0x00,0x90,0xd6,0x04,0x01,0x05,0x02
|
||||
# GFX1250: v_ashr_pk_i8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x01,0x05,0x02]
|
||||
|
||||
0x02,0x00,0x90,0xd6,0x04,0x07,0x09,0x00
|
||||
# GFX1250: v_ashr_pk_i8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x07,0x09,0x00]
|
||||
|
||||
0x02,0x00,0x90,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00
|
||||
# GFX1250: v_ashr_pk_i8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
|
||||
|
||||
0x01,0x40,0x90,0xd6,0x02,0x07,0x12,0x04
|
||||
# GFX1250: v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x90,0xd6,0x02,0x07,0x12,0x04]
|
||||
|
||||
0x02,0x00,0x91,0xd6,0x04,0x08,0x09,0x04
|
||||
# GFX1250: v_ashr_pk_u8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x08,0x09,0x04]
|
||||
|
||||
0x02,0x00,0x91,0xd6,0x04,0x0e,0x22,0x04
|
||||
# GFX1250: v_ashr_pk_u8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0e,0x22,0x04]
|
||||
|
||||
0x02,0x00,0x91,0xd6,0x04,0x01,0x05,0x02
|
||||
# GFX1250: v_ashr_pk_u8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x01,0x05,0x02]
|
||||
|
||||
0x02,0x00,0x91,0xd6,0x04,0x07,0x09,0x00
|
||||
# GFX1250: v_ashr_pk_u8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x07,0x09,0x00]
|
||||
|
||||
0x02,0x00,0x91,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00
|
||||
# GFX1250: v_ashr_pk_u8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
|
||||
|
||||
0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04
|
||||
# GFX1250: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04]
|
||||
|
||||
## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||
# GFX1250-FAKE16: {{.*}}
|
||||
# GFX1250-REAL16: {{.*}}
|
||||
|
@ -245,3 +245,27 @@
|
||||
|
||||
0x05,0x00,0x6e,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff
|
||||
# GFX1250: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
|
||||
|
||||
0x02,0x40,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53
|
||||
# GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
|
||||
|
||||
0x02,0x00,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff
|
||||
# GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
|
||||
|
||||
0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff
|
||||
# GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
|
||||
|
||||
0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff
|
||||
# GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
|
||||
|
||||
0x02,0x40,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53
|
||||
# GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
|
||||
|
||||
0x02,0x00,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff
|
||||
# GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
|
||||
|
||||
0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff
|
||||
# GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
|
||||
|
||||
0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff
|
||||
# GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
|
||||
|
@ -175,3 +175,15 @@
|
||||
|
||||
0x05,0x00,0x6e,0xd7,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05
|
||||
# GFX1250: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6e,0xd7,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
|
||||
|
||||
0x05,0x40,0x90,0xd6,0xea,0x04,0x0e,0x00,0x01,0x77,0x39,0x05
|
||||
# GFX1250: v_ashr_pk_i8_i32_e64_dpp v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x40,0x90,0xd6,0xea,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
|
||||
|
||||
0x05,0x00,0x90,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
|
||||
# GFX1250: v_ashr_pk_i8_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x90,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
|
||||
|
||||
0x05,0x40,0x91,0xd6,0xea,0x04,0x0e,0x00,0x01,0x77,0x39,0x05
|
||||
# GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x40,0x91,0xd6,0xea,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
|
||||
|
||||
0x05,0x00,0x91,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
|
||||
# GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x91,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
|
||||
|
Loading…
x
Reference in New Issue
Block a user