[AMDGPU] Add assembler/disassembler support for v_dual_dot2acc_f32_bf16 (#118984)
There is still no codegen support because the corresponding v_dot2c_f32_bf16 instruction is not supported on GFX11.
This commit is contained in:
parent
62a25a4c7c
commit
f9d6d46a8e
@ -1190,7 +1190,7 @@ let Constraints = "$vdst = $src2",
|
||||
defm V_DOT8C_I32_I4 : VOP2Inst<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>;
|
||||
|
||||
let SubtargetPredicate = HasDot13Insts in
|
||||
defm V_DOT2C_F32_BF16 : VOP2Inst<"v_dot2c_f32_bf16", VOP_DOT_ACC_F32_V2BF16>;
|
||||
defm V_DOT2C_F32_BF16 : VOP2Inst_VOPD<"v_dot2c_f32_bf16", VOP_DOT_ACC_F32_V2BF16, 0xd, "v_dot2acc_f32_bf16">;
|
||||
}
|
||||
|
||||
let AddedComplexity = 30 in {
|
||||
|
@ -87,12 +87,12 @@ class VOPD_Base<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
|
||||
let ReadsModeReg = !or(VDX.ReadsModeReg, VDY.ReadsModeReg);
|
||||
let mayRaiseFPException = ReadsModeReg;
|
||||
|
||||
// V_DUAL_FMAC and V_DUAL_DOT2ACC_F32_F16 need a dummy src2 tied to dst for
|
||||
// passes to track its uses. Its presence does not affect VOPD formation rules
|
||||
// because the rules for src2 and dst are the same. src2X and src2Y should not
|
||||
// be encoded.
|
||||
bit hasSrc2AccX = !or(!eq(VDX.Mnemonic, "v_fmac_f32"), !eq(VDX.Mnemonic, "v_dot2c_f32_f16"));
|
||||
bit hasSrc2AccY = !or(!eq(VDY.Mnemonic, "v_fmac_f32"), !eq(VDY.Mnemonic, "v_dot2c_f32_f16"));
|
||||
// V_DUAL_FMAC and V_DUAL_DOT2ACC_F32_F16 and V_DUAL_DOT2ACC_F32_BF16 need a
|
||||
// dummy src2 tied to dst for passes to track its uses. Its presence does not
|
||||
// affect VOPD formation rules because the rules for src2 and dst are the
|
||||
// same. src2X and src2Y should not be encoded.
|
||||
bit hasSrc2AccX = !or(!eq(VDX.Mnemonic, "v_fmac_f32"), !eq(VDX.Mnemonic, "v_dot2c_f32_f16"), !eq(VDX.Mnemonic, "v_dot2c_f32_bf16"));
|
||||
bit hasSrc2AccY = !or(!eq(VDY.Mnemonic, "v_fmac_f32"), !eq(VDY.Mnemonic, "v_dot2c_f32_f16"), !eq(VDY.Mnemonic, "v_dot2c_f32_bf16"));
|
||||
string ConstraintsX = !if(hasSrc2AccX, "$src2X = $vdstX", "");
|
||||
string ConstraintsY = !if(hasSrc2AccY, "$src2Y = $vdstY", "");
|
||||
let Constraints =
|
||||
@ -125,16 +125,12 @@ class VOPD_MADK<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
|
||||
let FixedSize = 1;
|
||||
}
|
||||
|
||||
// V_DUAL_DOT2ACC_F32_BF16 is a legal instruction, but V_DOT2ACC_F32_BF16 is
|
||||
// not. V_DUAL_DOT2C_F32_BF16 is a legal instruction on GFX12, but
|
||||
// V_DOT2C_F32_F16_e32 is not. Since we generate the DUAL form by converting
|
||||
// from the normal form we will never generate them.
|
||||
defvar VOPDPseudosCommon = [
|
||||
"V_FMAC_F32_e32", "V_FMAAK_F32", "V_FMAMK_F32", "V_MUL_F32_e32",
|
||||
"V_ADD_F32_e32", "V_SUB_F32_e32", "V_SUBREV_F32_e32", "V_MUL_LEGACY_F32_e32",
|
||||
"V_MOV_B32_e32", "V_CNDMASK_B32_e32", "V_MAX_F32_e32", "V_MIN_F32_e32"
|
||||
];
|
||||
defvar VOPDPseudosGFX11 = ["V_DOT2C_F32_F16_e32"];
|
||||
defvar VOPDPseudosGFX11 = ["V_DOT2C_F32_F16_e32", "V_DOT2C_F32_BF16_e32"];
|
||||
defvar VOPDYOnlyPseudosCommon = ["V_ADD_U32_e32", "V_LSHLREV_B32_e32",
|
||||
"V_AND_B32_e32"];
|
||||
|
||||
|
@ -14817,3 +14817,219 @@ v_dual_subrev_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5
|
||||
v_dual_subrev_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4
|
||||
// GFX11: encoding: [0x7c,0x0a,0x8c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_add_f32 v255, v4, v2 :: v_dual_dot2acc_f32_bf16 v6, v1, v3
|
||||
// GFX11: encoding: [0x04,0x05,0x1a,0xc9,0x01,0x07,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_add_f32 v255, v1, v2 :: v_dual_dot2acc_f32_bf16 v6, v255, v3
|
||||
// GFX11: encoding: [0x01,0x05,0x1a,0xc9,0xff,0x07,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_add_f32 v255, v255, v2 :: v_dual_dot2acc_f32_bf16 v6, v2, v3
|
||||
// GFX11: encoding: [0xff,0x05,0x1a,0xc9,0x02,0x07,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_add_f32 v255, v2, v2 :: v_dual_dot2acc_f32_bf16 v6, v3, v3
|
||||
// GFX11: encoding: [0x02,0x05,0x1a,0xc9,0x03,0x07,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_add_f32 v255, v3, v2 :: v_dual_dot2acc_f32_bf16 v6, v4, v3
|
||||
// GFX11: encoding: [0x03,0x05,0x1a,0xc9,0x04,0x07,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_add_f32 v255, s105, v2 :: v_dual_dot2acc_f32_bf16 v6, s1, v3
|
||||
// GFX11: encoding: [0x69,0x04,0x1a,0xc9,0x01,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_add_f32 v255, s1, v2 :: v_dual_dot2acc_f32_bf16 v6, s105, v3
|
||||
// GFX11: encoding: [0x01,0x04,0x1a,0xc9,0x69,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_add_f32 v255, ttmp15, v2 :: v_dual_dot2acc_f32_bf16 v6, vcc_lo, v3
|
||||
// GFX11: encoding: [0x7b,0x04,0x1a,0xc9,0x6a,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_add_f32 v255, exec_hi, v2 :: v_dual_dot2acc_f32_bf16 v6, vcc_hi, v3
|
||||
// GFX11: encoding: [0x7f,0x04,0x1a,0xc9,0x6b,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_add_f32 v255, exec_lo, v2 :: v_dual_dot2acc_f32_bf16 v6, ttmp15, v3
|
||||
// GFX11: encoding: [0x7e,0x04,0x1a,0xc9,0x7b,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_add_f32 v255, m0, v2 :: v_dual_dot2acc_f32_bf16 v6, m0, v3
|
||||
// GFX11: encoding: [0x7d,0x04,0x1a,0xc9,0x7d,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_dot2acc_f32_bf16 v6, exec_lo, v3
|
||||
// GFX11: encoding: [0x6b,0x04,0x1a,0xc9,0x7e,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_dot2acc_f32_bf16 v6, exec_hi, v3
|
||||
// GFX11: encoding: [0x6a,0x04,0x1a,0xc9,0x7f,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_dot2acc_f32_bf16 v6, null, v3
|
||||
// GFX11: encoding: [0xff,0x04,0x1a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_add_f32 v255, src_scc, v2 :: v_dual_dot2acc_f32_bf16 v6, -1, v3
|
||||
// GFX11: encoding: [0xfd,0x04,0x1a,0xc9,0xc1,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_add_f32 v255, 0.5, v3 :: v_dual_dot2acc_f32_bf16 v6, 0.5, v2
|
||||
// GFX11: encoding: [0xf0,0x06,0x1a,0xc9,0xf0,0x04,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_add_f32 v255, -1, v4 :: v_dual_dot2acc_f32_bf16 v6, src_scc, v5
|
||||
// GFX11: encoding: [0xc1,0x08,0x1a,0xc9,0xfd,0x0a,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_add_f32 v6, null, v5 :: v_dual_dot2acc_f32_bf16 v255, 0xfe0b, v4
|
||||
// GFX11: encoding: [0x7c,0x0a,0x1a,0xc9,0xff,0x08,0xfe,0x06,0x0b,0xfe,0x00,0x00]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3
|
||||
// GFX11: encoding: [0x04,0x05,0x48,0xcb,0x01,0x07,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3
|
||||
// GFX11: encoding: [0x01,0x05,0x48,0xcb,0xff,0x07,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3
|
||||
// GFX11: encoding: [0xff,0x05,0x48,0xcb,0x02,0x07,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3
|
||||
// GFX11: encoding: [0x02,0x05,0x48,0xcb,0x03,0x07,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3
|
||||
// GFX11: encoding: [0x03,0x05,0x48,0xcb,0x04,0x07,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3
|
||||
// GFX11: encoding: [0x69,0x04,0x48,0xcb,0x01,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3
|
||||
// GFX11: encoding: [0x01,0x04,0x48,0xcb,0x69,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3
|
||||
// GFX11: encoding: [0x7b,0x04,0x48,0xcb,0x6a,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3
|
||||
// GFX11: encoding: [0x7f,0x04,0x48,0xcb,0x6b,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3
|
||||
// GFX11: encoding: [0x7e,0x04,0x48,0xcb,0x7b,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3
|
||||
// GFX11: encoding: [0x7d,0x04,0x48,0xcb,0x7d,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3
|
||||
// GFX11: encoding: [0x6b,0x04,0x48,0xcb,0x7e,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3
|
||||
// GFX11: encoding: [0x6a,0x04,0x48,0xcb,0x7f,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, 0xfe0b, v2 :: v_dual_add_f32 v6, null, v3
|
||||
// GFX11: encoding: [0xff,0x04,0x48,0xcb,0x7c,0x06,0x06,0xff,0x0b,0xfe,0x00,0x00]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3
|
||||
// GFX11: encoding: [0xfd,0x04,0x48,0xcb,0xc1,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2
|
||||
// GFX11: encoding: [0xf0,0x06,0x48,0xcb,0xf0,0x04,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5
|
||||
// GFX11: encoding: [0xc1,0x08,0x48,0xcb,0xfd,0x0a,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4
|
||||
// GFX11: encoding: [0x7c,0x0a,0x48,0xcb,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3
|
||||
// GFX11: encoding: [0x04,0x05,0x60,0xcb,0x01,0x07,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3
|
||||
// GFX11: encoding: [0x01,0x05,0x60,0xcb,0xff,0x07,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3
|
||||
// GFX11: encoding: [0xff,0x05,0x60,0xcb,0x02,0x07,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3
|
||||
// GFX11: encoding: [0x02,0x05,0x60,0xcb,0x03,0x07,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3
|
||||
// GFX11: encoding: [0x03,0x05,0x60,0xcb,0x04,0x07,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3
|
||||
// GFX11: encoding: [0x69,0x04,0x60,0xcb,0x01,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3
|
||||
// GFX11: encoding: [0x01,0x04,0x60,0xcb,0x69,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3
|
||||
// GFX11: encoding: [0x7b,0x04,0x60,0xcb,0x6a,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3
|
||||
// GFX11: encoding: [0x7f,0x04,0x60,0xcb,0x6b,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3
|
||||
// GFX11: encoding: [0x7e,0x04,0x60,0xcb,0x7b,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3
|
||||
// GFX11: encoding: [0x7d,0x04,0x60,0xcb,0x7d,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3
|
||||
// GFX11: encoding: [0x6b,0x04,0x60,0xcb,0x7e,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3
|
||||
// GFX11: encoding: [0x6a,0x04,0x60,0xcb,0x7f,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, 0xfe0b, v2 :: v_dual_add_nc_u32 v6, null, v3
|
||||
// GFX11: encoding: [0xff,0x04,0x60,0xcb,0x7c,0x06,0x06,0xff,0x0b,0xfe,0x00,0x00]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3
|
||||
// GFX11: encoding: [0xfd,0x04,0x60,0xcb,0xc1,0x06,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2
|
||||
// GFX11: encoding: [0xf0,0x06,0x60,0xcb,0xf0,0x04,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5
|
||||
// GFX11: encoding: [0xc1,0x08,0x60,0xcb,0xfd,0x0a,0x06,0xff]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
||||
v_dual_dot2acc_f32_bf16 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4
|
||||
// GFX11: encoding: [0x7c,0x0a,0x60,0xcb,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf]
|
||||
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
|
||||
|
@ -2064,3 +2064,6 @@ ds_subrev_u64 v1, v[2:3]
|
||||
|
||||
ds_subrev_rtn_u64 v[5:6], v1, v[2:3]
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_dot2c_f32_bf16 v5, v1, v2
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
@ -11111,3 +11111,111 @@
|
||||
|
||||
# GFX11: v_dual_subrev_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x8c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf]
|
||||
0x7c,0x0a,0x8c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf
|
||||
|
||||
# GFX11: v_dual_add_f32 v255, v4, v2 :: v_dual_dot2acc_f32_bf16 v6, v1, v3 ; encoding: [0x04,0x05,0x1a,0xc9,0x01,0x07,0x06,0xff]
|
||||
0x04,0x05,0x1a,0xc9,0x01,0x07,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_add_f32 v255, v1, v2 :: v_dual_dot2acc_f32_bf16 v6, v255, v3 ; encoding: [0x01,0x05,0x1a,0xc9,0xff,0x07,0x06,0xff]
|
||||
0x01,0x05,0x1a,0xc9,0xff,0x07,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_add_f32 v255, v255, v2 :: v_dual_dot2acc_f32_bf16 v6, v2, v3 ; encoding: [0xff,0x05,0x1a,0xc9,0x02,0x07,0x06,0xff]
|
||||
0xff,0x05,0x1a,0xc9,0x02,0x07,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_add_f32 v255, v2, v2 :: v_dual_dot2acc_f32_bf16 v6, v3, v3 ; encoding: [0x02,0x05,0x1a,0xc9,0x03,0x07,0x06,0xff]
|
||||
0x02,0x05,0x1a,0xc9,0x03,0x07,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_add_f32 v255, v3, v2 :: v_dual_dot2acc_f32_bf16 v6, v4, v3 ; encoding: [0x03,0x05,0x1a,0xc9,0x04,0x07,0x06,0xff]
|
||||
0x03,0x05,0x1a,0xc9,0x04,0x07,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_add_f32 v255, s105, v2 :: v_dual_dot2acc_f32_bf16 v6, s1, v3 ; encoding: [0x69,0x04,0x1a,0xc9,0x01,0x06,0x06,0xff]
|
||||
0x69,0x04,0x1a,0xc9,0x01,0x06,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_add_f32 v255, s1, v2 :: v_dual_dot2acc_f32_bf16 v6, s105, v3 ; encoding: [0x01,0x04,0x1a,0xc9,0x69,0x06,0x06,0xff]
|
||||
0x01,0x04,0x1a,0xc9,0x69,0x06,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_dot2acc_f32_bf16 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x1a,0xc9,0x6a,0x06,0x06,0xff]
|
||||
0x7b,0x04,0x1a,0xc9,0x6a,0x06,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_dot2acc_f32_bf16 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x1a,0xc9,0x6b,0x06,0x06,0xff]
|
||||
0x7f,0x04,0x1a,0xc9,0x6b,0x06,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_dot2acc_f32_bf16 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x1a,0xc9,0x7b,0x06,0x06,0xff]
|
||||
0x7e,0x04,0x1a,0xc9,0x7b,0x06,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_add_f32 v255, m0, v2 :: v_dual_dot2acc_f32_bf16 v6, m0, v3 ; encoding: [0x7d,0x04,0x1a,0xc9,0x7d,0x06,0x06,0xff]
|
||||
0x7d,0x04,0x1a,0xc9,0x7d,0x06,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_dot2acc_f32_bf16 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x1a,0xc9,0x7e,0x06,0x06,0xff]
|
||||
0x6b,0x04,0x1a,0xc9,0x7e,0x06,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_dot2acc_f32_bf16 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x1a,0xc9,0x7f,0x06,0x06,0xff]
|
||||
0x6a,0x04,0x1a,0xc9,0x7f,0x06,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_dot2acc_f32_bf16 v6, null, v3 ; encoding: [0xff,0x04,0x1a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf]
|
||||
0xff,0x04,0x1a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf
|
||||
|
||||
# GFX11: v_dual_add_f32 v255, src_scc, v2 :: v_dual_dot2acc_f32_bf16 v6, -1, v3 ; encoding: [0xfd,0x04,0x1a,0xc9,0xc1,0x06,0x06,0xff]
|
||||
0xfd,0x04,0x1a,0xc9,0xc1,0x06,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_add_f32 v255, 0.5, v3 :: v_dual_dot2acc_f32_bf16 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x1a,0xc9,0xf0,0x04,0x06,0xff]
|
||||
0xf0,0x06,0x1a,0xc9,0xf0,0x04,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_add_f32 v255, -1, v4 :: v_dual_dot2acc_f32_bf16 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x1a,0xc9,0xfd,0x0a,0x06,0xff]
|
||||
0xc1,0x08,0x1a,0xc9,0xfd,0x0a,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_add_f32 v6, null, v5 :: v_dual_dot2acc_f32_bf16 v255, 0xfe0b, v4 ; encoding: [0x7c,0x0a,0x1a,0xc9,0xff,0x08,0xfe,0x06,0x0b,0xfe,0x00,0x00]
|
||||
0x7c,0x0a,0x1a,0xc9,0xff,0x08,0xfe,0x06,0x0b,0xfe,0x00,0x00
|
||||
|
||||
# GFX11: v_dual_dot2acc_f32_bf16 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x48,0xcb,0x01,0x07,0x06,0xff]
|
||||
0x04,0x05,0x48,0xcb,0x01,0x07,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_dot2acc_f32_bf16 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x48,0xcb,0xff,0x07,0x06,0xff]
|
||||
0x01,0x05,0x48,0xcb,0xff,0x07,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_dot2acc_f32_bf16 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x48,0xcb,0x02,0x07,0x06,0xff]
|
||||
0xff,0x05,0x48,0xcb,0x02,0x07,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_dot2acc_f32_bf16 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x48,0xcb,0x03,0x07,0x06,0xff]
|
||||
0x02,0x05,0x48,0xcb,0x03,0x07,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_dot2acc_f32_bf16 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x48,0xcb,0x04,0x07,0x06,0xff]
|
||||
0x03,0x05,0x48,0xcb,0x04,0x07,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_dot2acc_f32_bf16 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x48,0xcb,0x01,0x06,0x06,0xff]
|
||||
0x69,0x04,0x48,0xcb,0x01,0x06,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_dot2acc_f32_bf16 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x48,0xcb,0x69,0x06,0x06,0xff]
|
||||
0x01,0x04,0x48,0xcb,0x69,0x06,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_dot2acc_f32_bf16 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x48,0xcb,0x6a,0x06,0x06,0xff]
|
||||
0x7b,0x04,0x48,0xcb,0x6a,0x06,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_dot2acc_f32_bf16 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x48,0xcb,0x6b,0x06,0x06,0xff]
|
||||
0x7f,0x04,0x48,0xcb,0x6b,0x06,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_dot2acc_f32_bf16 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x48,0xcb,0x7b,0x06,0x06,0xff]
|
||||
0x7e,0x04,0x48,0xcb,0x7b,0x06,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_dot2acc_f32_bf16 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x48,0xcb,0x7d,0x06,0x06,0xff]
|
||||
0x7d,0x04,0x48,0xcb,0x7d,0x06,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_dot2acc_f32_bf16 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x48,0xcb,0x7e,0x06,0x06,0xff]
|
||||
0x6b,0x04,0x48,0xcb,0x7e,0x06,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_dot2acc_f32_bf16 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x48,0xcb,0x7f,0x06,0x06,0xff]
|
||||
0x6a,0x04,0x48,0xcb,0x7f,0x06,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_dot2acc_f32_bf16 v255, 0xfe0b, v2 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0x48,0xcb,0x7c,0x06,0x06,0xff,0x0b,0xfe,0x00,0x00]
|
||||
0xff,0x04,0x48,0xcb,0x7c,0x06,0x06,0xff,0x0b,0xfe,0x00,0x00
|
||||
|
||||
# GFX11: v_dual_dot2acc_f32_bf16 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x48,0xcb,0xc1,0x06,0x06,0xff]
|
||||
0xfd,0x04,0x48,0xcb,0xc1,0x06,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_dot2acc_f32_bf16 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x48,0xcb,0xf0,0x04,0x06,0xff]
|
||||
0xf0,0x06,0x48,0xcb,0xf0,0x04,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_dot2acc_f32_bf16 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x48,0xcb,0xfd,0x0a,0x06,0xff]
|
||||
0xc1,0x08,0x48,0xcb,0xfd,0x0a,0x06,0xff
|
||||
|
||||
# GFX11: v_dual_dot2acc_f32_bf16 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x48,0xcb,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf]
|
||||
0x7c,0x0a,0x48,0xcb,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf
|
||||
|
Loading…
x
Reference in New Issue
Block a user