[AMDGPU] Add assembler/disassembler support for v_dual_dot2acc_f32_bf16 (#118984)

There is still no codegen support because the corresponding non-dual
v_dot2c_f32_bf16 instruction is not supported on GFX11, and the dual form is
only ever generated by converting from the normal form.
This commit is contained in:
Jay Foad 2024-12-09 09:47:22 +00:00 committed by GitHub
parent 62a25a4c7c
commit f9d6d46a8e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 335 additions and 12 deletions

View File

@ -1190,7 +1190,7 @@ let Constraints = "$vdst = $src2",
defm V_DOT8C_I32_I4 : VOP2Inst<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>;
let SubtargetPredicate = HasDot13Insts in
defm V_DOT2C_F32_BF16 : VOP2Inst<"v_dot2c_f32_bf16", VOP_DOT_ACC_F32_V2BF16>;
defm V_DOT2C_F32_BF16 : VOP2Inst_VOPD<"v_dot2c_f32_bf16", VOP_DOT_ACC_F32_V2BF16, 0xd, "v_dot2acc_f32_bf16">;
}
let AddedComplexity = 30 in {

View File

@ -87,12 +87,12 @@ class VOPD_Base<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
let ReadsModeReg = !or(VDX.ReadsModeReg, VDY.ReadsModeReg);
let mayRaiseFPException = ReadsModeReg;
// V_DUAL_FMAC and V_DUAL_DOT2ACC_F32_F16 need a dummy src2 tied to dst for
// passes to track its uses. Its presence does not affect VOPD formation rules
// because the rules for src2 and dst are the same. src2X and src2Y should not
// be encoded.
bit hasSrc2AccX = !or(!eq(VDX.Mnemonic, "v_fmac_f32"), !eq(VDX.Mnemonic, "v_dot2c_f32_f16"));
bit hasSrc2AccY = !or(!eq(VDY.Mnemonic, "v_fmac_f32"), !eq(VDY.Mnemonic, "v_dot2c_f32_f16"));
// V_DUAL_FMAC, V_DUAL_DOT2ACC_F32_F16 and V_DUAL_DOT2ACC_F32_BF16 need a
// dummy src2 tied to dst for passes to track its uses. Its presence does not
// affect VOPD formation rules because the rules for src2 and dst are the
// same. src2X and src2Y should not be encoded.
bit hasSrc2AccX = !or(!eq(VDX.Mnemonic, "v_fmac_f32"), !eq(VDX.Mnemonic, "v_dot2c_f32_f16"), !eq(VDX.Mnemonic, "v_dot2c_f32_bf16"));
bit hasSrc2AccY = !or(!eq(VDY.Mnemonic, "v_fmac_f32"), !eq(VDY.Mnemonic, "v_dot2c_f32_f16"), !eq(VDY.Mnemonic, "v_dot2c_f32_bf16"));
string ConstraintsX = !if(hasSrc2AccX, "$src2X = $vdstX", "");
string ConstraintsY = !if(hasSrc2AccY, "$src2Y = $vdstY", "");
let Constraints =
@ -125,16 +125,12 @@ class VOPD_MADK<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
let FixedSize = 1;
}
// V_DUAL_DOT2ACC_F32_BF16 is a legal instruction, but V_DOT2ACC_F32_BF16 is
// not. V_DUAL_DOT2C_F32_BF16 is a legal instruction on GFX12, but
// V_DOT2C_F32_F16_e32 is not. Since we generate the DUAL form by converting
// from the normal form we will never generate them.
defvar VOPDPseudosCommon = [
"V_FMAC_F32_e32", "V_FMAAK_F32", "V_FMAMK_F32", "V_MUL_F32_e32",
"V_ADD_F32_e32", "V_SUB_F32_e32", "V_SUBREV_F32_e32", "V_MUL_LEGACY_F32_e32",
"V_MOV_B32_e32", "V_CNDMASK_B32_e32", "V_MAX_F32_e32", "V_MIN_F32_e32"
];
defvar VOPDPseudosGFX11 = ["V_DOT2C_F32_F16_e32"];
defvar VOPDPseudosGFX11 = ["V_DOT2C_F32_F16_e32", "V_DOT2C_F32_BF16_e32"];
defvar VOPDYOnlyPseudosCommon = ["V_ADD_U32_e32", "V_LSHLREV_B32_e32",
"V_AND_B32_e32"];

View File

@ -14817,3 +14817,219 @@ v_dual_subrev_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5
v_dual_subrev_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4
// GFX11: encoding: [0x7c,0x0a,0x8c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_add_f32 v255, v4, v2 :: v_dual_dot2acc_f32_bf16 v6, v1, v3
// GFX11: encoding: [0x04,0x05,0x1a,0xc9,0x01,0x07,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_add_f32 v255, v1, v2 :: v_dual_dot2acc_f32_bf16 v6, v255, v3
// GFX11: encoding: [0x01,0x05,0x1a,0xc9,0xff,0x07,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_add_f32 v255, v255, v2 :: v_dual_dot2acc_f32_bf16 v6, v2, v3
// GFX11: encoding: [0xff,0x05,0x1a,0xc9,0x02,0x07,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_add_f32 v255, v2, v2 :: v_dual_dot2acc_f32_bf16 v6, v3, v3
// GFX11: encoding: [0x02,0x05,0x1a,0xc9,0x03,0x07,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_add_f32 v255, v3, v2 :: v_dual_dot2acc_f32_bf16 v6, v4, v3
// GFX11: encoding: [0x03,0x05,0x1a,0xc9,0x04,0x07,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_add_f32 v255, s105, v2 :: v_dual_dot2acc_f32_bf16 v6, s1, v3
// GFX11: encoding: [0x69,0x04,0x1a,0xc9,0x01,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_add_f32 v255, s1, v2 :: v_dual_dot2acc_f32_bf16 v6, s105, v3
// GFX11: encoding: [0x01,0x04,0x1a,0xc9,0x69,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_add_f32 v255, ttmp15, v2 :: v_dual_dot2acc_f32_bf16 v6, vcc_lo, v3
// GFX11: encoding: [0x7b,0x04,0x1a,0xc9,0x6a,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_add_f32 v255, exec_hi, v2 :: v_dual_dot2acc_f32_bf16 v6, vcc_hi, v3
// GFX11: encoding: [0x7f,0x04,0x1a,0xc9,0x6b,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_add_f32 v255, exec_lo, v2 :: v_dual_dot2acc_f32_bf16 v6, ttmp15, v3
// GFX11: encoding: [0x7e,0x04,0x1a,0xc9,0x7b,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_add_f32 v255, m0, v2 :: v_dual_dot2acc_f32_bf16 v6, m0, v3
// GFX11: encoding: [0x7d,0x04,0x1a,0xc9,0x7d,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_dot2acc_f32_bf16 v6, exec_lo, v3
// GFX11: encoding: [0x6b,0x04,0x1a,0xc9,0x7e,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_dot2acc_f32_bf16 v6, exec_hi, v3
// GFX11: encoding: [0x6a,0x04,0x1a,0xc9,0x7f,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_dot2acc_f32_bf16 v6, null, v3
// GFX11: encoding: [0xff,0x04,0x1a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_add_f32 v255, src_scc, v2 :: v_dual_dot2acc_f32_bf16 v6, -1, v3
// GFX11: encoding: [0xfd,0x04,0x1a,0xc9,0xc1,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_add_f32 v255, 0.5, v3 :: v_dual_dot2acc_f32_bf16 v6, 0.5, v2
// GFX11: encoding: [0xf0,0x06,0x1a,0xc9,0xf0,0x04,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_add_f32 v255, -1, v4 :: v_dual_dot2acc_f32_bf16 v6, src_scc, v5
// GFX11: encoding: [0xc1,0x08,0x1a,0xc9,0xfd,0x0a,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_add_f32 v6, null, v5 :: v_dual_dot2acc_f32_bf16 v255, 0xfe0b, v4
// GFX11: encoding: [0x7c,0x0a,0x1a,0xc9,0xff,0x08,0xfe,0x06,0x0b,0xfe,0x00,0x00]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3
// GFX11: encoding: [0x04,0x05,0x48,0xcb,0x01,0x07,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3
// GFX11: encoding: [0x01,0x05,0x48,0xcb,0xff,0x07,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3
// GFX11: encoding: [0xff,0x05,0x48,0xcb,0x02,0x07,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3
// GFX11: encoding: [0x02,0x05,0x48,0xcb,0x03,0x07,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3
// GFX11: encoding: [0x03,0x05,0x48,0xcb,0x04,0x07,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3
// GFX11: encoding: [0x69,0x04,0x48,0xcb,0x01,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3
// GFX11: encoding: [0x01,0x04,0x48,0xcb,0x69,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3
// GFX11: encoding: [0x7b,0x04,0x48,0xcb,0x6a,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3
// GFX11: encoding: [0x7f,0x04,0x48,0xcb,0x6b,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3
// GFX11: encoding: [0x7e,0x04,0x48,0xcb,0x7b,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3
// GFX11: encoding: [0x7d,0x04,0x48,0xcb,0x7d,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3
// GFX11: encoding: [0x6b,0x04,0x48,0xcb,0x7e,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3
// GFX11: encoding: [0x6a,0x04,0x48,0xcb,0x7f,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, 0xfe0b, v2 :: v_dual_add_f32 v6, null, v3
// GFX11: encoding: [0xff,0x04,0x48,0xcb,0x7c,0x06,0x06,0xff,0x0b,0xfe,0x00,0x00]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3
// GFX11: encoding: [0xfd,0x04,0x48,0xcb,0xc1,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2
// GFX11: encoding: [0xf0,0x06,0x48,0xcb,0xf0,0x04,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5
// GFX11: encoding: [0xc1,0x08,0x48,0xcb,0xfd,0x0a,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4
// GFX11: encoding: [0x7c,0x0a,0x48,0xcb,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3
// GFX11: encoding: [0x04,0x05,0x60,0xcb,0x01,0x07,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3
// GFX11: encoding: [0x01,0x05,0x60,0xcb,0xff,0x07,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3
// GFX11: encoding: [0xff,0x05,0x60,0xcb,0x02,0x07,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3
// GFX11: encoding: [0x02,0x05,0x60,0xcb,0x03,0x07,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3
// GFX11: encoding: [0x03,0x05,0x60,0xcb,0x04,0x07,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3
// GFX11: encoding: [0x69,0x04,0x60,0xcb,0x01,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3
// GFX11: encoding: [0x01,0x04,0x60,0xcb,0x69,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3
// GFX11: encoding: [0x7b,0x04,0x60,0xcb,0x6a,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3
// GFX11: encoding: [0x7f,0x04,0x60,0xcb,0x6b,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3
// GFX11: encoding: [0x7e,0x04,0x60,0xcb,0x7b,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3
// GFX11: encoding: [0x7d,0x04,0x60,0xcb,0x7d,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3
// GFX11: encoding: [0x6b,0x04,0x60,0xcb,0x7e,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3
// GFX11: encoding: [0x6a,0x04,0x60,0xcb,0x7f,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, 0xfe0b, v2 :: v_dual_add_nc_u32 v6, null, v3
// GFX11: encoding: [0xff,0x04,0x60,0xcb,0x7c,0x06,0x06,0xff,0x0b,0xfe,0x00,0x00]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3
// GFX11: encoding: [0xfd,0x04,0x60,0xcb,0xc1,0x06,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2
// GFX11: encoding: [0xf0,0x06,0x60,0xcb,0xf0,0x04,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5
// GFX11: encoding: [0xc1,0x08,0x60,0xcb,0xfd,0x0a,0x06,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
v_dual_dot2acc_f32_bf16 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4
// GFX11: encoding: [0x7c,0x0a,0x60,0xcb,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32

View File

@ -2064,3 +2064,6 @@ ds_subrev_u64 v1, v[2:3]
ds_subrev_rtn_u64 v[5:6], v1, v[2:3]
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_dot2c_f32_bf16 v5, v1, v2
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

View File

@ -11111,3 +11111,111 @@
# GFX11: v_dual_subrev_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x8c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf]
0x7c,0x0a,0x8c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf
# GFX11: v_dual_add_f32 v255, v4, v2 :: v_dual_dot2acc_f32_bf16 v6, v1, v3 ; encoding: [0x04,0x05,0x1a,0xc9,0x01,0x07,0x06,0xff]
0x04,0x05,0x1a,0xc9,0x01,0x07,0x06,0xff
# GFX11: v_dual_add_f32 v255, v1, v2 :: v_dual_dot2acc_f32_bf16 v6, v255, v3 ; encoding: [0x01,0x05,0x1a,0xc9,0xff,0x07,0x06,0xff]
0x01,0x05,0x1a,0xc9,0xff,0x07,0x06,0xff
# GFX11: v_dual_add_f32 v255, v255, v2 :: v_dual_dot2acc_f32_bf16 v6, v2, v3 ; encoding: [0xff,0x05,0x1a,0xc9,0x02,0x07,0x06,0xff]
0xff,0x05,0x1a,0xc9,0x02,0x07,0x06,0xff
# GFX11: v_dual_add_f32 v255, v2, v2 :: v_dual_dot2acc_f32_bf16 v6, v3, v3 ; encoding: [0x02,0x05,0x1a,0xc9,0x03,0x07,0x06,0xff]
0x02,0x05,0x1a,0xc9,0x03,0x07,0x06,0xff
# GFX11: v_dual_add_f32 v255, v3, v2 :: v_dual_dot2acc_f32_bf16 v6, v4, v3 ; encoding: [0x03,0x05,0x1a,0xc9,0x04,0x07,0x06,0xff]
0x03,0x05,0x1a,0xc9,0x04,0x07,0x06,0xff
# GFX11: v_dual_add_f32 v255, s105, v2 :: v_dual_dot2acc_f32_bf16 v6, s1, v3 ; encoding: [0x69,0x04,0x1a,0xc9,0x01,0x06,0x06,0xff]
0x69,0x04,0x1a,0xc9,0x01,0x06,0x06,0xff
# GFX11: v_dual_add_f32 v255, s1, v2 :: v_dual_dot2acc_f32_bf16 v6, s105, v3 ; encoding: [0x01,0x04,0x1a,0xc9,0x69,0x06,0x06,0xff]
0x01,0x04,0x1a,0xc9,0x69,0x06,0x06,0xff
# GFX11: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_dot2acc_f32_bf16 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x1a,0xc9,0x6a,0x06,0x06,0xff]
0x7b,0x04,0x1a,0xc9,0x6a,0x06,0x06,0xff
# GFX11: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_dot2acc_f32_bf16 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x1a,0xc9,0x6b,0x06,0x06,0xff]
0x7f,0x04,0x1a,0xc9,0x6b,0x06,0x06,0xff
# GFX11: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_dot2acc_f32_bf16 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x1a,0xc9,0x7b,0x06,0x06,0xff]
0x7e,0x04,0x1a,0xc9,0x7b,0x06,0x06,0xff
# GFX11: v_dual_add_f32 v255, m0, v2 :: v_dual_dot2acc_f32_bf16 v6, m0, v3 ; encoding: [0x7d,0x04,0x1a,0xc9,0x7d,0x06,0x06,0xff]
0x7d,0x04,0x1a,0xc9,0x7d,0x06,0x06,0xff
# GFX11: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_dot2acc_f32_bf16 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x1a,0xc9,0x7e,0x06,0x06,0xff]
0x6b,0x04,0x1a,0xc9,0x7e,0x06,0x06,0xff
# GFX11: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_dot2acc_f32_bf16 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x1a,0xc9,0x7f,0x06,0x06,0xff]
0x6a,0x04,0x1a,0xc9,0x7f,0x06,0x06,0xff
# GFX11: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_dot2acc_f32_bf16 v6, null, v3 ; encoding: [0xff,0x04,0x1a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf]
0xff,0x04,0x1a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf
# GFX11: v_dual_add_f32 v255, src_scc, v2 :: v_dual_dot2acc_f32_bf16 v6, -1, v3 ; encoding: [0xfd,0x04,0x1a,0xc9,0xc1,0x06,0x06,0xff]
0xfd,0x04,0x1a,0xc9,0xc1,0x06,0x06,0xff
# GFX11: v_dual_add_f32 v255, 0.5, v3 :: v_dual_dot2acc_f32_bf16 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x1a,0xc9,0xf0,0x04,0x06,0xff]
0xf0,0x06,0x1a,0xc9,0xf0,0x04,0x06,0xff
# GFX11: v_dual_add_f32 v255, -1, v4 :: v_dual_dot2acc_f32_bf16 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x1a,0xc9,0xfd,0x0a,0x06,0xff]
0xc1,0x08,0x1a,0xc9,0xfd,0x0a,0x06,0xff
# GFX11: v_dual_add_f32 v6, null, v5 :: v_dual_dot2acc_f32_bf16 v255, 0xfe0b, v4 ; encoding: [0x7c,0x0a,0x1a,0xc9,0xff,0x08,0xfe,0x06,0x0b,0xfe,0x00,0x00]
0x7c,0x0a,0x1a,0xc9,0xff,0x08,0xfe,0x06,0x0b,0xfe,0x00,0x00
# GFX11: v_dual_dot2acc_f32_bf16 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x48,0xcb,0x01,0x07,0x06,0xff]
0x04,0x05,0x48,0xcb,0x01,0x07,0x06,0xff
# GFX11: v_dual_dot2acc_f32_bf16 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x48,0xcb,0xff,0x07,0x06,0xff]
0x01,0x05,0x48,0xcb,0xff,0x07,0x06,0xff
# GFX11: v_dual_dot2acc_f32_bf16 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x48,0xcb,0x02,0x07,0x06,0xff]
0xff,0x05,0x48,0xcb,0x02,0x07,0x06,0xff
# GFX11: v_dual_dot2acc_f32_bf16 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x48,0xcb,0x03,0x07,0x06,0xff]
0x02,0x05,0x48,0xcb,0x03,0x07,0x06,0xff
# GFX11: v_dual_dot2acc_f32_bf16 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x48,0xcb,0x04,0x07,0x06,0xff]
0x03,0x05,0x48,0xcb,0x04,0x07,0x06,0xff
# GFX11: v_dual_dot2acc_f32_bf16 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x48,0xcb,0x01,0x06,0x06,0xff]
0x69,0x04,0x48,0xcb,0x01,0x06,0x06,0xff
# GFX11: v_dual_dot2acc_f32_bf16 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x48,0xcb,0x69,0x06,0x06,0xff]
0x01,0x04,0x48,0xcb,0x69,0x06,0x06,0xff
# GFX11: v_dual_dot2acc_f32_bf16 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x48,0xcb,0x6a,0x06,0x06,0xff]
0x7b,0x04,0x48,0xcb,0x6a,0x06,0x06,0xff
# GFX11: v_dual_dot2acc_f32_bf16 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x48,0xcb,0x6b,0x06,0x06,0xff]
0x7f,0x04,0x48,0xcb,0x6b,0x06,0x06,0xff
# GFX11: v_dual_dot2acc_f32_bf16 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x48,0xcb,0x7b,0x06,0x06,0xff]
0x7e,0x04,0x48,0xcb,0x7b,0x06,0x06,0xff
# GFX11: v_dual_dot2acc_f32_bf16 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x48,0xcb,0x7d,0x06,0x06,0xff]
0x7d,0x04,0x48,0xcb,0x7d,0x06,0x06,0xff
# GFX11: v_dual_dot2acc_f32_bf16 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x48,0xcb,0x7e,0x06,0x06,0xff]
0x6b,0x04,0x48,0xcb,0x7e,0x06,0x06,0xff
# GFX11: v_dual_dot2acc_f32_bf16 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x48,0xcb,0x7f,0x06,0x06,0xff]
0x6a,0x04,0x48,0xcb,0x7f,0x06,0x06,0xff
# GFX11: v_dual_dot2acc_f32_bf16 v255, 0xfe0b, v2 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0x48,0xcb,0x7c,0x06,0x06,0xff,0x0b,0xfe,0x00,0x00]
0xff,0x04,0x48,0xcb,0x7c,0x06,0x06,0xff,0x0b,0xfe,0x00,0x00
# GFX11: v_dual_dot2acc_f32_bf16 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x48,0xcb,0xc1,0x06,0x06,0xff]
0xfd,0x04,0x48,0xcb,0xc1,0x06,0x06,0xff
# GFX11: v_dual_dot2acc_f32_bf16 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x48,0xcb,0xf0,0x04,0x06,0xff]
0xf0,0x06,0x48,0xcb,0xf0,0x04,0x06,0xff
# GFX11: v_dual_dot2acc_f32_bf16 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x48,0xcb,0xfd,0x0a,0x06,0xff]
0xc1,0x08,0x48,0xcb,0xfd,0x0a,0x06,0xff
# GFX11: v_dual_dot2acc_f32_bf16 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x48,0xcb,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf]
0x7c,0x0a,0x48,0xcb,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf