[AMDGPU] Add v_cvt_sr|pk_bf8|fp8_f16 gfx1250 instructions (#151415)

This commit is contained in:
Stanislav Mekhanoshin 2025-07-30 17:24:45 -07:00 committed by GitHub
parent a8d0ae3412
commit ce40863209
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
28 changed files with 1710 additions and 4 deletions

View File

@ -702,6 +702,10 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts")
// f16 -> fp8/bf8 conversions.
// cvt_pk_*: short result from a 2 x half vector ("sV2h").
// cvt_sr_*: int result from (half, int, unsigned int, int) ("ihiUiIi"); the
// "I" prefix requires the last argument to be an integer constant expression.
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f16, "sV2h", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f16, "sV2h", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f16, "ihiUiIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f16, "ihiUiIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_i4_i8, "UsUi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_u4_u8, "UsUi", "nc", "gfx1250-insts")

View File

@ -398,6 +398,144 @@ void test_cvt_pk_f16_bf8(global half2* out, short a)
out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a);
}
// CHECK-LABEL: @test_cvt_pk_bf8_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store <2 x half> [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x half> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i16 [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
// CHECK-NEXT: ret void
//
// Passes a half2 to __builtin_amdgcn_cvt_pk_bf8_f16 and stores the short
// result through out. The assertions above expect one call to
// llvm.amdgcn.cvt.pk.bf8.f16 returning i16, followed by an i16 store.
void test_cvt_pk_bf8_f16(global short* out, half2 a)
{
*out = __builtin_amdgcn_cvt_pk_bf8_f16(a);
}
// CHECK-LABEL: @test_cvt_pk_fp8_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store <2 x half> [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.amdgcn.cvt.pk.fp8.f16(<2 x half> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i16 [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
// CHECK-NEXT: ret void
//
// Passes a half2 to __builtin_amdgcn_cvt_pk_fp8_f16 and stores the short
// result through out. The assertions above expect one call to
// llvm.amdgcn.cvt.pk.fp8.f16 returning i16, followed by an i16 store.
void test_cvt_pk_fp8_f16(global short* out, half2 a)
{
*out = __builtin_amdgcn_cvt_pk_fp8_f16(a);
}
// CHECK-LABEL: @test_cvt_sr_bf8_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2, addrspace(5)
// CHECK-NEXT: [[SR_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: [[SR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SR_ADDR]] to ptr
// CHECK-NEXT: [[OLD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OLD_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
// CHECK-NEXT: store i32 [[SR:%.*]], ptr [[SR_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[OLD:%.*]], ptr [[OLD_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 0)
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half [[TMP5]], i32 [[TMP6]], i32 [[TMP7]], i32 1)
// CHECK-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) [[TMP9]], align 4
// CHECK-NEXT: [[TMP10:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half [[TMP10]], i32 [[TMP11]], i32 [[TMP12]], i32 2)
// CHECK-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[TMP13]], ptr addrspace(1) [[TMP14]], align 4
// CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half [[TMP15]], i32 [[TMP16]], i32 [[TMP17]], i32 3)
// CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[TMP18]], ptr addrspace(1) [[TMP19]], align 4
// CHECK-NEXT: ret void
//
// Calls __builtin_amdgcn_cvt_sr_bf8_f16 once for each constant last argument
// 0, 1, 2 and 3. The assertions above expect that constant to appear as the
// final i32 operand of each llvm.amdgcn.cvt.sr.bf8.f16 call, with a, sr and
// old reloaded before every call.
void test_cvt_sr_bf8_f16(global int* out, half a, uint sr, int old)
{
*out = __builtin_amdgcn_cvt_sr_bf8_f16(a, sr, old, 0);
*out = __builtin_amdgcn_cvt_sr_bf8_f16(a, sr, old, 1);
*out = __builtin_amdgcn_cvt_sr_bf8_f16(a, sr, old, 2);
*out = __builtin_amdgcn_cvt_sr_bf8_f16(a, sr, old, 3);
}
// CHECK-LABEL: @test_cvt_sr_fp8_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2, addrspace(5)
// CHECK-NEXT: [[SR_ADDR:%.*]] = alloca i16, align 2, addrspace(5)
// CHECK-NEXT: [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: [[SR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SR_ADDR]] to ptr
// CHECK-NEXT: [[OLD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OLD_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
// CHECK-NEXT: store i16 [[SR:%.*]], ptr [[SR_ADDR_ASCAST]], align 2
// CHECK-NEXT: store i32 [[OLD:%.*]], ptr [[OLD_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[SR_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half [[TMP0]], i32 [[CONV]], i32 [[TMP2]], i32 0)
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[SR_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[CONV1:%.*]] = sext i16 [[TMP6]] to i32
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half [[TMP5]], i32 [[CONV1]], i32 [[TMP7]], i32 1)
// CHECK-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) [[TMP9]], align 4
// CHECK-NEXT: [[TMP10:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP11:%.*]] = load i16, ptr [[SR_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP11]] to i32
// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half [[TMP10]], i32 [[CONV2]], i32 [[TMP12]], i32 2)
// CHECK-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[TMP13]], ptr addrspace(1) [[TMP14]], align 4
// CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP16:%.*]] = load i16, ptr [[SR_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[CONV3:%.*]] = sext i16 [[TMP16]] to i32
// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half [[TMP15]], i32 [[CONV3]], i32 [[TMP17]], i32 3)
// CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[TMP18]], ptr addrspace(1) [[TMP19]], align 4
// CHECK-NEXT: ret void
//
// Calls __builtin_amdgcn_cvt_sr_fp8_f16 once for each constant last argument
// 0, 1, 2 and 3. The assertions above expect that constant to appear as the
// final i32 operand of each llvm.amdgcn.cvt.sr.fp8.f16 call.
// Review note: sr is declared short here, which is why the assertions contain
// a sext from i16 to i32 before every call; the bf8 sibling test declares sr
// as uint and has no such conversion. Confirm the difference is intentional.
void test_cvt_sr_fp8_f16(global int* out, half a, short sr, int old)
{
*out = __builtin_amdgcn_cvt_sr_fp8_f16(a, sr, old, 0);
*out = __builtin_amdgcn_cvt_sr_fp8_f16(a, sr, old, 1);
*out = __builtin_amdgcn_cvt_sr_fp8_f16(a, sr, old, 2);
*out = __builtin_amdgcn_cvt_sr_fp8_f16(a, sr, old, 3);
}
// CHECK-LABEL: @test_sat_pk4_i4_i8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)

View File

@ -605,6 +605,30 @@ def int_amdgcn_cvt_pk_f16_bf8 : DefaultAttrsIntrinsic<
[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable]
>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_bf8">;
// llvm.amdgcn.cvt.pk.fp8.f16 i16 vdst, <2 x half> src
// Bound to the Clang builtin __builtin_amdgcn_cvt_pk_fp8_f16.
def int_amdgcn_cvt_pk_fp8_f16
: DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty],
[IntrNoMem, IntrSpeculatable]>,
ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f16">;
// llvm.amdgcn.cvt.pk.bf8.f16 i16 vdst, <2 x half> src
// Bound to the Clang builtin __builtin_amdgcn_cvt_pk_bf8_f16.
def int_amdgcn_cvt_pk_bf8_f16
: DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty],
[IntrNoMem, IntrSpeculatable]>,
ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f16">;
// llvm.amdgcn.cvt.sr.fp8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3]
// byte_sel selects byte to write in vdst.
// ImmArg<ArgIndex<3>> makes the fourth operand (byte_sel) an immediate.
def int_amdgcn_cvt_sr_fp8_f16 : DefaultAttrsIntrinsic<
[llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f16">;
// llvm.amdgcn.cvt.sr.bf8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3]
// byte_sel selects byte to write in vdst.
// ImmArg<ArgIndex<3>> makes the fourth operand (byte_sel) an immediate.
def int_amdgcn_cvt_sr_bf8_f16 : DefaultAttrsIntrinsic<
[llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f16">;
// Intrinsic returning DstTy from (Src0Ty, float), with IntrNoMem and
// IntrSpeculatable, bound to the Clang builtin "__builtin_amdgcn_" # name.
// The float operand is presumably the scale, judging by the class name.
class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
[DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;

View File

@ -4577,6 +4577,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_cvt_sr_pk_bf16_f32:
case Intrinsic::amdgcn_cvt_pk_f16_fp8:
case Intrinsic::amdgcn_cvt_pk_f16_bf8:
case Intrinsic::amdgcn_cvt_pk_fp8_f16:
case Intrinsic::amdgcn_cvt_pk_bf8_f16:
case Intrinsic::amdgcn_cvt_sr_fp8_f16:
case Intrinsic::amdgcn_cvt_sr_bf8_f16:
case Intrinsic::amdgcn_sat_pk4_i4_i8:
case Intrinsic::amdgcn_sat_pk4_u4_u8:
case Intrinsic::amdgcn_fmed3:

View File

@ -689,6 +689,8 @@ public:
// True when isVSrc_f16() accepts the operand, or when isLiteralImm() accepts
// it as an MVT::v2f16 literal.
bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
// Parser predicate for the NoInline v2f16 source operand. It accepts exactly
// what isVSrc_v2f16() accepts; nothing is filtered out at this point.
bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
// Forwards to isRegOrInlineNoMods() with the VGPR_32 register class and
// MVT::i32.
bool isVISrcB32() const {
return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
}
@ -2036,6 +2038,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
case AMDGPU::OPERAND_KIMM16:
return &APFloat::IEEEhalf();
case AMDGPU::OPERAND_REG_IMM_BF16:
@ -2405,6 +2408,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
case AMDGPU::OPERAND_REG_IMM_V2FP32:
case AMDGPU::OPERAND_REG_IMM_V2INT32:
case AMDGPU::OPERAND_KIMM32:
@ -2456,6 +2460,9 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
setImmKindConst();
return;
}
[[fallthrough]];
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
setImmKindLiteral();
@ -3761,6 +3768,9 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16)
return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
if (OperandType == AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16)
return false;
llvm_unreachable("invalid operand type");
}
default:
@ -9421,7 +9431,19 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
Inst.addOperand(Inst.getOperand(0));
}

View File

@ -540,6 +540,8 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
printImmediateBFloat16(static_cast<uint16_t>(Imm), STI, O))
return;
break;
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
break;
default:
llvm_unreachable("bad operand type");
}
@ -770,6 +772,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_IMM_V2BF16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:

View File

@ -341,6 +341,9 @@ std::optional<uint64_t> AMDGPUMCCodeEmitter::getLitEncoding(
return AMDGPU::getInlineEncodingV2BF16(static_cast<uint32_t>(Imm))
.value_or(255);
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
return 255;
case AMDGPU::OPERAND_KIMM32:
case AMDGPU::OPERAND_KIMM16:
case AMDGPU::OPERAND_KIMM64:

View File

@ -208,6 +208,7 @@ enum OperandType : unsigned {
OPERAND_REG_IMM_V2BF16,
OPERAND_REG_IMM_V2FP16,
OPERAND_REG_IMM_V2INT16,
OPERAND_REG_IMM_NOINLINE_V2FP16,
OPERAND_REG_IMM_V2INT32,
OPERAND_REG_IMM_V2FP32,

View File

@ -468,6 +468,7 @@ bool SIFoldOperandsImpl::canUseImmWithOpSel(const MachineInstr *MI,
case AMDGPU::OPERAND_REG_IMM_V2FP16:
case AMDGPU::OPERAND_REG_IMM_V2BF16:
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:

View File

@ -4438,6 +4438,8 @@ bool SIInstrInfo::isInlineConstant(int64_t Imm, uint8_t OperandType) const {
case AMDGPU::OPERAND_REG_IMM_V2BF16:
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
return AMDGPU::isInlinableLiteralV2BF16(Imm);
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
return false;
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
if (isInt<16>(Imm) || isUInt<16>(Imm)) {

View File

@ -2859,6 +2859,7 @@ def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>;
def VOP_BF16_BF16 : VOPProfile<[bf16, bf16, untyped, untyped]>;
def VOP1_I16_I32 : VOPProfile<[i16, i32, untyped, untyped]>;
// i16 paired with a single v2f16 entry; the remaining two slots are untyped.
// Used for the packed f16 -> fp8/bf8 conversions, whose intrinsics return i16
// from a <2 x half> source.
def VOP_I16_V2F16 : VOPProfile<[i16, v2f16, untyped, untyped]>;
def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;

View File

@ -1218,6 +1218,8 @@ def VSrc_f64 : SrcRegOrImm9 <VS_64, "OPERAND_REG_IMM_FP64"> {
def VSrc_v2b32 : SrcRegOrImm9 <VS_64, "OPERAND_REG_IMM_V2INT32">;
def VSrc_v2f32 : SrcRegOrImm9 <VS_64, "OPERAND_REG_IMM_V2FP32">;
// v2f16 source over VS_32 tagged with OPERAND_REG_IMM_NOINLINE_V2FP16, a
// distinct operand type from OPERAND_REG_IMM_V2FP16.
def VSrc_NoInline_v2f16 : SrcRegOrImm9 <VS_32, "OPERAND_REG_IMM_NOINLINE_V2FP16">;
//===----------------------------------------------------------------------===//
// VRegSrc_* Operands with a VGPR
//===----------------------------------------------------------------------===//

View File

@ -2659,6 +2659,7 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
@ -3023,6 +3024,8 @@ bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
case AMDGPU::OPERAND_REG_IMM_V2BF16:
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
return isInlinableLiteralV2BF16(Literal);
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
return false;
default:
llvm_unreachable("bad packed operand type");
}

View File

@ -1636,6 +1636,7 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_IMM_V2BF16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
return 2;
default:

View File

@ -1623,6 +1623,53 @@ let SubtargetPredicate = HasBF16ConversionInsts in {
(V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, 0, (f32 (IMPLICIT_DEF)))>;
}
// Profiles for the packed f16 -> fp8/bf8 conversions, built on VOP_I16_V2F16.
// Src0RC64 is overridden so that src0 uses the VSrc_NoInline_v2f16 operand.
// The True16 and Fake16 profiles are derived from the base profile.
let Src0RC64 = VSrc_NoInline_v2f16 in {
def VOP3_CVT_PK_F8_F16_Profile : VOP3_Profile<VOP_I16_V2F16>;
def VOP3_CVT_PK_F8_F16_True16_Profile : VOP3_Profile_True16<VOP3_CVT_PK_F8_F16_Profile>;
def VOP3_CVT_PK_F8_F16_Fake16_Profile : VOP3_Profile_Fake16<VOP3_CVT_PK_F8_F16_Profile>;
}
// Pseudos for the packed f16 -> fp8/bf8 conversions, each selecting its
// matching intrinsic. Both the record names and the pseudo mnemonics carry a
// _gfx1250 suffix. These definitions are predicated on isGFX125xOnly.
let ReadsModeReg = 0, IsPacked = 0, SubtargetPredicate = isGFX125xOnly in {
defm V_CVT_PK_FP8_F16_gfx1250 : VOP3Inst_t16_with_profiles<"v_cvt_pk_fp8_f16_gfx1250",
VOP3_CVT_PK_F8_F16_Profile,
VOP3_CVT_PK_F8_F16_True16_Profile,
VOP3_CVT_PK_F8_F16_Fake16_Profile,
int_amdgcn_cvt_pk_fp8_f16>;
defm V_CVT_PK_BF8_F16_gfx1250 : VOP3Inst_t16_with_profiles<"v_cvt_pk_bf8_f16_gfx1250",
VOP3_CVT_PK_F8_F16_Profile,
VOP3_CVT_PK_F8_F16_True16_Profile,
VOP3_CVT_PK_F8_F16_Fake16_Profile,
int_amdgcn_cvt_pk_bf8_f16>;
}
// Profiles for the stochastic-rounding f16 -> fp8/bf8 conversions, built on
// VOP3_CVT_SR_F8_ByteSel_Profile<f16> with clamp disabled and op_sel enabled.
// The True16 and Fake16 profiles are derived from the base profile.
let HasClamp = 0, HasOpSel = 1 in {
def VOP3_CVT_SR_F8_F16_Profile : VOP3_CVT_SR_F8_ByteSel_Profile<f16>;
def VOP3_CVT_SR_F8_F16_True16_Profile : VOP3_Profile_True16<VOP3_CVT_SR_F8_F16_Profile>;
def VOP3_CVT_SR_F8_F16_Fake16_Profile : VOP3_Profile_Fake16<VOP3_CVT_SR_F8_F16_Profile>;
}
// Pseudos and selection patterns for v_cvt_sr_fp8_f16 and v_cvt_sr_bf8_f16.
let SubtargetPredicate = isGFX1250Plus in {
let ReadsModeReg = 0 in {
// These instructions have non-standard use of op_sel. They are using bits 2 and 3 of opsel
// to select a byte in the vdst. Bits 0 and 1 are unused.
// $vdst is tied to $vdst_in, and $vdst_in is left out of the encoding.
let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in {
defm V_CVT_SR_FP8_F16 : VOP3Inst_t16_with_profiles<"v_cvt_sr_fp8_f16", VOP3_CVT_SR_F8_F16_Profile,
VOP3_CVT_SR_F8_F16_True16_Profile, VOP3_CVT_SR_F8_F16_Fake16_Profile>;
defm V_CVT_SR_BF8_F16 : VOP3Inst_t16_with_profiles<"v_cvt_sr_bf8_f16", VOP3_CVT_SR_F8_F16_Profile,
VOP3_CVT_SR_F8_F16_True16_Profile, VOP3_CVT_SR_F8_F16_Fake16_Profile>;
}
} // End ReadsModeReg = 0
// No intrinsic is passed to the defm lines above; selection is done by the
// explicit patterns below, which pick the _t16 pseudo under
// UseRealTrue16Insts and the _fake16 pseudo under UseFakeTrue16Insts.
let True16Predicate = UseRealTrue16Insts in {
def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_fp8_f16, V_CVT_SR_FP8_F16_t16_e64, f16>;
def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_bf8_f16, V_CVT_SR_BF8_F16_t16_e64, f16>;
}
let True16Predicate = UseFakeTrue16Insts in {
def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_fp8_f16, V_CVT_SR_FP8_F16_fake16_e64, f16>;
def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_bf8_f16, V_CVT_SR_BF8_F16_fake16_e64, f16>;
}
} // End SubtargetPredicate = isGFX1250Plus
class Cvt_Scale_Sr_F32ToBF16F16_Pat<SDPatternOperator node, VOP3_Pseudo inst, ValueType DstTy> : GCNPat<
(DstTy (node DstTy:$vdst_in, f32:$src0, i32:$src1, timm:$word_sel)),
(inst (DstSelToOpSelXForm $word_sel), $src0, 0, $src1, VGPR_32:$vdst_in)
@ -2019,6 +2066,10 @@ defm V_ASHR_PK_I8_I32 : VOP3Only_Realtriple_gfx1250<0x290>;
defm V_ASHR_PK_U8_I32 : VOP3Only_Realtriple_gfx1250<0x291>;
defm V_CVT_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36d>;
defm V_CVT_SR_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36e>;
defm V_CVT_PK_FP8_F16_gfx1250 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x372, "v_cvt_pk_fp8_f16">;
defm V_CVT_PK_BF8_F16_gfx1250 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x373, "v_cvt_pk_bf8_f16">;
defm V_CVT_SR_FP8_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x374>;
defm V_CVT_SR_BF8_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x375>;
//===----------------------------------------------------------------------===//
// GFX10.

View File

@ -2071,6 +2071,15 @@ multiclass VOP3Only_Real_Base_gfx1250<bits<10> op> :
// VOP3_Realtriple instantiated for GFX1250Gen; op and isSingle are forwarded.
multiclass VOP3Only_Realtriple_gfx1250<bits<10> op, bit isSingle = 0> :
VOP3_Realtriple<GFX1250Gen, op, isSingle>;
// VOP3_Realtriple_with_name instantiated for GFX1250Gen; all other arguments
// are forwarded unchanged and in the same order.
multiclass VOP3Only_Realtriple_with_name_gfx1250<bits<10> op, string opName,
string asmName, string pseudo_mnemonic = "",
bit isSingle = 0> :
VOP3_Realtriple_with_name<GFX1250Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
// Wrapper over VOP3Only_Realtriple_with_name_gfx1250 that supplies defaults:
// asmName defaults to the Mnemonic of the NAME#"_e64" pseudo, and opName to
// NAME. The parameter order here is (op, asmName, opName, ...), whereas the
// wrapped multiclass takes (op, opName, asmName, ...), so the two are swapped
// when forwarded.
multiclass VOP3Only_Realtriple_t16_gfx1250<bits<10> op, string asmName = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic,
string opName = NAME, string pseudo_mnemonic = "", bit isSingle = 0> :
VOP3Only_Realtriple_with_name_gfx1250<op, opName, asmName, pseudo_mnemonic, isSingle>;
// Wrapper over VOP3_Realtriple_with_name for GFX12Gen. asmName has no default
// here; opName defaults to NAME. opName and asmName are swapped when
// forwarded to match the wrapped multiclass's parameter order.
multiclass VOP3_Realtriple_t16_gfx12<bits<10> op, string asmName, string opName = NAME,
string pseudo_mnemonic = "", bit isSingle = 0> :
VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
@ -2091,6 +2100,13 @@ multiclass VOP3Only_Realtriple_t16_and_fake16_gfx12<bits<10> op, string asmName,
defm _fake16 : VOP3Only_Realtriple_t16_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic>;
}
// Emits two sets of real definitions for one opcode, suffixed _t16 and
// _fake16. Both share op, asmName and pseudo_mnemonic; opName gets the
// matching "_t16" or "_fake16" suffix appended. asmName defaults to the
// Mnemonic of the NAME#"_e64" pseudo.
multiclass VOP3Only_Realtriple_t16_and_fake16_gfx1250<bits<10> op,
string asmName = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic,
string opName = NAME, string pseudo_mnemonic = ""> {
defm _t16 : VOP3Only_Realtriple_t16_gfx1250<op, asmName, opName#"_t16", pseudo_mnemonic>;
defm _fake16 : VOP3Only_Realtriple_t16_gfx1250<op, asmName, opName#"_fake16", pseudo_mnemonic>;
}
multiclass VOP3be_Real_with_name_gfx12<bits<10> op, string opName,
string asmName, bit isSingle = 0> {
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");

View File

@ -0,0 +1,28 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
; Both halves of the constant source are 0xH3800. The assertions expect it as
; the 32-bit value 0x38003800, emitted as four trailing bytes after a 0xff
; source byte, which makes the instruction encoding 12 bytes long.
; Review note: %out is unused, and the source is a constant despite the _v
; suffix. Confirm the naming is intended.
define i16 @cvt_pk_bf8_f16_v(ptr addrspace(1) %out) {
; GFX1250-LABEL: cvt_pk_bf8_f16_v:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
; GFX1250-NEXT: v_cvt_pk_bf8_f16 v0, 0x38003800 ; encoding: [0x00,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x38]
; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
%cvt = tail call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x half> <half 0xH3800, half 0xH3800>)
ret i16 %cvt
}
; GFX1250: codeLenInByte = 24
; Only the low half of the constant source is non-zero (0xH3800). The
; assertions still expect it as a 32-bit value (0x3800), emitted as four
; trailing bytes after a 0xff source byte, giving a 12-byte encoding.
; Review note: %out is unused, and the source is a constant despite the _v
; suffix. Confirm the naming is intended.
define i16 @cvt_pk_fp8_f16_v(ptr addrspace(1) %out) {
; GFX1250-LABEL: cvt_pk_fp8_f16_v:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
; GFX1250-NEXT: v_cvt_pk_fp8_f16 v0, 0x3800 ; encoding: [0x00,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
%cvt = tail call i16 @llvm.amdgcn.cvt.pk.fp8.f16(<2 x half> <half 0xH3800, half 0xH0>)
ret i16 %cvt
}
; GFX1250: codeLenInByte = 24

View File

@ -0,0 +1,539 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG-REAL16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-REAL16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-FAKE16 %s
declare i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x half>)
declare i16 @llvm.amdgcn.cvt.pk.fp8.f16(<2 x half>)
declare i32 @llvm.amdgcn.cvt.sr.bf8.f16(half, i32, i32, i32)
declare i32 @llvm.amdgcn.cvt.sr.fp8.f16(half, i32, i32, i32)
; VGPR source: the assertions expect v0 as the source operand. With real
; true16 the result is written to v0.l; with fake16 it is written to v0.
define amdgpu_ps void @test_cvt_pk_bf8_f16_v(<2 x half> %a, ptr addrspace(1) %out) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_bf8_f16_v:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_bf8_f16 v0.l, v0
; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[2:3], v0
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_bf8_f16_v:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_bf8_f16 v0, v0
; GFX1250-SDAG-FAKE16-NEXT: global_store_b16 v[2:3], v0, off
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_bf8_f16_v:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_bf8_f16 v0.l, v0
; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[4:5], v0
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_bf8_f16_v:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_bf8_f16 v0, v0
; GFX1250-GISEL-FAKE16-NEXT: global_store_b16 v[4:5], v0, off
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
%cvt = tail call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x half> %a)
store i16 %cvt, ptr addrspace(1) %out
ret void
}
; SGPR source: %a is marked inreg, and the assertions expect s0 to be used
; directly as the source operand in all four configurations.
define amdgpu_ps void @test_cvt_pk_bf8_f16_s(<2 x half> inreg %a, ptr addrspace(1) %out) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_bf8_f16_s:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_bf8_f16 v2.l, s0
; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[0:1], v2
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_bf8_f16_s:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_bf8_f16 v2, s0
; GFX1250-SDAG-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_bf8_f16_s:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_bf8_f16 v2.l, s0
; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[0:1], v2
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_bf8_f16_s:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_bf8_f16 v2, s0
; GFX1250-GISEL-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
%cvt = tail call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x half> %a)
store i16 %cvt, ptr addrspace(1) %out
ret void
}
; Constant source: the vector <0.0, 100.0> is expected to be folded into the
; instruction as the 32-bit literal 0x56400000 in all four configurations.
define amdgpu_ps void @test_cvt_pk_bf8_f16_l(ptr addrspace(1) %out) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_bf8_f16_l:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_bf8_f16 v2.l, 0x56400000
; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[0:1], v2
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_bf8_f16_l:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_bf8_f16 v2, 0x56400000
; GFX1250-SDAG-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_bf8_f16_l:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_bf8_f16 v2.l, 0x56400000
; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[0:1], v2
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_bf8_f16_l:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_bf8_f16 v2, 0x56400000
; GFX1250-GISEL-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
%cvt = tail call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x half> <half 0.0, half 100.0>)
store i16 %cvt, ptr addrspace(1) %out
ret void
}
; VGPR source: the assertions expect v0 as the source operand. With real
; true16 the result is written to v0.l; with fake16 it is written to v0.
define amdgpu_ps void @test_cvt_pk_fp8_f16_v(<2 x half> %a, ptr addrspace(1) %out) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_fp8_f16_v:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_fp8_f16 v0.l, v0
; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[2:3], v0
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_fp8_f16_v:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_fp8_f16 v0, v0
; GFX1250-SDAG-FAKE16-NEXT: global_store_b16 v[2:3], v0, off
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_fp8_f16_v:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_fp8_f16 v0.l, v0
; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[4:5], v0
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_fp8_f16_v:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_fp8_f16 v0, v0
; GFX1250-GISEL-FAKE16-NEXT: global_store_b16 v[4:5], v0, off
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
%cvt = tail call i16 @llvm.amdgcn.cvt.pk.fp8.f16(<2 x half> %a)
store i16 %cvt, ptr addrspace(1) %out
ret void
}
; SGPR source: %a is marked inreg, and the assertions expect s0 to be used
; directly as the source operand in all four configurations.
define amdgpu_ps void @test_cvt_pk_fp8_f16_s(<2 x half> inreg %a, ptr addrspace(1) %out) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_fp8_f16_s:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_fp8_f16 v2.l, s0
; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[0:1], v2
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_fp8_f16_s:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_fp8_f16 v2, s0
; GFX1250-SDAG-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_fp8_f16_s:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_fp8_f16 v2.l, s0
; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[0:1], v2
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_fp8_f16_s:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_fp8_f16 v2, s0
; GFX1250-GISEL-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
%cvt = tail call i16 @llvm.amdgcn.cvt.pk.fp8.f16(<2 x half> %a)
store i16 %cvt, ptr addrspace(1) %out
ret void
}
; Calls llvm.amdgcn.cvt.pk.fp8.f16 on the constant vector
; <half 0.0, half 100.0>. All four check variants expect the constant to be
; folded into the instruction as the 32-bit literal 0x56400000.
define amdgpu_ps void @test_cvt_pk_fp8_f16_l(ptr addrspace(1) %out) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_fp8_f16_l:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_fp8_f16 v2.l, 0x56400000
; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[0:1], v2
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_fp8_f16_l:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_fp8_f16 v2, 0x56400000
; GFX1250-SDAG-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_fp8_f16_l:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_fp8_f16 v2.l, 0x56400000
; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[0:1], v2
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_fp8_f16_l:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_fp8_f16 v2, 0x56400000
; GFX1250-GISEL-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
%cvt = tail call i16 @llvm.amdgcn.cvt.pk.fp8.f16(<2 x half> <half 0.0, half 100.0>)
store i16 %cvt, ptr addrspace(1) %out
ret void
}
; Calls llvm.amdgcn.cvt.sr.bf8.f16 with the last (immediate) operand set to 0.
; The checks expect v_cvt_sr_bf8_f16 to be emitted without a byte_sel
; modifier and to write v2, the register that holds the incoming %old value.
define amdgpu_ps void @test_cvt_sr_bf8_f16_byte0(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_bf8_f16_byte0:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1
; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte0:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1
; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_bf8_f16_byte0:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1
; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte0:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1
; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
%cvt = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half %a, i32 %sr, i32 %old, i32 0)
store i32 %cvt, ptr addrspace(1) %out
ret void
}
; Calls llvm.amdgcn.cvt.sr.bf8.f16 with the last (immediate) operand set to 1.
; The checks expect that immediate to appear as the byte_sel:1 modifier on
; v_cvt_sr_bf8_f16.
define amdgpu_ps void @test_cvt_sr_bf8_f16_byte1(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_bf8_f16_byte1:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 byte_sel:1
; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte1:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 byte_sel:1
; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_bf8_f16_byte1:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 byte_sel:1
; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte1:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 byte_sel:1
; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
%cvt = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half %a, i32 %sr, i32 %old, i32 1)
store i32 %cvt, ptr addrspace(1) %out
ret void
}
; Calls llvm.amdgcn.cvt.sr.bf8.f16 with the last (immediate) operand set to 2.
; The checks expect that immediate to appear as the byte_sel:2 modifier on
; v_cvt_sr_bf8_f16.
define amdgpu_ps void @test_cvt_sr_bf8_f16_byte2(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_bf8_f16_byte2:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 byte_sel:2
; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte2:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 byte_sel:2
; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_bf8_f16_byte2:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 byte_sel:2
; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte2:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 byte_sel:2
; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
%cvt = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half %a, i32 %sr, i32 %old, i32 2)
store i32 %cvt, ptr addrspace(1) %out
ret void
}
; Calls llvm.amdgcn.cvt.sr.bf8.f16 with the last (immediate) operand set to 3.
; The checks expect that immediate to appear as the byte_sel:3 modifier on
; v_cvt_sr_bf8_f16.
define amdgpu_ps void @test_cvt_sr_bf8_f16_byte3(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_bf8_f16_byte3:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 byte_sel:3
; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte3:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 byte_sel:3
; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_bf8_f16_byte3:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 byte_sel:3
; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte3:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 byte_sel:3
; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
%cvt = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half %a, i32 %sr, i32 %old, i32 3)
store i32 %cvt, ptr addrspace(1) %out
ret void
}
; Calls llvm.amdgcn.cvt.sr.bf8.f16 on element 1 of a <2 x half> argument.
; The SelectionDAG real-true16 checks expect the high half to be read
; directly as v0.h; the other three variants expect a 16-bit right shift of v0
; (v_dual_lshrrev_b32) before the conversion.
define amdgpu_ps void @test_cvt_sr_bf8_f16_hi_byte0(<2 x half> %a, i32 %sr, i32 %old, ptr addrspace(1) %out) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_bf8_f16_hi_byte0:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.h, v1
; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_bf8_f16_hi_byte0:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_lshrrev_b32 v0, 16, v0
; GFX1250-SDAG-FAKE16-NEXT: v_mov_b32_e32 v4, v3
; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1
; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_bf8_f16_hi_byte0:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_dual_lshrrev_b32 v0, 16, v0 :: v_dual_mov_b32 v6, v3
; GFX1250-GISEL-REAL16-NEXT: v_mov_b32_e32 v7, v4
; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1
; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_bf8_f16_hi_byte0:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_dual_lshrrev_b32 v0, 16, v0 :: v_dual_mov_b32 v6, v3
; GFX1250-GISEL-FAKE16-NEXT: v_mov_b32_e32 v7, v4
; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1
; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
%a.1 = extractelement <2 x half> %a, i32 1
%cvt = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half %a.1, i32 %sr, i32 %old, i32 0)
store i32 %cvt, ptr addrspace(1) %out
ret void
}
; Calls llvm.amdgcn.cvt.sr.fp8.f16 with the last (immediate) operand set to 0.
; The checks expect v_cvt_sr_fp8_f16 to be emitted without a byte_sel
; modifier and to write v2, the register that holds the incoming %old value.
define amdgpu_ps void @test_cvt_sr_fp8_f16_byte0(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_fp8_f16_byte0:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1
; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte0:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1
; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_fp8_f16_byte0:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1
; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte0:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1
; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
%cvt = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %a, i32 %sr, i32 %old, i32 0)
store i32 %cvt, ptr addrspace(1) %out
ret void
}
; Calls llvm.amdgcn.cvt.sr.fp8.f16 with the last (immediate) operand set to 1.
; The checks expect that immediate to appear as the byte_sel:1 modifier on
; v_cvt_sr_fp8_f16.
define amdgpu_ps void @test_cvt_sr_fp8_f16_byte1(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_fp8_f16_byte1:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 byte_sel:1
; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte1:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 byte_sel:1
; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_fp8_f16_byte1:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 byte_sel:1
; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte1:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 byte_sel:1
; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
%cvt = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %a, i32 %sr, i32 %old, i32 1)
store i32 %cvt, ptr addrspace(1) %out
ret void
}
; Calls llvm.amdgcn.cvt.sr.fp8.f16 with the last (immediate) operand set to 2.
; The checks expect that immediate to appear as the byte_sel:2 modifier on
; v_cvt_sr_fp8_f16.
define amdgpu_ps void @test_cvt_sr_fp8_f16_byte2(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_fp8_f16_byte2:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 byte_sel:2
; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte2:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 byte_sel:2
; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_fp8_f16_byte2:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 byte_sel:2
; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte2:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 byte_sel:2
; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
%cvt = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %a, i32 %sr, i32 %old, i32 2)
store i32 %cvt, ptr addrspace(1) %out
ret void
}
; Calls llvm.amdgcn.cvt.sr.fp8.f16 with the last (immediate) operand set to 3.
; The checks expect that immediate to appear as the byte_sel:3 modifier on
; v_cvt_sr_fp8_f16.
define amdgpu_ps void @test_cvt_sr_fp8_f16_byte3(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_fp8_f16_byte3:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 byte_sel:3
; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte3:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 byte_sel:3
; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_fp8_f16_byte3:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 byte_sel:3
; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte3:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 byte_sel:3
; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
%cvt = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %a, i32 %sr, i32 %old, i32 3)
store i32 %cvt, ptr addrspace(1) %out
ret void
}
; Calls llvm.amdgcn.cvt.sr.fp8.f16 on element 1 of a <2 x half> argument.
; The SelectionDAG real-true16 checks expect the high half to be read
; directly as v0.h; the other three variants expect a 16-bit right shift of v0
; (v_dual_lshrrev_b32) before the conversion.
define amdgpu_ps void @test_cvt_sr_fp8_f16_hi_byte0(<2 x half> %a, i32 %sr, i32 %old, ptr addrspace(1) %out) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_fp8_f16_hi_byte0:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.h, v1
; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_fp8_f16_hi_byte0:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_lshrrev_b32 v0, 16, v0
; GFX1250-SDAG-FAKE16-NEXT: v_mov_b32_e32 v4, v3
; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1
; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_fp8_f16_hi_byte0:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_dual_lshrrev_b32 v0, 16, v0 :: v_dual_mov_b32 v6, v3
; GFX1250-GISEL-REAL16-NEXT: v_mov_b32_e32 v7, v4
; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1
; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_fp8_f16_hi_byte0:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_dual_lshrrev_b32 v0, 16, v0 :: v_dual_mov_b32 v6, v3
; GFX1250-GISEL-FAKE16-NEXT: v_mov_b32_e32 v7, v4
; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1
; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
%a.1 = extractelement <2 x half> %a, i32 1
%cvt = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %a.1, i32 %sr, i32 %old, i32 0)
store i32 %cvt, ptr addrspace(1) %out
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX1250: {{.*}}

View File

@ -402,3 +402,148 @@ v_ashr_pk_u8_i32 v2, v4, v7, 12345
v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1]
// GFX1250: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04]
v_cvt_pk_bf8_f16 v1, v2 op_sel:[0,0]
// GFX1250: v_cvt_pk_bf8_f16 v1, v2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x01,0x00,0x00]
v_cvt_pk_bf8_f16 v1, v2 op_sel:[0,1]
// GFX1250: v_cvt_pk_bf8_f16 v1, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x73,0xd7,0x02,0x01,0x00,0x00]
v_cvt_pk_bf8_f16 v1, v2 clamp
// GFX1250: v_cvt_pk_bf8_f16 v1, v2 clamp ; encoding: [0x01,0x80,0x73,0xd7,0x02,0x01,0x00,0x00]
v_cvt_pk_bf8_f16 v1, s2
// GFX1250: v_cvt_pk_bf8_f16 v1, s2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x00,0x00,0x00]
v_cvt_pk_bf8_f16 v1, 100.0
// GFX1250: v_cvt_pk_bf8_f16 v1, 0x5640 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00]
// Inline constants are not supported by v_cvt_pk_bf8_f16, so the integer and floating-point immediates below are expected to be encoded as 32-bit literals
v_cvt_pk_bf8_f16 v1, 1
// GFX1250: v_cvt_pk_bf8_f16 v1, 1 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
v_cvt_pk_bf8_f16 v1, 0x3800
// GFX1250: v_cvt_pk_bf8_f16 v1, 0x3800 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
v_cvt_pk_bf8_f16 v1, 0.5
// GFX1250: v_cvt_pk_bf8_f16 v1, 0x3800 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
v_cvt_pk_bf8_f16 v1, 0x3118
// GFX1250: v_cvt_pk_bf8_f16 v1, 0x3118 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
v_cvt_pk_bf8_f16 v1, 0.15915494
// GFX1250: v_cvt_pk_bf8_f16 v1, 0x3118 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
v_cvt_pk_fp8_f16 v1, v2 op_sel:[0,0]
// GFX1250: v_cvt_pk_fp8_f16 v1, v2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x01,0x00,0x00]
v_cvt_pk_fp8_f16 v1, v2 op_sel:[0,1]
// GFX1250: v_cvt_pk_fp8_f16 v1, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x72,0xd7,0x02,0x01,0x00,0x00]
v_cvt_pk_fp8_f16 v1, v2 clamp
// GFX1250: v_cvt_pk_fp8_f16 v1, v2 clamp ; encoding: [0x01,0x80,0x72,0xd7,0x02,0x01,0x00,0x00]
v_cvt_pk_fp8_f16 v1, s2
// GFX1250: v_cvt_pk_fp8_f16 v1, s2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x00,0x00,0x00]
v_cvt_pk_fp8_f16 v1, 100.0
// GFX1250: v_cvt_pk_fp8_f16 v1, 0x5640 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00]
// Inline constants are not supported by v_cvt_pk_fp8_f16, so the integer and floating-point immediates below are expected to be encoded as 32-bit literals
v_cvt_pk_fp8_f16 v1, 1
// GFX1250: v_cvt_pk_fp8_f16 v1, 1 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
v_cvt_pk_fp8_f16 v1, 0x3800
// GFX1250: v_cvt_pk_fp8_f16 v1, 0x3800 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
v_cvt_pk_fp8_f16 v1, 0.5
// GFX1250: v_cvt_pk_fp8_f16 v1, 0x3800 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
v_cvt_pk_fp8_f16 v1, 0x3118
// GFX1250: v_cvt_pk_fp8_f16 v1, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
v_cvt_pk_fp8_f16 v1, 0.15915494
// GFX1250: v_cvt_pk_fp8_f16 v1, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
v_cvt_sr_bf8_f16 v1, v2, v3
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1]
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:0
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f16 v1, v2, s3
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, s3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x00,0x00]
v_cvt_sr_bf8_f16 v1, v2, 0x1234
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, 0x1234 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00]
v_cvt_sr_bf8_f16 v1, -v2, v3
// GFX1250: v_cvt_sr_bf8_f16 v1, -v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x20]
v_cvt_sr_bf8_f16 v1, |v2|, v3
// GFX1250: v_cvt_sr_bf8_f16 v1, |v2|, v3 ; encoding: [0x01,0x01,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f16 v1, |v2|, v3 op_sel:[1]
// GFX1250: v_cvt_sr_bf8_f16 v1, |v2|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2 ; encoding: [0x01,0x40,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1 ; encoding: [0x01,0x20,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:3
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:3 ; encoding: [0x01,0x60,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] byte_sel:1
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] byte_sel:2
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] byte_sel:3
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f16 v1, v2, v3
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1]
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x74,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f16 v1, v2, s3
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, s3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x00,0x00]
v_cvt_sr_fp8_f16 v1, v2, 0x1234
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, 0x1234 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00]
v_cvt_sr_fp8_f16 v1, -v2, v3
// GFX1250: v_cvt_sr_fp8_f16 v1, -v2, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x20]
v_cvt_sr_fp8_f16 v1, |v2|, v3
// GFX1250: v_cvt_sr_fp8_f16 v1, |v2|, v3 ; encoding: [0x01,0x01,0x74,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f16 v1, |v2|, v3 op_sel:[1]
// GFX1250: v_cvt_sr_fp8_f16 v1, |v2|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x74,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2 ; encoding: [0x01,0x40,0x74,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1 ; encoding: [0x01,0x20,0x74,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:3
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:3 ; encoding: [0x01,0x60,0x74,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] byte_sel:1
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x74,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] byte_sel:2
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x74,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] byte_sel:3
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x74,0xd7,0x02,0x07,0x02,0x00]

View File

@ -402,3 +402,148 @@ v_ashr_pk_u8_i32 v2, v4, v7, 12345
v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1]
// GFX1250: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04]
v_cvt_pk_bf8_f16 v1.l, v2
// GFX1250: v_cvt_pk_bf8_f16 v1.l, v2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x01,0x00,0x00]
v_cvt_pk_bf8_f16 v1.h, v2
// GFX1250: v_cvt_pk_bf8_f16 v1.h, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x73,0xd7,0x02,0x01,0x00,0x00]
v_cvt_pk_bf8_f16 v0.l, v2 clamp
// GFX1250: v_cvt_pk_bf8_f16 v0.l, v2 clamp ; encoding: [0x00,0x80,0x73,0xd7,0x02,0x01,0x00,0x00]
v_cvt_pk_bf8_f16 v1.l, s2
// GFX1250: v_cvt_pk_bf8_f16 v1.l, s2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x00,0x00,0x00]
v_cvt_pk_bf8_f16 v1.l, 100.0
// GFX1250: v_cvt_pk_bf8_f16 v1.l, 0x5640 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00]
// Inline constants are not supported by v_cvt_pk_bf8_f16, so the integer and floating-point immediates below are expected to be encoded as 32-bit literals
v_cvt_pk_bf8_f16 v1.l, 1
// GFX1250: v_cvt_pk_bf8_f16 v1.l, 1 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
v_cvt_pk_bf8_f16 v1.l, 0x3800
// GFX1250: v_cvt_pk_bf8_f16 v1.l, 0x3800 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
v_cvt_pk_bf8_f16 v1.l, 0.5
// GFX1250: v_cvt_pk_bf8_f16 v1.l, 0x3800 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
v_cvt_pk_bf8_f16 v1.l, 0x3118
// GFX1250: v_cvt_pk_bf8_f16 v1.l, 0x3118 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
v_cvt_pk_bf8_f16 v1.l, 0.15915494
// GFX1250: v_cvt_pk_bf8_f16 v1.l, 0x3118 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
v_cvt_pk_fp8_f16 v1.l, v2
// GFX1250: v_cvt_pk_fp8_f16 v1.l, v2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x01,0x00,0x00]
v_cvt_pk_fp8_f16 v1.h, v2
// GFX1250: v_cvt_pk_fp8_f16 v1.h, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x72,0xd7,0x02,0x01,0x00,0x00]
v_cvt_pk_fp8_f16 v1.l, v2 clamp
// GFX1250: v_cvt_pk_fp8_f16 v1.l, v2 clamp ; encoding: [0x01,0x80,0x72,0xd7,0x02,0x01,0x00,0x00]
v_cvt_pk_fp8_f16 v1.l, s2
// GFX1250: v_cvt_pk_fp8_f16 v1.l, s2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x00,0x00,0x00]
v_cvt_pk_fp8_f16 v1.l, 100.0
// GFX1250: v_cvt_pk_fp8_f16 v1.l, 0x5640 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00]
// Inline constants are not supported by v_cvt_pk_fp8_f16, so the integer and floating-point immediates below are expected to be encoded as 32-bit literals
v_cvt_pk_fp8_f16 v1.l, 1
// GFX1250: v_cvt_pk_fp8_f16 v1.l, 1 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
v_cvt_pk_fp8_f16 v1.l, 0x3800
// GFX1250: v_cvt_pk_fp8_f16 v1.l, 0x3800 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
v_cvt_pk_fp8_f16 v1.l, 0.5
// GFX1250: v_cvt_pk_fp8_f16 v1.l, 0x3800 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
v_cvt_pk_fp8_f16 v1.l, 0x3118
// GFX1250: v_cvt_pk_fp8_f16 v1.l, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
v_cvt_pk_fp8_f16 v1.l, 0.15915494
// GFX1250: v_cvt_pk_fp8_f16 v1.l, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
v_cvt_sr_bf8_f16 v1, v2.l, v3
// GFX1250: v_cvt_sr_bf8_f16 v1, v2.l, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f16 v1, v2.h, v3
// GFX1250: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:0
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f16 v1, v2, s3
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, s3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x00,0x00]
v_cvt_sr_bf8_f16 v1, v2, 0x1234
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, 0x1234 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00]
v_cvt_sr_bf8_f16 v1, -v2, v3
// GFX1250: v_cvt_sr_bf8_f16 v1, -v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x20]
v_cvt_sr_bf8_f16 v1, |v2.l|, v3
// GFX1250: v_cvt_sr_bf8_f16 v1, |v2.l|, v3 ; encoding: [0x01,0x01,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f16 v1, |v2.h|, v3
// GFX1250: v_cvt_sr_bf8_f16 v1, |v2.h|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:2
// GFX1250: v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:2 ; encoding: [0x01,0x40,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:1
// GFX1250: v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:1 ; encoding: [0x01,0x20,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:3
// GFX1250: v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:3 ; encoding: [0x01,0x60,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f16 v1, v2.h, v3 byte_sel:1
// GFX1250: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f16 v1, v2.h, v3 byte_sel:2
// GFX1250: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f16 v1, v2.h, v3 byte_sel:3
// GFX1250: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x75,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f16 v1, v2.l, v3
// GFX1250: v_cvt_sr_fp8_f16 v1, v2.l, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f16 v1, v2.h, v3
// GFX1250: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x74,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f16 v1, v2, s3
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, s3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x00,0x00]
v_cvt_sr_fp8_f16 v1, v2, 0x1234
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, 0x1234 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00]
v_cvt_sr_fp8_f16 v1, -v2, v3
// GFX1250: v_cvt_sr_fp8_f16 v1, -v2, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x20]
v_cvt_sr_fp8_f16 v1, |v2.l|, v3
// GFX1250: v_cvt_sr_fp8_f16 v1, |v2.l|, v3 ; encoding: [0x01,0x01,0x74,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f16 v1, |v2.h|, v3
// GFX1250: v_cvt_sr_fp8_f16 v1, |v2.h|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x74,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:2
// GFX1250: v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:2 ; encoding: [0x01,0x40,0x74,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:1
// GFX1250: v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:1 ; encoding: [0x01,0x20,0x74,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:3
// GFX1250: v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:3 ; encoding: [0x01,0x60,0x74,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f16 v1, v2.h, v3 byte_sel:1
// GFX1250: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x74,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f16 v1, v2.h, v3 byte_sel:2
// GFX1250: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x74,0xd7,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f16 v1, v2.h, v3 byte_sel:3
// GFX1250: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x74,0xd7,0x02,0x07,0x02,0x00]

View File

@ -329,3 +329,67 @@ v_ashr_pk_u8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
v_ashr_pk_u8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_pk_bf8_f16 v1, v2 op_sel:[0,0] quad_perm:[1,2,3,0]
// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_pk_bf8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1
// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_pk_fp8_f16 v1, v2 op_sel:[0,0] quad_perm:[1,2,3,0]
// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1
// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2, v3 quad_perm:[0,1,2,3] fi:1
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] quad_perm:[0,1,2,3] fi:1
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3]
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3]
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:3 quad_perm:[0,1,2,3]
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] byte_sel:3 quad_perm:[0,1,2,3]
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2, v3 quad_perm:[0,1,2,3] fi:1
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] quad_perm:[0,1,2,3] fi:1
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3]
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3]
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:3 quad_perm:[0,1,2,3]
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] byte_sel:3 quad_perm:[0,1,2,3]
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

View File

@ -329,3 +329,67 @@ v_ashr_pk_u8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
v_ashr_pk_u8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_pk_bf8_f16 v1.l, v2 quad_perm:[1,2,3,0]
// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_pk_bf8_f16_e64_dpp v1.h, v2 row_share:0 row_mask:0x5 bank_mask:0x3 fi:1
// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1.h, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_pk_fp8_f16 v1.l, v2 quad_perm:[1,2,3,0]
// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 row_share:0 row_mask:0x5 bank_mask:0x3 fi:1
// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2.l, v3 quad_perm:[0,1,2,3] fi:1
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2.h, v3 quad_perm:[0,1,2,3] fi:1
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3]
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3]
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3]
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2.h, v3 byte_sel:3 quad_perm:[0,1,2,3]
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2.l, v3 quad_perm:[0,1,2,3] fi:1
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2.h, v3 quad_perm:[0,1,2,3] fi:1
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3]
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3]
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3]
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2.h, v3 byte_sel:3 quad_perm:[0,1,2,3]
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

View File

@ -225,3 +225,67 @@ v_ashr_pk_u8_i32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
v_ashr_pk_u8_i32 v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x40,0x91,0xd6,0xea,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_pk_bf8_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x73,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_pk_bf8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_pk_fp8_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x72,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] dpp8:[1,2,3,4,5,6,7,0] fi:1
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0]
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0]
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0]
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0]
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] dpp8:[1,2,3,4,5,6,7,0] fi:1
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0]
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0]
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0]
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0]
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

View File

@ -225,3 +225,67 @@ v_ashr_pk_u8_i32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
v_ashr_pk_u8_i32 v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x40,0x91,0xd6,0xea,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_pk_bf8_f16 v1.l, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x73,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_pk_bf8_f16_e64_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1.h, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_pk_fp8_f16 v1.l, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x72,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2.h, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0]
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0]
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0]
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_bf8_f16 v1, v2.h, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0]
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2.h, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0]
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0]
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0]
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_cvt_sr_fp8_f16 v1, v2.h, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0]
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

View File

@ -96,3 +96,28 @@ v_ashr_pk_u8_i32 v1, v2, v3, v4 clamp
// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX125X-ERR-NEXT:{{^}}v_ashr_pk_u8_i32 v1, v2, v3, v4 clamp
// GFX125X-ERR-NEXT:{{^}} ^
v_cvt_sr_bf8_f16 v1, v2, v3 clamp
// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX125X-ERR-NEXT:{{^}}v_cvt_sr_bf8_f16 v1, v2, v3 clamp
// GFX125X-ERR-NEXT:{{^}} ^
v_cvt_sr_bf8_f16 v1, v2, v3 mul:2
// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
// GFX125X-ERR-NEXT:{{^}}v_cvt_sr_bf8_f16 v1, v2, v3 mul:2
// GFX125X-ERR-NEXT:{{^}} ^
v_cvt_sr_fp8_f16 v1, v2, v3 clamp
// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX125X-ERR-NEXT:{{^}}v_cvt_sr_fp8_f16 v1, v2, v3 clamp
// GFX125X-ERR-NEXT:{{^}} ^
v_cvt_sr_fp8_f16 v1, v2, v3 mul:2
// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
// GFX125X-ERR-NEXT:{{^}}v_cvt_sr_fp8_f16 v1, v2, v3 mul:2
// GFX125X-ERR-NEXT:{{^}} ^
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:4
// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid byte_sel value.
// GFX125X-ERR-NEXT:{{^}}v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:4
// GFX125X-ERR-NEXT:{{^}} ^

View File

@ -422,6 +422,170 @@
0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04
# GFX1250: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04]
## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
# GFX1250-FAKE16: {{.*}}
# GFX1250-REAL16: {{.*}}
0x01,0x00,0x73,0xd7,0x02,0x01,0x00,0x00
# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, v2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x01,0x00,0x00]
# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, v2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x01,0x00,0x00]
0x01,0x40,0x73,0xd7,0x02,0x01,0x00,0x00
# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.h, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x73,0xd7,0x02,0x01,0x00,0x00]
# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x73,0xd7,0x02,0x01,0x00,0x00]
0x01,0x80,0x73,0xd7,0x02,0x01,0x00,0x00
# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, v2 clamp ; encoding: [0x01,0x80,0x73,0xd7,0x02,0x01,0x00,0x00]
# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, v2 clamp ; encoding: [0x01,0x80,0x73,0xd7,0x02,0x01,0x00,0x00]
0x01,0x00,0x73,0xd7,0x02,0x00,0x00,0x00
# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, s2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x00,0x00,0x00]
# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, s2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x00,0x00,0x00]
0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00
# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, 0x5640 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00]
# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, 0x5640 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00]
0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00
# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, 1 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, 1 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00
# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, 0x3800 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, 0x3800 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00
# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, 0x3118 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, 0x3118 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
0x01,0x00,0x72,0xd7,0x02,0x01,0x00,0x00
# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, v2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x01,0x00,0x00]
# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, v2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x01,0x00,0x00]
0x01,0x40,0x72,0xd7,0x02,0x01,0x00,0x00
# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.h, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x72,0xd7,0x02,0x01,0x00,0x00]
# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x72,0xd7,0x02,0x01,0x00,0x00]
0x01,0x80,0x72,0xd7,0x02,0x01,0x00,0x00
# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, v2 clamp ; encoding: [0x01,0x80,0x72,0xd7,0x02,0x01,0x00,0x00]
# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, v2 clamp ; encoding: [0x01,0x80,0x72,0xd7,0x02,0x01,0x00,0x00]
0x01,0x00,0x72,0xd7,0x02,0x00,0x00,0x00
# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, s2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x00,0x00,0x00]
# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, s2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x00,0x00,0x00]
0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00
# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, 0x5640 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00]
# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, 0x5640 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00]
0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00
# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, 1 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, 1 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00
# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, 0x3800 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, 0x3800 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00
# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x20
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, -v2.l, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x20]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, -v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x20]
0x01,0x08,0x75,0xd7,0x02,0x07,0x02,0x00
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x75,0xd7,0x02,0x07,0x02,0x00]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x75,0xd7,0x02,0x07,0x02,0x00]
0x01,0x00,0x75,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.l, 0x1234 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, 0x1234 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00]
0x01,0x00,0x75,0xd7,0x02,0x07,0x00,0x00
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.l, s3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x00,0x00]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, s3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x00,0x00]
0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.l, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00]
0x01,0x20,0x75,0xd7,0x02,0x07,0x02,0x00
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:1 ; encoding: [0x01,0x20,0x75,0xd7,0x02,0x07,0x02,0x00]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1 ; encoding: [0x01,0x20,0x75,0xd7,0x02,0x07,0x02,0x00]
0x01,0x40,0x75,0xd7,0x02,0x07,0x02,0x00
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:2 ; encoding: [0x01,0x40,0x75,0xd7,0x02,0x07,0x02,0x00]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2 ; encoding: [0x01,0x40,0x75,0xd7,0x02,0x07,0x02,0x00]
0x01,0x60,0x75,0xd7,0x02,0x07,0x02,0x00
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:3 ; encoding: [0x01,0x60,0x75,0xd7,0x02,0x07,0x02,0x00]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:3 ; encoding: [0x01,0x60,0x75,0xd7,0x02,0x07,0x02,0x00]
0x01,0x28,0x75,0xd7,0x02,0x07,0x02,0x00
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x75,0xd7,0x02,0x07,0x02,0x00]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x75,0xd7,0x02,0x07,0x02,0x00]
0x01,0x48,0x75,0xd7,0x02,0x07,0x02,0x00
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x75,0xd7,0x02,0x07,0x02,0x00]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x75,0xd7,0x02,0x07,0x02,0x00]
0x01,0x68,0x75,0xd7,0x02,0x07,0x02,0x00
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x75,0xd7,0x02,0x07,0x02,0x00]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x75,0xd7,0x02,0x07,0x02,0x00]
0x01,0x01,0x75,0xd7,0x02,0x07,0x02,0x00
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, |v2.l|, v3 ; encoding: [0x01,0x01,0x75,0xd7,0x02,0x07,0x02,0x00]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, |v2|, v3 ; encoding: [0x01,0x01,0x75,0xd7,0x02,0x07,0x02,0x00]
0x01,0x08,0x74,0xd7,0x02,0x07,0x02,0x00
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x74,0xd7,0x02,0x07,0x02,0x00]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x74,0xd7,0x02,0x07,0x02,0x00]
0x01,0x09,0x75,0xd7,0x02,0x07,0x02,0x00
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, |v2.h|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x75,0xd7,0x02,0x07,0x02,0x00]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, |v2|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x75,0xd7,0x02,0x07,0x02,0x00]
0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x20
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, -v2.l, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x20]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, -v2, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x20]
0x01,0x00,0x74,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.l, 0x1234 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, 0x1234 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00]
0x01,0x00,0x74,0xd7,0x02,0x07,0x00,0x00
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.l, s3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x00,0x00]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, s3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x00,0x00]
0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x00
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.l, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x00]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x00]
0x01,0x20,0x74,0xd7,0x02,0x07,0x02,0x00
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:1 ; encoding: [0x01,0x20,0x74,0xd7,0x02,0x07,0x02,0x00]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1 ; encoding: [0x01,0x20,0x74,0xd7,0x02,0x07,0x02,0x00]
0x01,0x40,0x74,0xd7,0x02,0x07,0x02,0x00
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:2 ; encoding: [0x01,0x40,0x74,0xd7,0x02,0x07,0x02,0x00]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2 ; encoding: [0x01,0x40,0x74,0xd7,0x02,0x07,0x02,0x00]
0x01,0x60,0x74,0xd7,0x02,0x07,0x02,0x00
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:3 ; encoding: [0x01,0x60,0x74,0xd7,0x02,0x07,0x02,0x00]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:3 ; encoding: [0x01,0x60,0x74,0xd7,0x02,0x07,0x02,0x00]
0x01,0x01,0x74,0xd7,0x02,0x07,0x02,0x00
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, |v2.l|, v3 ; encoding: [0x01,0x01,0x74,0xd7,0x02,0x07,0x02,0x00]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, |v2|, v3 ; encoding: [0x01,0x01,0x74,0xd7,0x02,0x07,0x02,0x00]
0x01,0x28,0x74,0xd7,0x02,0x07,0x02,0x00
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x74,0xd7,0x02,0x07,0x02,0x00]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x74,0xd7,0x02,0x07,0x02,0x00]
0x01,0x48,0x74,0xd7,0x02,0x07,0x02,0x00
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x74,0xd7,0x02,0x07,0x02,0x00]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x74,0xd7,0x02,0x07,0x02,0x00]
0x01,0x68,0x74,0xd7,0x02,0x07,0x02,0x00
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x74,0xd7,0x02,0x07,0x02,0x00]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x74,0xd7,0x02,0x07,0x02,0x00]
0x01,0x09,0x74,0xd7,0x02,0x07,0x02,0x00
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, |v2.h|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x74,0xd7,0x02,0x07,0x02,0x00]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, |v2|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x74,0xd7,0x02,0x07,0x02,0x00]

View File

@ -269,3 +269,67 @@
0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff
# GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
0x01,0x00,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff
# GFX1250-REAL16: v_cvt_pk_bf8_f16_e64_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
# GFX1250-FAKE16: v_cvt_pk_bf8_f16_e64_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
0x01,0x40,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53
# GFX1250-REAL16: v_cvt_pk_bf8_f16_e64_dpp v1.h, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53]
# GFX1250-FAKE16: v_cvt_pk_bf8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53]
0x01,0x00,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff
# GFX1250-REAL16: v_cvt_pk_fp8_f16_e64_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
# GFX1250-FAKE16: v_cvt_pk_fp8_f16_e64_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
0x01,0x40,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53
# GFX1250-REAL16: v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53]
# GFX1250-FAKE16: v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53]
0x01,0x20,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
0x01,0x08,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
0x01,0x40,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
0x01,0x60,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
0x01,0x68,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
0x01,0x00,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
0x01,0x08,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
0x01,0x20,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
0x01,0x40,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
0x01,0x60,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
0x01,0x00,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
0x01,0x68,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]

View File

@ -187,3 +187,67 @@
0x05,0x00,0x91,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
# GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x91,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
0x01,0x00,0x73,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05
# GFX1250-REAL16: v_cvt_pk_bf8_f16_e64_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x73,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
# GFX1250-FAKE16: v_cvt_pk_bf8_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x73,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
0x01,0x40,0x73,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05
# GFX1250-REAL16: v_cvt_pk_bf8_f16_e64_dpp v1.h, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
# GFX1250-FAKE16: v_cvt_pk_bf8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
0x01,0x00,0x72,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05
# GFX1250-REAL16: v_cvt_pk_fp8_f16_e64_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x72,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
# GFX1250-FAKE16: v_cvt_pk_fp8_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x72,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
0x01,0x40,0x72,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05
# GFX1250-REAL16: v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
# GFX1250-FAKE16: v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
0x01,0x00,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
0x01,0x08,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
0x01,0x20,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
0x01,0x40,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
0x01,0x60,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
0x01,0x68,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
0x01,0x00,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
0x01,0x08,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
0x01,0x20,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
0x01,0x40,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
0x01,0x60,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
0x01,0x68,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]