[AMDGPU] Add v_cvt_sr|pk_bf8|fp8_f16 gfx1250 instructions (#151415)
This commit is contained in:
parent
a8d0ae3412
commit
ce40863209
@ -702,6 +702,10 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f16, "sV2h", "nc", "gfx1250-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f16, "sV2h", "nc", "gfx1250-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f16, "ihiUiIi", "nc", "gfx1250-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f16, "ihiUiIi", "nc", "gfx1250-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_i4_i8, "UsUi", "nc", "gfx1250-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_u4_u8, "UsUi", "nc", "gfx1250-insts")
|
||||
|
||||
|
@ -398,6 +398,144 @@ void test_cvt_pk_f16_bf8(global half2* out, short a)
|
||||
out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_cvt_pk_bf8_f16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
|
||||
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
|
||||
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
|
||||
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: store <2 x half> [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x half> [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: store i16 [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void test_cvt_pk_bf8_f16(global short* out, half2 a)
|
||||
{
|
||||
*out = __builtin_amdgcn_cvt_pk_bf8_f16(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_cvt_pk_fp8_f16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
|
||||
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
|
||||
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
|
||||
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: store <2 x half> [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.amdgcn.cvt.pk.fp8.f16(<2 x half> [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: store i16 [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void test_cvt_pk_fp8_f16(global short* out, half2 a)
|
||||
{
|
||||
*out = __builtin_amdgcn_cvt_pk_fp8_f16(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_cvt_sr_bf8_f16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2, addrspace(5)
|
||||
// CHECK-NEXT: [[SR_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK-NEXT: [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
|
||||
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
|
||||
// CHECK-NEXT: [[SR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SR_ADDR]] to ptr
|
||||
// CHECK-NEXT: [[OLD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OLD_ADDR]] to ptr
|
||||
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
|
||||
// CHECK-NEXT: store i32 [[SR:%.*]], ptr [[SR_ADDR_ASCAST]], align 4
|
||||
// CHECK-NEXT: store i32 [[OLD:%.*]], ptr [[OLD_ADDR_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 0)
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
|
||||
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half [[TMP5]], i32 [[TMP6]], i32 [[TMP7]], i32 1)
|
||||
// CHECK-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) [[TMP9]], align 4
|
||||
// CHECK-NEXT: [[TMP10:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
|
||||
// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half [[TMP10]], i32 [[TMP11]], i32 [[TMP12]], i32 2)
|
||||
// CHECK-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: store i32 [[TMP13]], ptr addrspace(1) [[TMP14]], align 4
|
||||
// CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
|
||||
// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half [[TMP15]], i32 [[TMP16]], i32 [[TMP17]], i32 3)
|
||||
// CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: store i32 [[TMP18]], ptr addrspace(1) [[TMP19]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void test_cvt_sr_bf8_f16(global int* out, half a, uint sr, int old)
|
||||
{
|
||||
*out = __builtin_amdgcn_cvt_sr_bf8_f16(a, sr, old, 0);
|
||||
*out = __builtin_amdgcn_cvt_sr_bf8_f16(a, sr, old, 1);
|
||||
*out = __builtin_amdgcn_cvt_sr_bf8_f16(a, sr, old, 2);
|
||||
*out = __builtin_amdgcn_cvt_sr_bf8_f16(a, sr, old, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_cvt_sr_fp8_f16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2, addrspace(5)
|
||||
// CHECK-NEXT: [[SR_ADDR:%.*]] = alloca i16, align 2, addrspace(5)
|
||||
// CHECK-NEXT: [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
|
||||
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
|
||||
// CHECK-NEXT: [[SR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SR_ADDR]] to ptr
|
||||
// CHECK-NEXT: [[OLD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OLD_ADDR]] to ptr
|
||||
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
|
||||
// CHECK-NEXT: store i16 [[SR:%.*]], ptr [[SR_ADDR_ASCAST]], align 2
|
||||
// CHECK-NEXT: store i32 [[OLD:%.*]], ptr [[OLD_ADDR_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[SR_ADDR_ASCAST]], align 2
|
||||
// CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half [[TMP0]], i32 [[CONV]], i32 [[TMP2]], i32 0)
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
|
||||
// CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[SR_ADDR_ASCAST]], align 2
|
||||
// CHECK-NEXT: [[CONV1:%.*]] = sext i16 [[TMP6]] to i32
|
||||
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half [[TMP5]], i32 [[CONV1]], i32 [[TMP7]], i32 1)
|
||||
// CHECK-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) [[TMP9]], align 4
|
||||
// CHECK-NEXT: [[TMP10:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
|
||||
// CHECK-NEXT: [[TMP11:%.*]] = load i16, ptr [[SR_ADDR_ASCAST]], align 2
|
||||
// CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP11]] to i32
|
||||
// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half [[TMP10]], i32 [[CONV2]], i32 [[TMP12]], i32 2)
|
||||
// CHECK-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: store i32 [[TMP13]], ptr addrspace(1) [[TMP14]], align 4
|
||||
// CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
|
||||
// CHECK-NEXT: [[TMP16:%.*]] = load i16, ptr [[SR_ADDR_ASCAST]], align 2
|
||||
// CHECK-NEXT: [[CONV3:%.*]] = sext i16 [[TMP16]] to i32
|
||||
// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half [[TMP15]], i32 [[CONV3]], i32 [[TMP17]], i32 3)
|
||||
// CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: store i32 [[TMP18]], ptr addrspace(1) [[TMP19]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void test_cvt_sr_fp8_f16(global int* out, half a, short sr, int old)
|
||||
{
|
||||
*out = __builtin_amdgcn_cvt_sr_fp8_f16(a, sr, old, 0);
|
||||
*out = __builtin_amdgcn_cvt_sr_fp8_f16(a, sr, old, 1);
|
||||
*out = __builtin_amdgcn_cvt_sr_fp8_f16(a, sr, old, 2);
|
||||
*out = __builtin_amdgcn_cvt_sr_fp8_f16(a, sr, old, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_sat_pk4_i4_i8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
||||
|
@ -605,6 +605,30 @@ def int_amdgcn_cvt_pk_f16_bf8 : DefaultAttrsIntrinsic<
|
||||
[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable]
|
||||
>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_bf8">;
|
||||
|
||||
def int_amdgcn_cvt_pk_fp8_f16
|
||||
: DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty],
|
||||
[IntrNoMem, IntrSpeculatable]>,
|
||||
ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f16">;
|
||||
|
||||
def int_amdgcn_cvt_pk_bf8_f16
|
||||
: DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty],
|
||||
[IntrNoMem, IntrSpeculatable]>,
|
||||
ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f16">;
|
||||
|
||||
// llvm.amdgcn.cvt.sr.fp8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3]
|
||||
// byte_sel selects byte to write in vdst.
|
||||
def int_amdgcn_cvt_sr_fp8_f16 : DefaultAttrsIntrinsic<
|
||||
[llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
|
||||
>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f16">;
|
||||
|
||||
// llvm.amdgcn.cvt.sr.bf8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3]
|
||||
// byte_sel selects byte to write in vdst.
|
||||
def int_amdgcn_cvt_sr_bf8_f16 : DefaultAttrsIntrinsic<
|
||||
[llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
|
||||
>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f16">;
|
||||
|
||||
class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
|
||||
[DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
|
||||
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
|
||||
|
@ -4577,6 +4577,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||
case Intrinsic::amdgcn_cvt_sr_pk_bf16_f32:
|
||||
case Intrinsic::amdgcn_cvt_pk_f16_fp8:
|
||||
case Intrinsic::amdgcn_cvt_pk_f16_bf8:
|
||||
case Intrinsic::amdgcn_cvt_pk_fp8_f16:
|
||||
case Intrinsic::amdgcn_cvt_pk_bf8_f16:
|
||||
case Intrinsic::amdgcn_cvt_sr_fp8_f16:
|
||||
case Intrinsic::amdgcn_cvt_sr_bf8_f16:
|
||||
case Intrinsic::amdgcn_sat_pk4_i4_i8:
|
||||
case Intrinsic::amdgcn_sat_pk4_u4_u8:
|
||||
case Intrinsic::amdgcn_fmed3:
|
||||
|
@ -689,6 +689,8 @@ public:
|
||||
|
||||
bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
|
||||
|
||||
bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
|
||||
|
||||
bool isVISrcB32() const {
|
||||
return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
|
||||
}
|
||||
@ -2036,6 +2038,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
|
||||
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
|
||||
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
|
||||
case AMDGPU::OPERAND_REG_IMM_V2FP16:
|
||||
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
|
||||
case AMDGPU::OPERAND_KIMM16:
|
||||
return &APFloat::IEEEhalf();
|
||||
case AMDGPU::OPERAND_REG_IMM_BF16:
|
||||
@ -2405,6 +2408,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
|
||||
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
|
||||
case AMDGPU::OPERAND_REG_IMM_V2INT16:
|
||||
case AMDGPU::OPERAND_REG_IMM_V2FP16:
|
||||
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
|
||||
case AMDGPU::OPERAND_REG_IMM_V2FP32:
|
||||
case AMDGPU::OPERAND_REG_IMM_V2INT32:
|
||||
case AMDGPU::OPERAND_KIMM32:
|
||||
@ -2456,6 +2460,9 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
|
||||
setImmKindConst();
|
||||
return;
|
||||
}
|
||||
[[fallthrough]];
|
||||
|
||||
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
|
||||
|
||||
Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
|
||||
setImmKindLiteral();
|
||||
@ -3761,6 +3768,9 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
|
||||
OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16)
|
||||
return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
|
||||
|
||||
if (OperandType == AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16)
|
||||
return false;
|
||||
|
||||
llvm_unreachable("invalid operand type");
|
||||
}
|
||||
default:
|
||||
@ -9421,7 +9431,19 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
|
||||
Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
|
||||
Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
|
||||
Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
|
||||
Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
|
||||
Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
|
||||
Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
|
||||
Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
|
||||
Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
|
||||
Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
|
||||
Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
|
||||
Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
|
||||
Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
|
||||
Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
|
||||
Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
|
||||
Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
|
||||
Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
|
||||
Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
|
||||
Inst.addOperand(Inst.getOperand(0));
|
||||
}
|
||||
|
||||
|
@ -540,6 +540,8 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
|
||||
printImmediateBFloat16(static_cast<uint16_t>(Imm), STI, O))
|
||||
return;
|
||||
break;
|
||||
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("bad operand type");
|
||||
}
|
||||
@ -770,6 +772,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
|
||||
case AMDGPU::OPERAND_REG_IMM_V2INT16:
|
||||
case AMDGPU::OPERAND_REG_IMM_V2BF16:
|
||||
case AMDGPU::OPERAND_REG_IMM_V2FP16:
|
||||
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
|
||||
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
|
||||
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
|
||||
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
|
||||
|
@ -341,6 +341,9 @@ std::optional<uint64_t> AMDGPUMCCodeEmitter::getLitEncoding(
|
||||
return AMDGPU::getInlineEncodingV2BF16(static_cast<uint32_t>(Imm))
|
||||
.value_or(255);
|
||||
|
||||
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
|
||||
return 255;
|
||||
|
||||
case AMDGPU::OPERAND_KIMM32:
|
||||
case AMDGPU::OPERAND_KIMM16:
|
||||
case AMDGPU::OPERAND_KIMM64:
|
||||
|
@ -208,6 +208,7 @@ enum OperandType : unsigned {
|
||||
OPERAND_REG_IMM_V2BF16,
|
||||
OPERAND_REG_IMM_V2FP16,
|
||||
OPERAND_REG_IMM_V2INT16,
|
||||
OPERAND_REG_IMM_NOINLINE_V2FP16,
|
||||
OPERAND_REG_IMM_V2INT32,
|
||||
OPERAND_REG_IMM_V2FP32,
|
||||
|
||||
|
@ -468,6 +468,7 @@ bool SIFoldOperandsImpl::canUseImmWithOpSel(const MachineInstr *MI,
|
||||
case AMDGPU::OPERAND_REG_IMM_V2FP16:
|
||||
case AMDGPU::OPERAND_REG_IMM_V2BF16:
|
||||
case AMDGPU::OPERAND_REG_IMM_V2INT16:
|
||||
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
|
||||
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
|
||||
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
|
||||
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
|
||||
|
@ -4438,6 +4438,8 @@ bool SIInstrInfo::isInlineConstant(int64_t Imm, uint8_t OperandType) const {
|
||||
case AMDGPU::OPERAND_REG_IMM_V2BF16:
|
||||
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
|
||||
return AMDGPU::isInlinableLiteralV2BF16(Imm);
|
||||
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
|
||||
return false;
|
||||
case AMDGPU::OPERAND_REG_IMM_FP16:
|
||||
case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
|
||||
if (isInt<16>(Imm) || isUInt<16>(Imm)) {
|
||||
|
@ -2859,6 +2859,7 @@ def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
|
||||
def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>;
|
||||
def VOP_BF16_BF16 : VOPProfile<[bf16, bf16, untyped, untyped]>;
|
||||
def VOP1_I16_I32 : VOPProfile<[i16, i32, untyped, untyped]>;
|
||||
def VOP_I16_V2F16 : VOPProfile<[i16, v2f16, untyped, untyped]>;
|
||||
|
||||
def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
|
||||
def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
|
||||
|
@ -1218,6 +1218,8 @@ def VSrc_f64 : SrcRegOrImm9 <VS_64, "OPERAND_REG_IMM_FP64"> {
|
||||
def VSrc_v2b32 : SrcRegOrImm9 <VS_64, "OPERAND_REG_IMM_V2INT32">;
|
||||
def VSrc_v2f32 : SrcRegOrImm9 <VS_64, "OPERAND_REG_IMM_V2FP32">;
|
||||
|
||||
def VSrc_NoInline_v2f16 : SrcRegOrImm9 <VS_32, "OPERAND_REG_IMM_NOINLINE_V2FP16">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VRegSrc_* Operands with a VGPR
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -2659,6 +2659,7 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
|
||||
case AMDGPU::OPERAND_REG_IMM_FP64:
|
||||
case AMDGPU::OPERAND_REG_IMM_FP16:
|
||||
case AMDGPU::OPERAND_REG_IMM_V2FP16:
|
||||
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
|
||||
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
|
||||
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
|
||||
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
|
||||
@ -3023,6 +3024,8 @@ bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
|
||||
case AMDGPU::OPERAND_REG_IMM_V2BF16:
|
||||
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
|
||||
return isInlinableLiteralV2BF16(Literal);
|
||||
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
|
||||
return false;
|
||||
default:
|
||||
llvm_unreachable("bad packed operand type");
|
||||
}
|
||||
|
@ -1636,6 +1636,7 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
|
||||
case AMDGPU::OPERAND_REG_IMM_V2INT16:
|
||||
case AMDGPU::OPERAND_REG_IMM_V2BF16:
|
||||
case AMDGPU::OPERAND_REG_IMM_V2FP16:
|
||||
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
|
||||
return 2;
|
||||
|
||||
default:
|
||||
|
@ -1623,6 +1623,53 @@ let SubtargetPredicate = HasBF16ConversionInsts in {
|
||||
(V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, 0, (f32 (IMPLICIT_DEF)))>;
|
||||
}
|
||||
|
||||
let Src0RC64 = VSrc_NoInline_v2f16 in {
|
||||
def VOP3_CVT_PK_F8_F16_Profile : VOP3_Profile<VOP_I16_V2F16>;
|
||||
def VOP3_CVT_PK_F8_F16_True16_Profile : VOP3_Profile_True16<VOP3_CVT_PK_F8_F16_Profile>;
|
||||
def VOP3_CVT_PK_F8_F16_Fake16_Profile : VOP3_Profile_Fake16<VOP3_CVT_PK_F8_F16_Profile>;
|
||||
}
|
||||
|
||||
let ReadsModeReg = 0, IsPacked = 0, SubtargetPredicate = isGFX125xOnly in {
|
||||
defm V_CVT_PK_FP8_F16_gfx1250 : VOP3Inst_t16_with_profiles<"v_cvt_pk_fp8_f16_gfx1250",
|
||||
VOP3_CVT_PK_F8_F16_Profile,
|
||||
VOP3_CVT_PK_F8_F16_True16_Profile,
|
||||
VOP3_CVT_PK_F8_F16_Fake16_Profile,
|
||||
int_amdgcn_cvt_pk_fp8_f16>;
|
||||
defm V_CVT_PK_BF8_F16_gfx1250 : VOP3Inst_t16_with_profiles<"v_cvt_pk_bf8_f16_gfx1250",
|
||||
VOP3_CVT_PK_F8_F16_Profile,
|
||||
VOP3_CVT_PK_F8_F16_True16_Profile,
|
||||
VOP3_CVT_PK_F8_F16_Fake16_Profile,
|
||||
int_amdgcn_cvt_pk_bf8_f16>;
|
||||
}
|
||||
|
||||
let HasClamp = 0, HasOpSel = 1 in {
|
||||
def VOP3_CVT_SR_F8_F16_Profile : VOP3_CVT_SR_F8_ByteSel_Profile<f16>;
|
||||
def VOP3_CVT_SR_F8_F16_True16_Profile : VOP3_Profile_True16<VOP3_CVT_SR_F8_F16_Profile>;
|
||||
def VOP3_CVT_SR_F8_F16_Fake16_Profile : VOP3_Profile_Fake16<VOP3_CVT_SR_F8_F16_Profile>;
|
||||
}
|
||||
|
||||
let SubtargetPredicate = isGFX1250Plus in {
|
||||
let ReadsModeReg = 0 in {
|
||||
// These instructions have non-standard use of op_sel. They are using bits 2 and 3 of opsel
|
||||
// to select a byte in the vdst. Bits 0 and 1 are unused.
|
||||
let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in {
|
||||
defm V_CVT_SR_FP8_F16 : VOP3Inst_t16_with_profiles<"v_cvt_sr_fp8_f16", VOP3_CVT_SR_F8_F16_Profile,
|
||||
VOP3_CVT_SR_F8_F16_True16_Profile, VOP3_CVT_SR_F8_F16_Fake16_Profile>;
|
||||
defm V_CVT_SR_BF8_F16 : VOP3Inst_t16_with_profiles<"v_cvt_sr_bf8_f16", VOP3_CVT_SR_F8_F16_Profile,
|
||||
VOP3_CVT_SR_F8_F16_True16_Profile, VOP3_CVT_SR_F8_F16_Fake16_Profile>;
|
||||
}
|
||||
} // End ReadsModeReg = 0
|
||||
|
||||
let True16Predicate = UseRealTrue16Insts in {
|
||||
def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_fp8_f16, V_CVT_SR_FP8_F16_t16_e64, f16>;
|
||||
def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_bf8_f16, V_CVT_SR_BF8_F16_t16_e64, f16>;
|
||||
}
|
||||
let True16Predicate = UseFakeTrue16Insts in {
|
||||
def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_fp8_f16, V_CVT_SR_FP8_F16_fake16_e64, f16>;
|
||||
def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_bf8_f16, V_CVT_SR_BF8_F16_fake16_e64, f16>;
|
||||
}
|
||||
} // End SubtargetPredicate = isGFX1250Plus
|
||||
|
||||
class Cvt_Scale_Sr_F32ToBF16F16_Pat<SDPatternOperator node, VOP3_Pseudo inst, ValueType DstTy> : GCNPat<
|
||||
(DstTy (node DstTy:$vdst_in, f32:$src0, i32:$src1, timm:$word_sel)),
|
||||
(inst (DstSelToOpSelXForm $word_sel), $src0, 0, $src1, VGPR_32:$vdst_in)
|
||||
@ -2019,6 +2066,10 @@ defm V_ASHR_PK_I8_I32 : VOP3Only_Realtriple_gfx1250<0x290>;
|
||||
defm V_ASHR_PK_U8_I32 : VOP3Only_Realtriple_gfx1250<0x291>;
|
||||
defm V_CVT_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36d>;
|
||||
defm V_CVT_SR_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36e>;
|
||||
defm V_CVT_PK_FP8_F16_gfx1250 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x372, "v_cvt_pk_fp8_f16">;
|
||||
defm V_CVT_PK_BF8_F16_gfx1250 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x373, "v_cvt_pk_bf8_f16">;
|
||||
defm V_CVT_SR_FP8_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x374>;
|
||||
defm V_CVT_SR_BF8_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x375>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GFX10.
|
||||
|
@ -2071,6 +2071,15 @@ multiclass VOP3Only_Real_Base_gfx1250<bits<10> op> :
|
||||
multiclass VOP3Only_Realtriple_gfx1250<bits<10> op, bit isSingle = 0> :
|
||||
VOP3_Realtriple<GFX1250Gen, op, isSingle>;
|
||||
|
||||
multiclass VOP3Only_Realtriple_with_name_gfx1250<bits<10> op, string opName,
|
||||
string asmName, string pseudo_mnemonic = "",
|
||||
bit isSingle = 0> :
|
||||
VOP3_Realtriple_with_name<GFX1250Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
|
||||
|
||||
multiclass VOP3Only_Realtriple_t16_gfx1250<bits<10> op, string asmName = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic,
|
||||
string opName = NAME, string pseudo_mnemonic = "", bit isSingle = 0> :
|
||||
VOP3Only_Realtriple_with_name_gfx1250<op, opName, asmName, pseudo_mnemonic, isSingle>;
|
||||
|
||||
multiclass VOP3_Realtriple_t16_gfx12<bits<10> op, string asmName, string opName = NAME,
|
||||
string pseudo_mnemonic = "", bit isSingle = 0> :
|
||||
VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
|
||||
@ -2091,6 +2100,13 @@ multiclass VOP3Only_Realtriple_t16_and_fake16_gfx12<bits<10> op, string asmName,
|
||||
defm _fake16 : VOP3Only_Realtriple_t16_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic>;
|
||||
}
|
||||
|
||||
multiclass VOP3Only_Realtriple_t16_and_fake16_gfx1250<bits<10> op,
|
||||
string asmName = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic,
|
||||
string opName = NAME, string pseudo_mnemonic = ""> {
|
||||
defm _t16 : VOP3Only_Realtriple_t16_gfx1250<op, asmName, opName#"_t16", pseudo_mnemonic>;
|
||||
defm _fake16 : VOP3Only_Realtriple_t16_gfx1250<op, asmName, opName#"_fake16", pseudo_mnemonic>;
|
||||
}
|
||||
|
||||
multiclass VOP3be_Real_with_name_gfx12<bits<10> op, string opName,
|
||||
string asmName, bit isSingle = 0> {
|
||||
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
|
||||
|
28
llvm/test/CodeGen/AMDGPU/code-size-estimate-gfx1250.ll
Normal file
28
llvm/test/CodeGen/AMDGPU/code-size-estimate-gfx1250.ll
Normal file
@ -0,0 +1,28 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
|
||||
|
||||
define i16 @cvt_pk_bf8_f16_v(ptr addrspace(1) %out) {
|
||||
; GFX1250-LABEL: cvt_pk_bf8_f16_v:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||
; GFX1250-NEXT: v_cvt_pk_bf8_f16 v0, 0x38003800 ; encoding: [0x00,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x38]
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||
%cvt = tail call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x half> <half 0xH3800, half 0xH3800>)
|
||||
ret i16 %cvt
|
||||
}
|
||||
|
||||
; GFX1250: codeLenInByte = 24
|
||||
|
||||
define i16 @cvt_pk_fp8_f16_v(ptr addrspace(1) %out) {
|
||||
; GFX1250-LABEL: cvt_pk_fp8_f16_v:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||
; GFX1250-NEXT: v_cvt_pk_fp8_f16 v0, 0x3800 ; encoding: [0x00,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||
%cvt = tail call i16 @llvm.amdgcn.cvt.pk.fp8.f16(<2 x half> <half 0xH3800, half 0xH0>)
|
||||
ret i16 %cvt
|
||||
}
|
||||
|
||||
; GFX1250: codeLenInByte = 24
|
539
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll
Normal file
539
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll
Normal file
@ -0,0 +1,539 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG-REAL16 %s
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG-FAKE16 %s
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-REAL16 %s
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-FAKE16 %s
|
||||
|
||||
declare i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x half>)
|
||||
declare i16 @llvm.amdgcn.cvt.pk.fp8.f16(<2 x half>)
|
||||
declare i32 @llvm.amdgcn.cvt.sr.bf8.f16(half, i32, i32, i32)
|
||||
declare i32 @llvm.amdgcn.cvt.sr.fp8.f16(half, i32, i32, i32)
|
||||
|
||||
define amdgpu_ps void @test_cvt_pk_bf8_f16_v(<2 x half> %a, ptr addrspace(1) %out) {
|
||||
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_bf8_f16_v:
|
||||
; GFX1250-SDAG-REAL16: ; %bb.0:
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_bf8_f16 v0.l, v0
|
||||
; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[2:3], v0
|
||||
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_bf8_f16_v:
|
||||
; GFX1250-SDAG-FAKE16: ; %bb.0:
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_bf8_f16 v0, v0
|
||||
; GFX1250-SDAG-FAKE16-NEXT: global_store_b16 v[2:3], v0, off
|
||||
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_bf8_f16_v:
|
||||
; GFX1250-GISEL-REAL16: ; %bb.0:
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_bf8_f16 v0.l, v0
|
||||
; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[4:5], v0
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_bf8_f16_v:
|
||||
; GFX1250-GISEL-FAKE16: ; %bb.0:
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_bf8_f16 v0, v0
|
||||
; GFX1250-GISEL-FAKE16-NEXT: global_store_b16 v[4:5], v0, off
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
|
||||
%cvt = tail call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x half> %a)
|
||||
store i16 %cvt, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @test_cvt_pk_bf8_f16_s(<2 x half> inreg %a, ptr addrspace(1) %out) {
|
||||
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_bf8_f16_s:
|
||||
; GFX1250-SDAG-REAL16: ; %bb.0:
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_bf8_f16 v2.l, s0
|
||||
; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[0:1], v2
|
||||
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_bf8_f16_s:
|
||||
; GFX1250-SDAG-FAKE16: ; %bb.0:
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_bf8_f16 v2, s0
|
||||
; GFX1250-SDAG-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
|
||||
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_bf8_f16_s:
|
||||
; GFX1250-GISEL-REAL16: ; %bb.0:
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_bf8_f16 v2.l, s0
|
||||
; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[0:1], v2
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_bf8_f16_s:
|
||||
; GFX1250-GISEL-FAKE16: ; %bb.0:
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_bf8_f16 v2, s0
|
||||
; GFX1250-GISEL-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
|
||||
%cvt = tail call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x half> %a)
|
||||
store i16 %cvt, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @test_cvt_pk_bf8_f16_l(ptr addrspace(1) %out) {
|
||||
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_bf8_f16_l:
|
||||
; GFX1250-SDAG-REAL16: ; %bb.0:
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_bf8_f16 v2.l, 0x56400000
|
||||
; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[0:1], v2
|
||||
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_bf8_f16_l:
|
||||
; GFX1250-SDAG-FAKE16: ; %bb.0:
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_bf8_f16 v2, 0x56400000
|
||||
; GFX1250-SDAG-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
|
||||
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_bf8_f16_l:
|
||||
; GFX1250-GISEL-REAL16: ; %bb.0:
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_bf8_f16 v2.l, 0x56400000
|
||||
; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[0:1], v2
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_bf8_f16_l:
|
||||
; GFX1250-GISEL-FAKE16: ; %bb.0:
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_bf8_f16 v2, 0x56400000
|
||||
; GFX1250-GISEL-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
|
||||
%cvt = tail call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x half> <half 0.0, half 100.0>)
|
||||
store i16 %cvt, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @test_cvt_pk_fp8_f16_v(<2 x half> %a, ptr addrspace(1) %out) {
|
||||
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_fp8_f16_v:
|
||||
; GFX1250-SDAG-REAL16: ; %bb.0:
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_fp8_f16 v0.l, v0
|
||||
; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[2:3], v0
|
||||
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_fp8_f16_v:
|
||||
; GFX1250-SDAG-FAKE16: ; %bb.0:
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_fp8_f16 v0, v0
|
||||
; GFX1250-SDAG-FAKE16-NEXT: global_store_b16 v[2:3], v0, off
|
||||
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_fp8_f16_v:
|
||||
; GFX1250-GISEL-REAL16: ; %bb.0:
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_fp8_f16 v0.l, v0
|
||||
; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[4:5], v0
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_fp8_f16_v:
|
||||
; GFX1250-GISEL-FAKE16: ; %bb.0:
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_fp8_f16 v0, v0
|
||||
; GFX1250-GISEL-FAKE16-NEXT: global_store_b16 v[4:5], v0, off
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
|
||||
%cvt = tail call i16 @llvm.amdgcn.cvt.pk.fp8.f16(<2 x half> %a)
|
||||
store i16 %cvt, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @test_cvt_pk_fp8_f16_s(<2 x half> inreg %a, ptr addrspace(1) %out) {
|
||||
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_fp8_f16_s:
|
||||
; GFX1250-SDAG-REAL16: ; %bb.0:
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_fp8_f16 v2.l, s0
|
||||
; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[0:1], v2
|
||||
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_fp8_f16_s:
|
||||
; GFX1250-SDAG-FAKE16: ; %bb.0:
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_fp8_f16 v2, s0
|
||||
; GFX1250-SDAG-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
|
||||
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_fp8_f16_s:
|
||||
; GFX1250-GISEL-REAL16: ; %bb.0:
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_fp8_f16 v2.l, s0
|
||||
; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[0:1], v2
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_fp8_f16_s:
|
||||
; GFX1250-GISEL-FAKE16: ; %bb.0:
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_fp8_f16 v2, s0
|
||||
; GFX1250-GISEL-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
|
||||
%cvt = tail call i16 @llvm.amdgcn.cvt.pk.fp8.f16(<2 x half> %a)
|
||||
store i16 %cvt, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @test_cvt_pk_fp8_f16_l(ptr addrspace(1) %out) {
|
||||
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_fp8_f16_l:
|
||||
; GFX1250-SDAG-REAL16: ; %bb.0:
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_fp8_f16 v2.l, 0x56400000
|
||||
; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[0:1], v2
|
||||
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_fp8_f16_l:
|
||||
; GFX1250-SDAG-FAKE16: ; %bb.0:
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_fp8_f16 v2, 0x56400000
|
||||
; GFX1250-SDAG-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
|
||||
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_fp8_f16_l:
|
||||
; GFX1250-GISEL-REAL16: ; %bb.0:
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_fp8_f16 v2.l, 0x56400000
|
||||
; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[0:1], v2
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_fp8_f16_l:
|
||||
; GFX1250-GISEL-FAKE16: ; %bb.0:
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_fp8_f16 v2, 0x56400000
|
||||
; GFX1250-GISEL-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
|
||||
%cvt = tail call i16 @llvm.amdgcn.cvt.pk.fp8.f16(<2 x half> <half 0.0, half 100.0>)
|
||||
store i16 %cvt, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @test_cvt_sr_bf8_f16_byte0(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) {
|
||||
; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_bf8_f16_byte0:
|
||||
; GFX1250-SDAG-REAL16: ; %bb.0:
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1
|
||||
; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte0:
|
||||
; GFX1250-SDAG-FAKE16: ; %bb.0:
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1
|
||||
; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_bf8_f16_byte0:
|
||||
; GFX1250-GISEL-REAL16: ; %bb.0:
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1
|
||||
; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte0:
|
||||
; GFX1250-GISEL-FAKE16: ; %bb.0:
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1
|
||||
; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
|
||||
%cvt = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half %a, i32 %sr, i32 %old, i32 0)
|
||||
store i32 %cvt, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @test_cvt_sr_bf8_f16_byte1(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) {
|
||||
; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_bf8_f16_byte1:
|
||||
; GFX1250-SDAG-REAL16: ; %bb.0:
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 byte_sel:1
|
||||
; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte1:
|
||||
; GFX1250-SDAG-FAKE16: ; %bb.0:
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 byte_sel:1
|
||||
; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_bf8_f16_byte1:
|
||||
; GFX1250-GISEL-REAL16: ; %bb.0:
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 byte_sel:1
|
||||
; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte1:
|
||||
; GFX1250-GISEL-FAKE16: ; %bb.0:
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 byte_sel:1
|
||||
; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
|
||||
%cvt = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half %a, i32 %sr, i32 %old, i32 1)
|
||||
store i32 %cvt, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @test_cvt_sr_bf8_f16_byte2(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) {
|
||||
; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_bf8_f16_byte2:
|
||||
; GFX1250-SDAG-REAL16: ; %bb.0:
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 byte_sel:2
|
||||
; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte2:
|
||||
; GFX1250-SDAG-FAKE16: ; %bb.0:
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 byte_sel:2
|
||||
; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_bf8_f16_byte2:
|
||||
; GFX1250-GISEL-REAL16: ; %bb.0:
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 byte_sel:2
|
||||
; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte2:
|
||||
; GFX1250-GISEL-FAKE16: ; %bb.0:
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 byte_sel:2
|
||||
; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
|
||||
%cvt = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half %a, i32 %sr, i32 %old, i32 2)
|
||||
store i32 %cvt, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @test_cvt_sr_bf8_f16_byte3(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) {
|
||||
; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_bf8_f16_byte3:
|
||||
; GFX1250-SDAG-REAL16: ; %bb.0:
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 byte_sel:3
|
||||
; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte3:
|
||||
; GFX1250-SDAG-FAKE16: ; %bb.0:
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 byte_sel:3
|
||||
; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_bf8_f16_byte3:
|
||||
; GFX1250-GISEL-REAL16: ; %bb.0:
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 byte_sel:3
|
||||
; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte3:
|
||||
; GFX1250-GISEL-FAKE16: ; %bb.0:
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 byte_sel:3
|
||||
; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
|
||||
%cvt = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half %a, i32 %sr, i32 %old, i32 3)
|
||||
store i32 %cvt, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @test_cvt_sr_bf8_f16_hi_byte0(<2 x half> %a, i32 %sr, i32 %old, ptr addrspace(1) %out) {
|
||||
; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_bf8_f16_hi_byte0:
|
||||
; GFX1250-SDAG-REAL16: ; %bb.0:
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.h, v1
|
||||
; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_bf8_f16_hi_byte0:
|
||||
; GFX1250-SDAG-FAKE16: ; %bb.0:
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_lshrrev_b32 v0, 16, v0
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_mov_b32_e32 v4, v3
|
||||
; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1
|
||||
; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_bf8_f16_hi_byte0:
|
||||
; GFX1250-GISEL-REAL16: ; %bb.0:
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_dual_lshrrev_b32 v0, 16, v0 :: v_dual_mov_b32 v6, v3
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_mov_b32_e32 v7, v4
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1
|
||||
; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_bf8_f16_hi_byte0:
|
||||
; GFX1250-GISEL-FAKE16: ; %bb.0:
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_dual_lshrrev_b32 v0, 16, v0 :: v_dual_mov_b32 v6, v3
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_mov_b32_e32 v7, v4
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1
|
||||
; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
|
||||
%a.1 = extractelement <2 x half> %a, i32 1
|
||||
%cvt = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half %a.1, i32 %sr, i32 %old, i32 0)
|
||||
store i32 %cvt, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @test_cvt_sr_fp8_f16_byte0(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) {
|
||||
; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_fp8_f16_byte0:
|
||||
; GFX1250-SDAG-REAL16: ; %bb.0:
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1
|
||||
; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte0:
|
||||
; GFX1250-SDAG-FAKE16: ; %bb.0:
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1
|
||||
; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_fp8_f16_byte0:
|
||||
; GFX1250-GISEL-REAL16: ; %bb.0:
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1
|
||||
; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte0:
|
||||
; GFX1250-GISEL-FAKE16: ; %bb.0:
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1
|
||||
; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
|
||||
%cvt = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %a, i32 %sr, i32 %old, i32 0)
|
||||
store i32 %cvt, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @test_cvt_sr_fp8_f16_byte1(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) {
|
||||
; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_fp8_f16_byte1:
|
||||
; GFX1250-SDAG-REAL16: ; %bb.0:
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 byte_sel:1
|
||||
; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte1:
|
||||
; GFX1250-SDAG-FAKE16: ; %bb.0:
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 byte_sel:1
|
||||
; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_fp8_f16_byte1:
|
||||
; GFX1250-GISEL-REAL16: ; %bb.0:
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 byte_sel:1
|
||||
; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte1:
|
||||
; GFX1250-GISEL-FAKE16: ; %bb.0:
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 byte_sel:1
|
||||
; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
|
||||
%cvt = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %a, i32 %sr, i32 %old, i32 1)
|
||||
store i32 %cvt, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @test_cvt_sr_fp8_f16_byte2(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) {
|
||||
; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_fp8_f16_byte2:
|
||||
; GFX1250-SDAG-REAL16: ; %bb.0:
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 byte_sel:2
|
||||
; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte2:
|
||||
; GFX1250-SDAG-FAKE16: ; %bb.0:
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 byte_sel:2
|
||||
; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_fp8_f16_byte2:
|
||||
; GFX1250-GISEL-REAL16: ; %bb.0:
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 byte_sel:2
|
||||
; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte2:
|
||||
; GFX1250-GISEL-FAKE16: ; %bb.0:
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 byte_sel:2
|
||||
; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
|
||||
%cvt = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %a, i32 %sr, i32 %old, i32 2)
|
||||
store i32 %cvt, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @test_cvt_sr_fp8_f16_byte3(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) {
|
||||
; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_fp8_f16_byte3:
|
||||
; GFX1250-SDAG-REAL16: ; %bb.0:
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 byte_sel:3
|
||||
; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte3:
|
||||
; GFX1250-SDAG-FAKE16: ; %bb.0:
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 byte_sel:3
|
||||
; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_fp8_f16_byte3:
|
||||
; GFX1250-GISEL-REAL16: ; %bb.0:
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 byte_sel:3
|
||||
; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte3:
|
||||
; GFX1250-GISEL-FAKE16: ; %bb.0:
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 byte_sel:3
|
||||
; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
|
||||
%cvt = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %a, i32 %sr, i32 %old, i32 3)
|
||||
store i32 %cvt, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @test_cvt_sr_fp8_f16_hi_byte0(<2 x half> %a, i32 %sr, i32 %old, ptr addrspace(1) %out) {
|
||||
; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_fp8_f16_hi_byte0:
|
||||
; GFX1250-SDAG-REAL16: ; %bb.0:
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.h, v1
|
||||
; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_fp8_f16_hi_byte0:
|
||||
; GFX1250-SDAG-FAKE16: ; %bb.0:
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_lshrrev_b32 v0, 16, v0
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_mov_b32_e32 v4, v3
|
||||
; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1
|
||||
; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
|
||||
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_fp8_f16_hi_byte0:
|
||||
; GFX1250-GISEL-REAL16: ; %bb.0:
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_dual_lshrrev_b32 v0, 16, v0 :: v_dual_mov_b32 v6, v3
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_mov_b32_e32 v7, v4
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1
|
||||
; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_fp8_f16_hi_byte0:
|
||||
; GFX1250-GISEL-FAKE16: ; %bb.0:
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_dual_lshrrev_b32 v0, 16, v0 :: v_dual_mov_b32 v6, v3
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_mov_b32_e32 v7, v4
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1
|
||||
; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
|
||||
%a.1 = extractelement <2 x half> %a, i32 1
|
||||
%cvt = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %a.1, i32 %sr, i32 %old, i32 0)
|
||||
store i32 %cvt, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||
; GFX1250: {{.*}}
|
@ -402,3 +402,148 @@ v_ashr_pk_u8_i32 v2, v4, v7, 12345
|
||||
|
||||
v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1]
|
||||
// GFX1250: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04]
|
||||
|
||||
v_cvt_pk_bf8_f16 v1, v2 op_sel:[0,0]
|
||||
// GFX1250: v_cvt_pk_bf8_f16 v1, v2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x01,0x00,0x00]
|
||||
|
||||
v_cvt_pk_bf8_f16 v1, v2 op_sel:[0,1]
|
||||
// GFX1250: v_cvt_pk_bf8_f16 v1, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x73,0xd7,0x02,0x01,0x00,0x00]
|
||||
|
||||
v_cvt_pk_bf8_f16 v1, v2 clamp
|
||||
// GFX1250: v_cvt_pk_bf8_f16 v1, v2 clamp ; encoding: [0x01,0x80,0x73,0xd7,0x02,0x01,0x00,0x00]
|
||||
|
||||
v_cvt_pk_bf8_f16 v1, s2
|
||||
// GFX1250: v_cvt_pk_bf8_f16 v1, s2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_pk_bf8_f16 v1, 100.0
|
||||
// GFX1250: v_cvt_pk_bf8_f16 v1, 0x5640 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00]
|
||||
|
||||
// Inline constants are not supported by v_cvt_pk_bf8_f16
|
||||
|
||||
v_cvt_pk_bf8_f16 v1, 1
|
||||
// GFX1250: v_cvt_pk_bf8_f16 v1, 1 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_pk_bf8_f16 v1, 0x3800
|
||||
// GFX1250: v_cvt_pk_bf8_f16 v1, 0x3800 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
|
||||
|
||||
v_cvt_pk_bf8_f16 v1, 0.5
|
||||
// GFX1250: v_cvt_pk_bf8_f16 v1, 0x3800 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
|
||||
|
||||
v_cvt_pk_bf8_f16 v1, 0x3118
|
||||
// GFX1250: v_cvt_pk_bf8_f16 v1, 0x3118 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
|
||||
|
||||
v_cvt_pk_bf8_f16 v1, 0.15915494
|
||||
// GFX1250: v_cvt_pk_bf8_f16 v1, 0x3118 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
|
||||
|
||||
v_cvt_pk_fp8_f16 v1, v2 op_sel:[0,0]
|
||||
// GFX1250: v_cvt_pk_fp8_f16 v1, v2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x01,0x00,0x00]
|
||||
|
||||
v_cvt_pk_fp8_f16 v1, v2 op_sel:[0,1]
|
||||
// GFX1250: v_cvt_pk_fp8_f16 v1, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x72,0xd7,0x02,0x01,0x00,0x00]
|
||||
|
||||
v_cvt_pk_fp8_f16 v1, v2 clamp
|
||||
// GFX1250: v_cvt_pk_fp8_f16 v1, v2 clamp ; encoding: [0x01,0x80,0x72,0xd7,0x02,0x01,0x00,0x00]
|
||||
|
||||
v_cvt_pk_fp8_f16 v1, s2
|
||||
// GFX1250: v_cvt_pk_fp8_f16 v1, s2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_pk_fp8_f16 v1, 100.0
|
||||
// GFX1250: v_cvt_pk_fp8_f16 v1, 0x5640 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00]
|
||||
|
||||
// Inline constants are not supported by v_cvt_pk_fp8_f16
|
||||
|
||||
v_cvt_pk_fp8_f16 v1, 1
|
||||
// GFX1250: v_cvt_pk_fp8_f16 v1, 1 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_pk_fp8_f16 v1, 0x3800
|
||||
// GFX1250: v_cvt_pk_fp8_f16 v1, 0x3800 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
|
||||
|
||||
v_cvt_pk_fp8_f16 v1, 0.5
|
||||
// GFX1250: v_cvt_pk_fp8_f16 v1, 0x3800 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
|
||||
|
||||
v_cvt_pk_fp8_f16 v1, 0x3118
|
||||
// GFX1250: v_cvt_pk_fp8_f16 v1, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
|
||||
|
||||
v_cvt_pk_fp8_f16 v1, 0.15915494
|
||||
// GFX1250: v_cvt_pk_fp8_f16 v1, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1]
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:0
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, s3
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, s3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x00,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, 0x1234
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, 0x1234 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, -v2, v3
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, -v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x20]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, |v2|, v3
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, |v2|, v3 ; encoding: [0x01,0x01,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, |v2|, v3 op_sel:[1]
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, |v2|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2 ; encoding: [0x01,0x40,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1 ; encoding: [0x01,0x20,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:3
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:3 ; encoding: [0x01,0x60,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] byte_sel:1
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] byte_sel:2
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] byte_sel:3
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1]
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, s3
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, s3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x00,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, 0x1234
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, 0x1234 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, -v2, v3
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, -v2, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x20]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, |v2|, v3
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, |v2|, v3 ; encoding: [0x01,0x01,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, |v2|, v3 op_sel:[1]
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, |v2|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2 ; encoding: [0x01,0x40,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1 ; encoding: [0x01,0x20,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:3
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:3 ; encoding: [0x01,0x60,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] byte_sel:1
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] byte_sel:2
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] byte_sel:3
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
@ -402,3 +402,148 @@ v_ashr_pk_u8_i32 v2, v4, v7, 12345
|
||||
|
||||
v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1]
|
||||
// GFX1250: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04]
|
||||
|
||||
v_cvt_pk_bf8_f16 v1.l, v2
|
||||
// GFX1250: v_cvt_pk_bf8_f16 v1.l, v2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x01,0x00,0x00]
|
||||
|
||||
v_cvt_pk_bf8_f16 v1.h, v2
|
||||
// GFX1250: v_cvt_pk_bf8_f16 v1.h, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x73,0xd7,0x02,0x01,0x00,0x00]
|
||||
|
||||
v_cvt_pk_bf8_f16 v0.l, v2 clamp
|
||||
// GFX1250: v_cvt_pk_bf8_f16 v0.l, v2 clamp ; encoding: [0x00,0x80,0x73,0xd7,0x02,0x01,0x00,0x00]
|
||||
|
||||
v_cvt_pk_bf8_f16 v1.l, s2
|
||||
// GFX1250: v_cvt_pk_bf8_f16 v1.l, s2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_pk_bf8_f16 v1.l, 100.0
|
||||
// GFX1250: v_cvt_pk_bf8_f16 v1.l, 0x5640 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00]
|
||||
|
||||
// Inline constants are not supported by v_cvt_pk_bf8_f16
|
||||
|
||||
v_cvt_pk_bf8_f16 v1.l, 1
|
||||
// GFX1250: v_cvt_pk_bf8_f16 v1.l, 1 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_pk_bf8_f16 v1.l, 0x3800
|
||||
// GFX1250: v_cvt_pk_bf8_f16 v1.l, 0x3800 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
|
||||
|
||||
v_cvt_pk_bf8_f16 v1.l, 0.5
|
||||
// GFX1250: v_cvt_pk_bf8_f16 v1.l, 0x3800 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
|
||||
|
||||
v_cvt_pk_bf8_f16 v1.l, 0x3118
|
||||
// GFX1250: v_cvt_pk_bf8_f16 v1.l, 0x3118 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
|
||||
|
||||
v_cvt_pk_bf8_f16 v1.l, 0.15915494
|
||||
// GFX1250: v_cvt_pk_bf8_f16 v1.l, 0x3118 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
|
||||
|
||||
v_cvt_pk_fp8_f16 v1.l, v2
|
||||
// GFX1250: v_cvt_pk_fp8_f16 v1.l, v2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x01,0x00,0x00]
|
||||
|
||||
v_cvt_pk_fp8_f16 v1.h, v2
|
||||
// GFX1250: v_cvt_pk_fp8_f16 v1.h, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x72,0xd7,0x02,0x01,0x00,0x00]
|
||||
|
||||
v_cvt_pk_fp8_f16 v1.l, v2 clamp
|
||||
// GFX1250: v_cvt_pk_fp8_f16 v1.l, v2 clamp ; encoding: [0x01,0x80,0x72,0xd7,0x02,0x01,0x00,0x00]
|
||||
|
||||
v_cvt_pk_fp8_f16 v1.l, s2
|
||||
// GFX1250: v_cvt_pk_fp8_f16 v1.l, s2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_pk_fp8_f16 v1.l, 100.0
|
||||
// GFX1250: v_cvt_pk_fp8_f16 v1.l, 0x5640 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00]
|
||||
|
||||
// Inline constants are not supported by v_cvt_pk_fp8_f16
|
||||
|
||||
v_cvt_pk_fp8_f16 v1.l, 1
|
||||
// GFX1250: v_cvt_pk_fp8_f16 v1.l, 1 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
|
||||
|
||||
v_cvt_pk_fp8_f16 v1.l, 0x3800
|
||||
// GFX1250: v_cvt_pk_fp8_f16 v1.l, 0x3800 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
|
||||
|
||||
v_cvt_pk_fp8_f16 v1.l, 0.5
|
||||
// GFX1250: v_cvt_pk_fp8_f16 v1.l, 0x3800 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
|
||||
|
||||
v_cvt_pk_fp8_f16 v1.l, 0x3118
|
||||
// GFX1250: v_cvt_pk_fp8_f16 v1.l, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
|
||||
|
||||
v_cvt_pk_fp8_f16 v1.l, 0.15915494
|
||||
// GFX1250: v_cvt_pk_fp8_f16 v1.l, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2.l, v3
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2.l, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2.h, v3
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:0
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, s3
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, s3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x00,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, 0x1234
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, 0x1234 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, -v2, v3
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, -v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x20]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, |v2.l|, v3
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, |v2.l|, v3 ; encoding: [0x01,0x01,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, |v2.h|, v3
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, |v2.h|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:2
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:2 ; encoding: [0x01,0x40,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:1
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:1 ; encoding: [0x01,0x20,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:3
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:3 ; encoding: [0x01,0x60,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2.h, v3 byte_sel:1
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2.h, v3 byte_sel:2
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2.h, v3 byte_sel:3
|
||||
// GFX1250: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2.l, v3
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, v2.l, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2.h, v3
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, s3
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, s3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x00,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, 0x1234
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, v2, 0x1234 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, -v2, v3
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, -v2, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x20]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, |v2.l|, v3
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, |v2.l|, v3 ; encoding: [0x01,0x01,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, |v2.h|, v3
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, |v2.h|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:2
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:2 ; encoding: [0x01,0x40,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:1
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:1 ; encoding: [0x01,0x20,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:3
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:3 ; encoding: [0x01,0x60,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2.h, v3 byte_sel:1
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2.h, v3 byte_sel:2
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2.h, v3 byte_sel:3
|
||||
// GFX1250: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
@ -329,3 +329,67 @@ v_ashr_pk_u8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
|
||||
v_ashr_pk_u8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3
|
||||
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_bf8_f16 v1, v2 op_sel:[0,0] quad_perm:[1,2,3,0]
|
||||
// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_bf8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1
|
||||
// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_fp8_f16 v1, v2 op_sel:[0,0] quad_perm:[1,2,3,0]
|
||||
// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1
|
||||
// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 quad_perm:[0,1,2,3] fi:1
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] quad_perm:[0,1,2,3] fi:1
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3]
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3]
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:3 quad_perm:[0,1,2,3]
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] byte_sel:3 quad_perm:[0,1,2,3]
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 quad_perm:[0,1,2,3] fi:1
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] quad_perm:[0,1,2,3] fi:1
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3]
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3]
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:3 quad_perm:[0,1,2,3]
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] byte_sel:3 quad_perm:[0,1,2,3]
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
@ -329,3 +329,67 @@ v_ashr_pk_u8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
|
||||
v_ashr_pk_u8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3
|
||||
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_bf8_f16 v1.l, v2 quad_perm:[1,2,3,0]
|
||||
// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_bf8_f16_e64_dpp v1.h, v2 row_share:0 row_mask:0x5 bank_mask:0x3 fi:1
|
||||
// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1.h, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_fp8_f16 v1.l, v2 quad_perm:[1,2,3,0]
|
||||
// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 row_share:0 row_mask:0x5 bank_mask:0x3 fi:1
|
||||
// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2.l, v3 quad_perm:[0,1,2,3] fi:1
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2.h, v3 quad_perm:[0,1,2,3] fi:1
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3]
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3]
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3]
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2.h, v3 byte_sel:3 quad_perm:[0,1,2,3]
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2.l, v3 quad_perm:[0,1,2,3] fi:1
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2.h, v3 quad_perm:[0,1,2,3] fi:1
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3]
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3]
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3]
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2.h, v3 byte_sel:3 quad_perm:[0,1,2,3]
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
@ -225,3 +225,67 @@ v_ashr_pk_u8_i32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
|
||||
v_ashr_pk_u8_i32 v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1
|
||||
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x40,0x91,0xd6,0xea,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_bf8_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
|
||||
// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x73,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_bf8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1
|
||||
// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_fp8_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
|
||||
// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x72,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1
|
||||
// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] dpp8:[1,2,3,4,5,6,7,0] fi:1
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0]
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0]
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0]
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0]
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] dpp8:[1,2,3,4,5,6,7,0] fi:1
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0]
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0]
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0]
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0]
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
@ -225,3 +225,67 @@ v_ashr_pk_u8_i32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
|
||||
v_ashr_pk_u8_i32 v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1
|
||||
// GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x40,0x91,0xd6,0xea,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_bf8_f16 v1.l, v2 dpp8:[7,6,5,4,3,2,1,0]
|
||||
// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x73,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_bf8_f16_e64_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
|
||||
// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1.h, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_fp8_f16 v1.l, v2 dpp8:[7,6,5,4,3,2,1,0]
|
||||
// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x72,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
|
||||
// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2.h, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0]
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0]
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0]
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2.h, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0]
|
||||
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2.h, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0]
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0]
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0]
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2.h, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0]
|
||||
// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
|
||||
|
@ -96,3 +96,28 @@ v_ashr_pk_u8_i32 v1, v2, v3, v4 clamp
|
||||
// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
|
||||
// GFX125X-ERR-NEXT:{{^}}v_ashr_pk_u8_i32 v1, v2, v3, v4 clamp
|
||||
// GFX125X-ERR-NEXT:{{^}} ^
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 clamp
|
||||
// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
|
||||
// GFX125X-ERR-NEXT:{{^}}v_cvt_sr_bf8_f16 v1, v2, v3 clamp
|
||||
// GFX125X-ERR-NEXT:{{^}} ^
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 mul:2
|
||||
// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
|
||||
// GFX125X-ERR-NEXT:{{^}}v_cvt_sr_bf8_f16 v1, v2, v3 mul:2
|
||||
// GFX125X-ERR-NEXT:{{^}} ^
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 clamp
|
||||
// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
|
||||
// GFX125X-ERR-NEXT:{{^}}v_cvt_sr_fp8_f16 v1, v2, v3 clamp
|
||||
// GFX125X-ERR-NEXT:{{^}} ^
|
||||
|
||||
v_cvt_sr_fp8_f16 v1, v2, v3 mul:2
|
||||
// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
|
||||
// GFX125X-ERR-NEXT:{{^}}v_cvt_sr_fp8_f16 v1, v2, v3 mul:2
|
||||
// GFX125X-ERR-NEXT:{{^}} ^
|
||||
|
||||
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:4
|
||||
// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid byte_sel value.
|
||||
// GFX125X-ERR-NEXT:{{^}}v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:4
|
||||
// GFX125X-ERR-NEXT:{{^}} ^
|
||||
|
@ -422,6 +422,170 @@
|
||||
0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04
|
||||
# GFX1250: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04]
|
||||
|
||||
## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||
# GFX1250-FAKE16: {{.*}}
|
||||
# GFX1250-REAL16: {{.*}}
|
||||
0x01,0x00,0x73,0xd7,0x02,0x01,0x00,0x00
|
||||
# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, v2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x01,0x00,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, v2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x01,0x00,0x00]
|
||||
|
||||
0x01,0x40,0x73,0xd7,0x02,0x01,0x00,0x00
|
||||
# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.h, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x73,0xd7,0x02,0x01,0x00,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x73,0xd7,0x02,0x01,0x00,0x00]
|
||||
|
||||
0x01,0x80,0x73,0xd7,0x02,0x01,0x00,0x00
|
||||
# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, v2 clamp ; encoding: [0x01,0x80,0x73,0xd7,0x02,0x01,0x00,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, v2 clamp ; encoding: [0x01,0x80,0x73,0xd7,0x02,0x01,0x00,0x00]
|
||||
|
||||
0x01,0x00,0x73,0xd7,0x02,0x00,0x00,0x00
|
||||
# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, s2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x00,0x00,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, s2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x00,0x00,0x00]
|
||||
|
||||
0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00
|
||||
# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, 0x5640 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, 0x5640 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00]
|
||||
|
||||
0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00
|
||||
# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, 1 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, 1 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
|
||||
|
||||
0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00
|
||||
# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, 0x3800 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, 0x3800 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
|
||||
|
||||
0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00
|
||||
# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, 0x3118 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, 0x3118 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
|
||||
|
||||
0x01,0x00,0x72,0xd7,0x02,0x01,0x00,0x00
|
||||
# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, v2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x01,0x00,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, v2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x01,0x00,0x00]
|
||||
|
||||
0x01,0x40,0x72,0xd7,0x02,0x01,0x00,0x00
|
||||
# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.h, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x72,0xd7,0x02,0x01,0x00,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x72,0xd7,0x02,0x01,0x00,0x00]
|
||||
|
||||
0x01,0x80,0x72,0xd7,0x02,0x01,0x00,0x00
|
||||
# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, v2 clamp ; encoding: [0x01,0x80,0x72,0xd7,0x02,0x01,0x00,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, v2 clamp ; encoding: [0x01,0x80,0x72,0xd7,0x02,0x01,0x00,0x00]
|
||||
|
||||
0x01,0x00,0x72,0xd7,0x02,0x00,0x00,0x00
|
||||
# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, s2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x00,0x00,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, s2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x00,0x00,0x00]
|
||||
|
||||
0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00
|
||||
# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, 0x5640 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, 0x5640 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00]
|
||||
|
||||
0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00
|
||||
# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, 1 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, 1 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
|
||||
|
||||
0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00
|
||||
# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, 0x3800 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, 0x3800 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
|
||||
|
||||
0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00
|
||||
# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00]
|
||||
|
||||
0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x20
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, -v2.l, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x20]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, -v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x20]
|
||||
|
||||
0x01,0x08,0x75,0xd7,0x02,0x07,0x02,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
0x01,0x00,0x75,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.l, 0x1234 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, 0x1234 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00]
|
||||
|
||||
0x01,0x00,0x75,0xd7,0x02,0x07,0x00,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.l, s3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x00,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, s3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x00,0x00]
|
||||
|
||||
0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.l, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
0x01,0x20,0x75,0xd7,0x02,0x07,0x02,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:1 ; encoding: [0x01,0x20,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1 ; encoding: [0x01,0x20,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
0x01,0x40,0x75,0xd7,0x02,0x07,0x02,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:2 ; encoding: [0x01,0x40,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2 ; encoding: [0x01,0x40,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
0x01,0x60,0x75,0xd7,0x02,0x07,0x02,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:3 ; encoding: [0x01,0x60,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:3 ; encoding: [0x01,0x60,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
0x01,0x28,0x75,0xd7,0x02,0x07,0x02,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
0x01,0x48,0x75,0xd7,0x02,0x07,0x02,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
0x01,0x68,0x75,0xd7,0x02,0x07,0x02,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
0x01,0x01,0x75,0xd7,0x02,0x07,0x02,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, |v2.l|, v3 ; encoding: [0x01,0x01,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, |v2|, v3 ; encoding: [0x01,0x01,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
0x01,0x08,0x74,0xd7,0x02,0x07,0x02,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
0x01,0x09,0x75,0xd7,0x02,0x07,0x02,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, |v2.h|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, |v2|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x75,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x20
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, -v2.l, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x20]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, -v2, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x20]
|
||||
|
||||
0x01,0x00,0x74,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.l, 0x1234 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, 0x1234 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00]
|
||||
|
||||
0x01,0x00,0x74,0xd7,0x02,0x07,0x00,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.l, s3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x00,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, s3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x00,0x00]
|
||||
|
||||
0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.l, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
0x01,0x20,0x74,0xd7,0x02,0x07,0x02,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:1 ; encoding: [0x01,0x20,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1 ; encoding: [0x01,0x20,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
0x01,0x40,0x74,0xd7,0x02,0x07,0x02,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:2 ; encoding: [0x01,0x40,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2 ; encoding: [0x01,0x40,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
0x01,0x60,0x74,0xd7,0x02,0x07,0x02,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:3 ; encoding: [0x01,0x60,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:3 ; encoding: [0x01,0x60,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
0x01,0x01,0x74,0xd7,0x02,0x07,0x02,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, |v2.l|, v3 ; encoding: [0x01,0x01,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, |v2|, v3 ; encoding: [0x01,0x01,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
0x01,0x28,0x74,0xd7,0x02,0x07,0x02,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
0x01,0x48,0x74,0xd7,0x02,0x07,0x02,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
0x01,0x68,0x74,0xd7,0x02,0x07,0x02,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
||||
0x01,0x09,0x74,0xd7,0x02,0x07,0x02,0x00
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, |v2.h|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, |v2|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x74,0xd7,0x02,0x07,0x02,0x00]
|
||||
|
@ -269,3 +269,67 @@
|
||||
|
||||
0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff
|
||||
# GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
|
||||
|
||||
0x01,0x00,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff
|
||||
# GFX1250-REAL16: v_cvt_pk_bf8_f16_e64_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
|
||||
# GFX1250-FAKE16: v_cvt_pk_bf8_f16_e64_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
|
||||
|
||||
0x01,0x40,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53
|
||||
# GFX1250-REAL16: v_cvt_pk_bf8_f16_e64_dpp v1.h, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53]
|
||||
# GFX1250-FAKE16: v_cvt_pk_bf8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53]
|
||||
|
||||
0x01,0x00,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff
|
||||
# GFX1250-REAL16: v_cvt_pk_fp8_f16_e64_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
|
||||
# GFX1250-FAKE16: v_cvt_pk_fp8_f16_e64_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
|
||||
|
||||
0x01,0x40,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53
|
||||
# GFX1250-REAL16: v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53]
|
||||
# GFX1250-FAKE16: v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53]
|
||||
|
||||
0x01,0x20,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
|
||||
0x01,0x08,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
|
||||
|
||||
0x01,0x40,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
|
||||
0x01,0x60,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
|
||||
0x01,0x68,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
|
||||
0x01,0x00,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
|
||||
|
||||
0x01,0x08,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
|
||||
|
||||
0x01,0x20,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
|
||||
0x01,0x40,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
|
||||
0x01,0x60,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
|
||||
0x01,0x00,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
|
||||
|
||||
0x01,0x68,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff]
|
||||
|
@ -187,3 +187,67 @@
|
||||
|
||||
0x05,0x00,0x91,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
|
||||
# GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x91,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
|
||||
|
||||
0x01,0x00,0x73,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05
|
||||
# GFX1250-REAL16: v_cvt_pk_bf8_f16_e64_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x73,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
|
||||
# GFX1250-FAKE16: v_cvt_pk_bf8_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x73,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
|
||||
|
||||
0x01,0x40,0x73,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05
|
||||
# GFX1250-REAL16: v_cvt_pk_bf8_f16_e64_dpp v1.h, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
|
||||
# GFX1250-FAKE16: v_cvt_pk_bf8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
|
||||
|
||||
0x01,0x00,0x72,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05
|
||||
# GFX1250-REAL16: v_cvt_pk_fp8_f16_e64_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x72,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
|
||||
# GFX1250-FAKE16: v_cvt_pk_fp8_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x72,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
|
||||
|
||||
0x01,0x40,0x72,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05
|
||||
# GFX1250-REAL16: v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
|
||||
# GFX1250-FAKE16: v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
|
||||
|
||||
0x01,0x00,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
|
||||
0x01,0x08,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
|
||||
0x01,0x20,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
|
||||
0x01,0x40,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
|
||||
0x01,0x60,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
|
||||
0x01,0x68,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
|
||||
# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
|
||||
0x01,0x00,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
|
||||
0x01,0x08,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
|
||||
0x01,0x20,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
|
||||
0x01,0x40,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
|
||||
0x01,0x60,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
|
||||
0x01,0x68,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f
|
||||
# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]
|
||||
|
Loading…
x
Reference in New Issue
Block a user