[SelectionDAG] Add expansion for llvm.convert.from.arbitrary.fp (#179318)
The expansion converts an arbitrary-precision FP value represented as an integer using the following algorithm: 1. Extract sign, exponent, and mantissa bit fields via masks and shifts. 2. Classify the input (zero, denormal, normal, Inf, NaN) using the exponent and mantissa fields. 3. Normal path: adjust the exponent bias and left-shift the mantissa to fit the wider destination format. 4. Denormal path: normalize by finding the MSB position of the mantissa (via count-leading-zeros), compute the correct exponent from that position, strip the implicit leading 1, and shift the fraction into the destination mantissa field. 5. Assemble the destination IEEE bit pattern (sign | exponent | mantissa) and select among the normal, denormal, and special-value results. Currently only conversions from OCP floats are covered; in LLVM terms these are: Float8E5M2, Float8E4M3FN, Float6E3M2FN, Float6E2M3FN, Float4E2M1FN. OCP spec: https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf AI has assisted in X86 E2E testing.
This commit is contained in:
parent
1a7502592f
commit
a636928bb4
@ -412,6 +412,10 @@ public:
|
||||
/// format interpretation for llvm.convert.to.arbitrary.fp and
|
||||
/// llvm.convert.from.arbitrary.fp intrinsics.
|
||||
LLVM_ABI static bool isValidArbitraryFPFormat(StringRef Format);
|
||||
|
||||
/// Returns the fltSemantics for a given arbitrary FP format string,
|
||||
/// or nullptr if invalid.
|
||||
LLVM_ABI static const fltSemantics *getArbitraryFPSemantics(StringRef Format);
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
@ -1014,6 +1014,12 @@ enum NodeType {
|
||||
STRICT_BF16_TO_FP,
|
||||
STRICT_FP_TO_BF16,
|
||||
|
||||
/// CONVERT_FROM_ARBITRARY_FP - This operator converts from an arbitrary
|
||||
/// floating-point represented as an integer to a native FP type.
|
||||
/// The first operand is the integer containing the source FP bits.
|
||||
/// The second operand is a constant indicating the source FP semantics.
|
||||
CONVERT_FROM_ARBITRARY_FP,
|
||||
|
||||
/// Perform various unary floating-point operations inspired by libm. For
|
||||
/// FPOWI, the result is undefined if the integer operand doesn't fit into
|
||||
/// sizeof(int).
|
||||
|
||||
@ -3528,6 +3528,243 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
|
||||
Results.push_back(Op);
|
||||
break;
|
||||
}
|
||||
case ISD::CONVERT_FROM_ARBITRARY_FP: {
  // Expand a conversion from an arbitrary FP format, stored as raw bits in an
  // integer, to a native IEEE float type using integer bit manipulation.
  //
  // Operand 0 is the integer holding the source bits. Operand 1 is a constant
  // holding the APFloatBase::Semantics enum value of the source format.
  //
  // TODO: currently only conversions from FP4, FP6 and FP8 formats from OCP
  // specification are expanded. Remaining arbitrary FP types: Float8E4M3,
  // Float8E3M4, Float8E5M2FNUZ, Float8E4M3FNUZ, Float8E4M3B11FNUZ,
  // Float8E8M0FNU.
  EVT DstVT = Node->getValueType(0);

  SDValue IntVal = Node->getOperand(0);
  const uint64_t SemEnum = Node->getConstantOperandVal(1);
  const auto Sem = static_cast<APFloatBase::Semantics>(SemEnum);

  // Supported source formats. Anything else is diagnosed and replaced by
  // poison so that compilation can continue after the error is reported.
  switch (Sem) {
  case APFloatBase::S_Float8E5M2:
  case APFloatBase::S_Float8E4M3FN:
  case APFloatBase::S_Float6E3M2FN:
  case APFloatBase::S_Float6E2M3FN:
  case APFloatBase::S_Float4E2M1FN:
    break;
  default:
    DAG.getContext()->emitError("CONVERT_FROM_ARBITRARY_FP: not implemented "
                                "source format (semantics enum " +
                                Twine(SemEnum) + ")");
    Results.push_back(DAG.getPOISON(DstVT));
    break;
  }
  if (!Results.empty())
    break;

  // Source format parameters.
  const fltSemantics &SrcSem = APFloatBase::EnumToSemantics(Sem);

  const unsigned SrcBits = APFloat::getSizeInBits(SrcSem);
  const unsigned SrcPrecision = APFloat::semanticsPrecision(SrcSem);
  const unsigned SrcMant = SrcPrecision - 1;
  const unsigned SrcExp = SrcBits - SrcMant - 1;
  const int SrcBias = 1 - APFloat::semanticsMinExponent(SrcSem);

  const fltNonfiniteBehavior NFBehavior = SrcSem.nonFiniteBehavior;

  // Destination format parameters.
  const fltSemantics &DstSem = DstVT.getFltSemantics();

  const unsigned DstBits = APFloat::getSizeInBits(DstSem);
  const unsigned DstMant = APFloat::semanticsPrecision(DstSem) - 1;
  const int DstMinExp = APFloat::semanticsMinExponent(DstSem);
  const int DstBias = 1 - DstMinExp;
  // All-ones destination exponent field, i.e. bits [DstMant, DstBits - 1).
  // Built as an APInt so that no 64-bit host shift can overflow when the
  // destination format is wider than 64 bits.
  const APInt DstExpMask = APInt::getBitsSet(DstBits, DstMant, DstBits - 1);

  // Work in an integer type matching the destination float width.
  // Use zero-extend to preserve the raw bit-pattern.
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), DstBits);
  SDValue Src = DAG.getZExtOrTrunc(IntVal, dl, IntVT);

  EVT SetCCVT = getSetCCResultType(IntVT);

  SDValue Zero = DAG.getConstant(0, dl, IntVT);
  SDValue One = DAG.getConstant(1, dl, IntVT);

  // Extract bit fields. The mantissa and exponent are masked explicitly, and
  // the sign is isolated by the shift pair below (only source bit SrcBits - 1
  // survives the final left shift), so bits of Src above the source width
  // never reach the result.
  const uint64_t MantMask = (SrcMant > 0) ? ((1ULL << SrcMant) - 1) : 0;
  const uint64_t ExpMask = (1ULL << SrcExp) - 1;

  SDValue MantField = DAG.getNode(ISD::AND, dl, IntVT, Src,
                                  DAG.getConstant(MantMask, dl, IntVT));

  SDValue ExpField =
      DAG.getNode(ISD::AND, dl, IntVT,
                  DAG.getNode(ISD::SRL, dl, IntVT, Src,
                              DAG.getShiftAmountConstant(SrcMant, IntVT, dl)),
                  DAG.getConstant(ExpMask, dl, IntVT));

  SDValue SignBit =
      DAG.getNode(ISD::SRL, dl, IntVT, Src,
                  DAG.getShiftAmountConstant(SrcBits - 1, IntVT, dl));

  // Precompute sign shifted to MSB of destination.
  SDValue SignShifted =
      DAG.getNode(ISD::SHL, dl, IntVT, SignBit,
                  DAG.getShiftAmountConstant(DstBits - 1, IntVT, dl));

  // Classify the input value based on compile-time format properties.
  SDValue ExpAllOnes = DAG.getConstant(ExpMask, dl, IntVT);
  SDValue IsExpAllOnes =
      DAG.getSetCC(dl, SetCCVT, ExpField, ExpAllOnes, ISD::SETEQ);
  SDValue IsExpZero = DAG.getSetCC(dl, SetCCVT, ExpField, Zero, ISD::SETEQ);
  SDValue IsMantZero = DAG.getSetCC(dl, SetCCVT, MantField, Zero, ISD::SETEQ);
  SDValue IsMantNonZero =
      DAG.getSetCC(dl, SetCCVT, MantField, Zero, ISD::SETNE);

  // NaN detection.
  SDValue IsNaN;
  if (NFBehavior == fltNonfiniteBehavior::FiniteOnly) {
    // FiniteOnly formats (E2M1FN, E3M2FN, E2M3FN) never produce NaN.
    IsNaN = DAG.getBoolConstant(false, dl, SetCCVT, IntVT);
  } else if (NFBehavior == fltNonfiniteBehavior::IEEE754) {
    // E5M2 produces NaN when exp == all-ones AND mantissa != 0.
    IsNaN = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantNonZero);
  } else {
    // NanOnly + AllOnes (E4M3FN): NaN when all exp and mantissa bits are 1.
    assert(SrcSem.nanEncoding == fltNanEncoding::AllOnes &&
           "unexpected NaN encoding for a NanOnly source format");
    SDValue MantAllOnes = DAG.getConstant(MantMask, dl, IntVT);
    SDValue IsMantAllOnes =
        DAG.getSetCC(dl, SetCCVT, MantField, MantAllOnes, ISD::SETEQ);
    IsNaN = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantAllOnes);
  }

  // Inf detection.
  SDValue IsInf;
  if (NFBehavior == fltNonfiniteBehavior::IEEE754) {
    // E5M2: Inf when exp == all-ones AND mantissa == 0.
    IsInf = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantZero);
  } else {
    // NanOnly and FiniteOnly formats have no Inf representation.
    IsInf = DAG.getBoolConstant(false, dl, SetCCVT, IntVT);
  }

  // Zero detection.
  SDValue IsZero = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpZero, IsMantZero);

  // Normal value conversion.
  // dst_exp = exp_field + (DstBias - SrcBias)
  // dst_mant = mant << (DstMant - SrcMant)
  const int BiasAdjust = DstBias - SrcBias;
  SDValue NormDstExp = DAG.getNode(
      ISD::ADD, dl, IntVT, ExpField,
      DAG.getConstant(APInt(DstBits, BiasAdjust, true), dl, IntVT));

  SDValue NormDstMant;
  if (DstMant > SrcMant) {
    SDValue NormDstMantShift =
        DAG.getShiftAmountConstant(DstMant - SrcMant, IntVT, dl);
    NormDstMant =
        DAG.getNode(ISD::SHL, dl, IntVT, MantField, NormDstMantShift);
  } else {
    NormDstMant = MantField;
  }

  // Assemble normal result.
  SDValue DstMantShift = DAG.getShiftAmountConstant(DstMant, IntVT, dl);
  SDValue NormExpShifted =
      DAG.getNode(ISD::SHL, dl, IntVT, NormDstExp, DstMantShift);
  SDValue NormResult = DAG.getNode(
      ISD::OR, dl, IntVT,
      DAG.getNode(ISD::OR, dl, IntVT, SignShifted, NormExpShifted),
      NormDstMant);

  // Denormal value conversion.
  //
  // A source denormal with its mantissa MSB at bit p has the destination
  // biased exponent p + 1 - SrcMant + BiasAdjust. The CTLZ-based path below
  // renormalizes the value into a destination *normal*, which is only valid
  // when that exponent is >= 1 for every p, i.e. when BiasAdjust >= SrcMant.
  //
  // When both formats share the same bias (BiasAdjust == 0, e.g. Float8E5M2
  // -> half) a source denormal is a destination denormal as well: the
  // exponent field stays 0 and the mantissa is merely left-aligned. That is
  // exactly what the normal path computes for exp_field == 0, so no separate
  // denormal path is needed (renormalizing would yield a negative exponent).
  const bool DenormStaysDenorm = BiasAdjust == 0;
  assert((DenormStaysDenorm || BiasAdjust >= static_cast<int>(SrcMant)) &&
         "source denormals straddle the destination normal/denormal boundary");

  SDValue FiniteResult = NormResult;
  if (!DenormStaysDenorm) {
    // Denorm detection: exp == 0 AND mant != 0.
    SDValue IsDenorm =
        DAG.getNode(ISD::AND, dl, SetCCVT, IsExpZero, IsMantNonZero);

    // For a denormal source (exp_field == 0, mant != 0), normalize by finding
    // the MSB position of mant using CTLZ, then compute the correct
    // exponent and mantissa for the destination format. The zero-undef form
    // is sufficient because this result is only selected when mant != 0.
    const unsigned IntVTBits = DstBits;
    SDValue LeadingZeros =
        DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, IntVT, MantField);

    // dst_exp_denorm = (IntVTBits + DstBias - SrcBias - SrcMant) -
    //                  LeadingZeros
    const int DenormExpConst =
        (int)IntVTBits + DstBias - SrcBias - (int)SrcMant;
    SDValue DenormDstExp = DAG.getNode(
        ISD::SUB, dl, IntVT,
        DAG.getConstant(APInt(DstBits, DenormExpConst, true), dl, IntVT),
        LeadingZeros);

    // MSB position of the mantissa (0-indexed from LSB).
    SDValue MantMSB =
        DAG.getNode(ISD::SUB, dl, IntVT,
                    DAG.getConstant(IntVTBits - 1, dl, IntVT), LeadingZeros);

    // leading_one = 1 << MantMSB
    SDValue LeadingOne = DAG.getNode(ISD::SHL, dl, IntVT, One, MantMSB);

    // frac = mant XOR leading_one (strip the implicit 1)
    SDValue Frac = DAG.getNode(ISD::XOR, dl, IntVT, MantField, LeadingOne);

    // shift_amount = DstMant - MantMSB
    //              = DstMant - (IntVTBits - 1 - LeadingZeros)
    //              = LeadingZeros - (IntVTBits - 1 - DstMant)
    const unsigned ShiftSub = IntVTBits - 1 - DstMant; // always >= 0
    SDValue ShiftAmount = DAG.getNode(ISD::SUB, dl, IntVT, LeadingZeros,
                                      DAG.getConstant(ShiftSub, dl, IntVT));

    SDValue DenormDstMant =
        DAG.getNode(ISD::SHL, dl, IntVT, Frac, ShiftAmount);

    // Assemble denorm as sign | (denorm_dst_exp << DstMant) | denorm_dst_mant
    SDValue DenormExpShifted =
        DAG.getNode(ISD::SHL, dl, IntVT, DenormDstExp, DstMantShift);
    SDValue DenormResult = DAG.getNode(
        ISD::OR, dl, IntVT,
        DAG.getNode(ISD::OR, dl, IntVT, SignShifted, DenormExpShifted),
        DenormDstMant);

    // Select between normal and denorm paths.
    FiniteResult =
        DAG.getSelect(dl, IntVT, IsDenorm, DenormResult, NormResult);
  }

  // Build special-value results.
  // NaN -> canonical quiet NaN: sign=0, exp=all-ones, qNaN bit set.
  // Encoding: (DstExpAllOnes << DstMant) | (1 << (DstMant - 1))
  APInt QNaNBits = DstExpMask;
  if (DstMant > 0)
    QNaNBits.setBit(DstMant - 1);
  SDValue NaNResult = DAG.getConstant(QNaNBits, dl, IntVT);

  // Inf -> destination Inf.
  // sign | (DstExpAllOnes << DstMant)
  SDValue InfResult = DAG.getNode(ISD::OR, dl, IntVT, SignShifted,
                                  DAG.getConstant(DstExpMask, dl, IntVT));

  // Zero -> signed zero.
  // Sign bit only.
  SDValue ZeroResult = SignShifted;

  // Final selection goes in order: NaN takes priority, then Inf, then Zero.
  SDValue Result = FiniteResult;
  Result = DAG.getSelect(dl, IntVT, IsZero, ZeroResult, Result);
  Result = DAG.getSelect(dl, IntVT, IsInf, InfResult, Result);
  Result = DAG.getSelect(dl, IntVT, IsNaN, NaNResult, Result);

  // Bitcast integer result to destination float type.
  Result = DAG.getNode(ISD::BITCAST, dl, DstVT, Result);

  Results.push_back(Result);
  break;
}
|
||||
case ISD::FCANONICALIZE: {
|
||||
// This implements llvm.canonicalize.f* by multiplication with 1.0, as
|
||||
// suggested in
|
||||
|
||||
@ -2768,6 +2768,9 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::STRICT_UINT_TO_FP:
|
||||
case ISD::SINT_TO_FP:
|
||||
case ISD::UINT_TO_FP: R = SoftPromoteHalfRes_XINT_TO_FP(N); break;
|
||||
case ISD::CONVERT_FROM_ARBITRARY_FP:
|
||||
R = SoftPromoteHalfRes_CONVERT_FROM_ARBITRARY_FP(N);
|
||||
break;
|
||||
case ISD::POISON:
|
||||
case ISD::UNDEF: R = SoftPromoteHalfRes_UNDEF(N); break;
|
||||
case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
|
||||
@ -3055,6 +3058,19 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_XINT_TO_FP(SDNode *N) {
|
||||
return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
|
||||
}
|
||||
|
||||
/// Soft-promote the result of CONVERT_FROM_ARBITRARY_FP: perform the
/// conversion in the type the result is transformed to, then round back to
/// the original result type, which is carried as an i16.
SDValue
DAGTypeLegalizer::SoftPromoteHalfRes_CONVERT_FROM_ARBITRARY_FP(SDNode *N) {
  SDLoc dl(N);
  const EVT OrigVT = N->getValueType(0);
  const EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), OrigVT);

  // Same source bits and source-semantics operand, wider FP result.
  SDValue Wide = DAG.getNode(ISD::CONVERT_FROM_ARBITRARY_FP, dl, WideVT,
                             N->getOperand(0), N->getOperand(1));

  // Round the value to the softened type.
  const unsigned RoundOpc = GetPromotionOpcode(WideVT, OrigVT);
  return DAG.getNode(RoundOpc, dl, MVT::i16, Wide);
}
|
||||
|
||||
/// Soft-promote an UNDEF/POISON result: the softened value is an undefined
/// i16.
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UNDEF(SDNode *N) {
  const MVT SoftenedVT = MVT::i16;
  return DAG.getUNDEF(SoftenedVT);
}
|
||||
|
||||
@ -2076,6 +2076,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
|
||||
case ISD::FP16_TO_FP:
|
||||
case ISD::VP_UINT_TO_FP:
|
||||
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
|
||||
case ISD::CONVERT_FROM_ARBITRARY_FP:
|
||||
Res = PromoteIntOp_CONVERT_FROM_ARBITRARY_FP(N);
|
||||
break;
|
||||
case ISD::STRICT_FP16_TO_FP:
|
||||
case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;
|
||||
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
|
||||
@ -2685,6 +2688,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
|
||||
ZExtPromotedInteger(N->getOperand(0))), 0);
|
||||
}
|
||||
|
||||
/// Promote the integer source operand (operand 0) of
/// CONVERT_FROM_ARBITRARY_FP in place; the source-semantics operand
/// (operand 1) is passed through unchanged.
SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_FROM_ARBITRARY_FP(SDNode *N) {
  // NOTE(review): GetPromotedInteger is used rather than an explicit zero
  // extension, so the bits above the original width are presumably
  // unspecified -- confirm that every consumer masks the fields it reads.
  SDValue PromotedBits = GetPromotedInteger(N->getOperand(0));
  SDValue SemanticsOp = N->getOperand(1);
  SDNode *Updated = DAG.UpdateNodeOperands(N, PromotedBits, SemanticsOp);
  return SDValue(Updated, 0);
}
|
||||
|
||||
SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N) {
|
||||
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
|
||||
ZExtPromotedInteger(N->getOperand(1))), 0);
|
||||
|
||||
@ -397,6 +397,7 @@ private:
|
||||
SDValue PromoteIntOp_TRUNCATE(SDNode *N);
|
||||
SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
|
||||
SDValue PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N);
|
||||
SDValue PromoteIntOp_CONVERT_FROM_ARBITRARY_FP(SDNode *N);
|
||||
SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
|
||||
SDValue PromoteIntOp_VP_ZERO_EXTEND(SDNode *N);
|
||||
SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
|
||||
@ -787,6 +788,7 @@ private:
|
||||
SDValue SoftPromoteHalfRes_FNEG(SDNode *N);
|
||||
SDValue SoftPromoteHalfRes_AssertNoFPClass(SDNode *N);
|
||||
SDValue SoftPromoteHalfRes_XINT_TO_FP(SDNode *N);
|
||||
SDValue SoftPromoteHalfRes_CONVERT_FROM_ARBITRARY_FP(SDNode *N);
|
||||
SDValue SoftPromoteHalfRes_UNDEF(SDNode *N);
|
||||
SDValue SoftPromoteHalfRes_VECREDUCE(SDNode *N);
|
||||
SDValue SoftPromoteHalfRes_VECREDUCE_SEQ(SDNode *N);
|
||||
@ -838,6 +840,7 @@ private:
|
||||
SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
|
||||
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
|
||||
SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
|
||||
SDValue ScalarizeVecRes_CONVERT_FROM_ARBITRARY_FP(SDNode *N);
|
||||
SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N);
|
||||
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
|
||||
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
|
||||
|
||||
@ -460,6 +460,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
|
||||
case ISD::USUBO:
|
||||
case ISD::SMULO:
|
||||
case ISD::UMULO:
|
||||
case ISD::CONVERT_FROM_ARBITRARY_FP:
|
||||
case ISD::FCANONICALIZE:
|
||||
case ISD::FFREXP:
|
||||
case ISD::FMODF:
|
||||
|
||||
@ -62,6 +62,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
|
||||
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
|
||||
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
|
||||
case ISD::CONVERT_FROM_ARBITRARY_FP:
|
||||
R = ScalarizeVecRes_CONVERT_FROM_ARBITRARY_FP(N);
|
||||
break;
|
||||
case ISD::AssertZext:
|
||||
case ISD::AssertSext:
|
||||
case ISD::FPOWI:
|
||||
@ -478,6 +481,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
|
||||
N->getOperand(1));
|
||||
}
|
||||
|
||||
/// Scalarize the result of a vector CONVERT_FROM_ARBITRARY_FP by converting
/// its single source element; the source-semantics operand is forwarded
/// unchanged.
SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_FROM_ARBITRARY_FP(SDNode *N) {
  SDLoc DL(N);
  SDValue Src = N->getOperand(0);
  const EVT SrcVT = Src.getValueType();

  // The result needs scalarizing, but it's not a given that the source does.
  // See similar logic in ScalarizeVecRes_UnaryOp.
  SDValue Scalar;
  if (getTypeAction(SrcVT) != TargetLowering::TypeScalarizeVector) {
    // The source vector keeps its type, so pull element 0 out of it.
    Scalar = DAG.getExtractVectorElt(DL, SrcVT.getVectorElementType(), Src, 0);
  } else {
    Scalar = GetScalarizedVector(Src);
  }

  const EVT ResEltVT = N->getValueType(0).getVectorElementType();
  return DAG.getNode(ISD::CONVERT_FROM_ARBITRARY_FP, DL, ResEltVT, Scalar,
                     N->getOperand(1));
}
|
||||
|
||||
SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N) {
|
||||
SDValue Op = GetScalarizedVector(N->getOperand(0));
|
||||
return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op,
|
||||
@ -818,6 +838,7 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
|
||||
break;
|
||||
case ISD::FP_TO_SINT_SAT:
|
||||
case ISD::FP_TO_UINT_SAT:
|
||||
case ISD::CONVERT_FROM_ARBITRARY_FP:
|
||||
Res = ScalarizeVecOp_UnaryOpWithExtraInput(N);
|
||||
break;
|
||||
case ISD::STRICT_SINT_TO_FP:
|
||||
@ -1382,6 +1403,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::VP_UINT_TO_FP:
|
||||
case ISD::FCANONICALIZE:
|
||||
case ISD::AssertNoFPClass:
|
||||
case ISD::CONVERT_FROM_ARBITRARY_FP:
|
||||
SplitVecRes_UnaryOp(N, Lo, Hi);
|
||||
break;
|
||||
case ISD::ADDRSPACECAST:
|
||||
@ -2783,7 +2805,8 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
|
||||
const SDNodeFlags Flags = N->getFlags();
|
||||
unsigned Opcode = N->getOpcode();
|
||||
if (N->getNumOperands() <= 2) {
|
||||
if (Opcode == ISD::FP_ROUND || Opcode == ISD::AssertNoFPClass) {
|
||||
if (Opcode == ISD::FP_ROUND || Opcode == ISD::AssertNoFPClass ||
|
||||
Opcode == ISD::CONVERT_FROM_ARBITRARY_FP) {
|
||||
Lo = DAG.getNode(Opcode, dl, LoVT, Lo, N->getOperand(1), Flags);
|
||||
Hi = DAG.getNode(Opcode, dl, HiVT, Hi, N->getOperand(1), Flags);
|
||||
} else {
|
||||
@ -3596,7 +3619,10 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
|
||||
break;
|
||||
case ISD::STRICT_FP_ROUND:
|
||||
case ISD::VP_FP_ROUND:
|
||||
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
|
||||
case ISD::FP_ROUND:
|
||||
case ISD::CONVERT_FROM_ARBITRARY_FP:
|
||||
Res = SplitVecOp_FP_ROUND(N);
|
||||
break;
|
||||
case ISD::FCOPYSIGN: Res = SplitVecOp_FPOpDifferentTypes(N); break;
|
||||
case ISD::STORE:
|
||||
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
|
||||
@ -4732,8 +4758,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
|
||||
Lo = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Lo, MaskLo, EVLLo);
|
||||
Hi = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Hi, MaskHi, EVLHi);
|
||||
} else {
|
||||
Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
|
||||
Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
|
||||
Lo = DAG.getNode(N->getOpcode(), DL, OutVT, Lo, N->getOperand(1));
|
||||
Hi = DAG.getNode(N->getOpcode(), DL, OutVT, Hi, N->getOperand(1));
|
||||
}
|
||||
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
|
||||
@ -5142,6 +5168,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::VP_UINT_TO_FP:
|
||||
case ISD::ZERO_EXTEND:
|
||||
case ISD::VP_ZERO_EXTEND:
|
||||
case ISD::CONVERT_FROM_ARBITRARY_FP:
|
||||
Res = WidenVecRes_Convert(N);
|
||||
break;
|
||||
|
||||
@ -7278,6 +7305,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
|
||||
case ISD::UINT_TO_FP:
|
||||
case ISD::STRICT_UINT_TO_FP:
|
||||
case ISD::TRUNCATE:
|
||||
case ISD::CONVERT_FROM_ARBITRARY_FP:
|
||||
Res = WidenVecOp_Convert(N);
|
||||
break;
|
||||
|
||||
@ -7499,7 +7527,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
|
||||
} else {
|
||||
if (Opcode == ISD::FP_ROUND)
|
||||
if (Opcode == ISD::FP_ROUND || Opcode == ISD::CONVERT_FROM_ARBITRARY_FP)
|
||||
Res = DAG.getNode(Opcode, dl, WideVT, InOp, N->getOperand(1));
|
||||
else
|
||||
Res = DAG.getNode(Opcode, dl, WideVT, InOp);
|
||||
@ -7523,9 +7551,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
|
||||
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
|
||||
ReplaceValueWith(SDValue(N, 1), NewChain);
|
||||
} else {
|
||||
for (unsigned i = 0; i < NumElts; ++i)
|
||||
Ops[i] = DAG.getNode(Opcode, dl, EltVT,
|
||||
DAG.getExtractVectorElt(dl, InEltVT, InOp, i));
|
||||
for (unsigned i = 0; i < NumElts; ++i) {
|
||||
SDValue Elt = DAG.getExtractVectorElt(dl, InEltVT, InOp, i);
|
||||
if (Opcode == ISD::FP_ROUND || Opcode == ISD::CONVERT_FROM_ARBITRARY_FP)
|
||||
Ops[i] = DAG.getNode(Opcode, dl, EltVT, Elt, N->getOperand(1));
|
||||
else
|
||||
Ops[i] = DAG.getNode(Opcode, dl, EltVT, Elt);
|
||||
}
|
||||
}
|
||||
|
||||
return DAG.getBuildVector(VT, dl, Ops);
|
||||
|
||||
@ -7148,6 +7148,31 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
|
||||
DAG.getValueType(VT.getScalarType())));
|
||||
return;
|
||||
}
|
||||
case Intrinsic::convert_from_arbitrary_fp: {
  // Lower llvm.convert.from.arbitrary.fp to ISD::CONVERT_FROM_ARBITRARY_FP.
  // Argument 0 holds the source bits; argument 1 is a metadata string naming
  // the source format, which is translated to a semantics enum constant.
  EVT DstVT = TLI.getValueType(DAG.getDataLayout(), I.getType());

  const auto *FormatArg = cast<MetadataAsValue>(I.getArgOperand(1));
  StringRef FormatStr = cast<MDString>(FormatArg->getMetadata())->getString();

  const fltSemantics *SrcSem = APFloatBase::getArbitraryFPSemantics(FormatStr);
  if (SrcSem == nullptr) {
    // Unknown or not-yet-supported format: report it and produce poison.
    DAG.getContext()->emitError(
        "convert_from_arbitrary_fp: not implemented format '" + FormatStr +
        "'");
    setValue(&I, DAG.getPOISON(DstVT));
    return;
  }

  SDValue IntVal = getValue(I.getArgOperand(0));

  // The semantics travel as an i32 target constant in operand 1.
  const int SemValue = static_cast<int>(APFloatBase::SemanticsToEnum(*SrcSem));
  SDValue SemConst = DAG.getTargetConstant(SemValue, sdl, MVT::i32);

  SDValue Converted =
      DAG.getNode(ISD::CONVERT_FROM_ARBITRARY_FP, sdl, DstVT, IntVal, SemConst);
  setValue(&I, Converted);
  return;
}
|
||||
case Intrinsic::set_rounding:
|
||||
Res = DAG.getNode(ISD::SET_ROUNDING, sdl, MVT::Other,
|
||||
{getRoot(), getValue(I.getArgOperand(0))});
|
||||
|
||||
@ -435,6 +435,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
|
||||
case ISD::STRICT_BF16_TO_FP: return "strict_bf16_to_fp";
|
||||
case ISD::FP_TO_BF16: return "fp_to_bf16";
|
||||
case ISD::STRICT_FP_TO_BF16: return "strict_fp_to_bf16";
|
||||
case ISD::CONVERT_FROM_ARBITRARY_FP: return "convert_from_arbitrary_fp";
|
||||
case ISD::LROUND: return "lround";
|
||||
case ISD::STRICT_LROUND: return "strict_lround";
|
||||
case ISD::LLROUND: return "llround";
|
||||
|
||||
@ -1149,7 +1149,7 @@ void TargetLoweringBase::initActions() {
|
||||
ISD::FASIN, ISD::FATAN,
|
||||
ISD::FCOSH, ISD::FSINH,
|
||||
ISD::FTANH, ISD::FATAN2,
|
||||
ISD::FMULADD},
|
||||
ISD::FMULADD, ISD::CONVERT_FROM_ARBITRARY_FP},
|
||||
VT, Expand);
|
||||
|
||||
// Overflow operations default to expand
|
||||
|
||||
@ -20,6 +20,7 @@
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/ADT/StringSwitch.h"
|
||||
#include "llvm/Config/llvm-config.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
@ -6085,6 +6086,18 @@ bool APFloatBase::isValidArbitraryFPFormat(StringRef Format) {
|
||||
return llvm::is_contained(ValidFormats, Format);
|
||||
}
|
||||
|
||||
/// Map an arbitrary-FP format name to its fltSemantics. Returns nullptr for
/// any name that is not one of the formats listed below.
const fltSemantics *APFloatBase::getArbitraryFPSemantics(StringRef Format) {
  // TODO: extend to remaining arbitrary FP types: Float8E4M3, Float8E3M4,
  // Float8E5M2FNUZ, Float8E4M3FNUZ, Float8E4M3B11FNUZ, Float8E8M0FNU.
  const fltSemantics *Semantics =
      StringSwitch<const fltSemantics *>(Format)
          // 4-bit format.
          .Case("Float4E2M1FN", &semFloat4E2M1FN)
          // 6-bit formats.
          .Case("Float6E2M3FN", &semFloat6E2M3FN)
          .Case("Float6E3M2FN", &semFloat6E3M2FN)
          // 8-bit formats.
          .Case("Float8E4M3FN", &semFloat8E4M3FN)
          .Case("Float8E5M2", &semFloat8E5M2)
          .Default(nullptr);
  return Semantics;
}
|
||||
|
||||
APFloat::Storage::~Storage() {
|
||||
if (usesLayout<IEEEFloat>(*semantics)) {
|
||||
IEEE.~IEEEFloat();
|
||||
|
||||
646
llvm/test/CodeGen/AMDGPU/arbitrary-fp-to-float.ll
Normal file
646
llvm/test/CodeGen/AMDGPU/arbitrary-fp-to-float.ll
Normal file
@ -0,0 +1,646 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
|
||||
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck %s
|
||||
|
||||
; Test llvm.convert.from.arbitrary.fp intrinsic expansion.
|
||||
|
||||
declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata)
|
||||
declare float @llvm.convert.from.arbitrary.fp.f32.i6(i6, metadata)
|
||||
declare float @llvm.convert.from.arbitrary.fp.f32.i4(i4, metadata)
|
||||
declare <4 x float> @llvm.convert.from.arbitrary.fp.v4f32.v4i4(<4 x i4>, metadata)
|
||||
|
||||
declare half @llvm.convert.from.arbitrary.fp.f16.i8(i8, metadata)
|
||||
declare double @llvm.convert.from.arbitrary.fp.f64.i8(i8, metadata)
|
||||
|
||||
; Float8E5M2
|
||||
; Layout: sign(1) exp(5) mant(2), bias=15
|
||||
; Supports: Inf, NaN, signed zero, denormals
|
||||
|
||||
; Float8E5M2 normal: 0_01111_00 = 1.0
|
||||
define float @from_f8e5m2_normal() {
|
||||
; CHECK-LABEL: from_f8e5m2_normal:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 1.0
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 60, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 zero: 0_00000_00 = +0.0
|
||||
define float @from_f8e5m2_zero() {
|
||||
; CHECK-LABEL: from_f8e5m2_zero:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 0, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 negative zero: 1_00000_00 = -0.0
|
||||
define float @from_f8e5m2_neg_zero() {
|
||||
; CHECK-LABEL: from_f8e5m2_neg_zero:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_bfrev_b32_e32 v0, 1
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 -128, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 denorm: 0_00000_01 = 2^(-16)
|
||||
define float @from_f8e5m2_denorm() {
|
||||
; CHECK-LABEL: from_f8e5m2_denorm:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x37800000
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 1, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 +Inf: 0_11111_00
|
||||
define float @from_f8e5m2_inf() {
|
||||
; CHECK-LABEL: from_f8e5m2_inf:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x7f800000
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 124, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 NaN: 0_11111_01
|
||||
define float @from_f8e5m2_nan() {
|
||||
; CHECK-LABEL: from_f8e5m2_nan:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x7fc00000
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 125, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 max: 0_11110_11 = 57344
|
||||
define float @from_f8e5m2_max() {
|
||||
; CHECK-LABEL: from_f8e5m2_max:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x47600000
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 123, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 negative: 1_01111_00 = -1.0
|
||||
define float @from_f8e5m2_neg() {
|
||||
; CHECK-LABEL: from_f8e5m2_neg:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, -1.0
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 -68, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 runtime arg test
|
||||
define float @from_f8e5m2_dynamic(i8 %x) {
|
||||
; CHECK-LABEL: from_f8e5m2_dynamic:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
||||
; CHECK-NEXT: v_and_b32_e32 v1, 3, v0
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 24, v0
|
||||
; CHECK-NEXT: v_bfe_u32 v0, v0, 2, 5
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 21, v1
|
||||
; CHECK-NEXT: v_and_b32_e32 v3, 0x80000000, v3
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v0
|
||||
; CHECK-NEXT: v_or3_b32 v2, v4, v3, v2
|
||||
; CHECK-NEXT: v_ffbh_u32_e32 v4, v1
|
||||
; CHECK-NEXT: v_sub_u32_e32 v5, 31, v4
|
||||
; CHECK-NEXT: v_lshlrev_b32_e64 v5, v5, 1
|
||||
; CHECK-NEXT: v_xor_b32_e32 v5, v1, v5
|
||||
; CHECK-NEXT: v_add_u32_e32 v6, -8, v4
|
||||
; CHECK-NEXT: v_sub_u32_e32 v4, 0x8e, v4
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v5, v6, v5
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v4
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
|
||||
; CHECK-NEXT: v_add_u32_e32 v2, 0x38000000, v2
|
||||
; CHECK-NEXT: v_or3_b32 v4, v3, v4, v5
|
||||
; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5]
|
||||
; CHECK-NEXT: v_or_b32_e32 v4, v0, v1
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5]
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], 31, v0
|
||||
; CHECK-NEXT: v_or_b32_e32 v0, 0x7f800000, v3
|
||||
; CHECK-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
; CHECK-NEXT: s_and_b64 vcc, s[6:7], vcc
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 %x, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN (NanOnly, NanEncoding=AllOnes)
|
||||
; Layout: sign(1) exp(4) mant(3), maxExp=8, minExp=-6, bias=7
|
||||
; Only 0_1111_111 and 1_1111_111 are NaN; all other exp=15 values are finite.
|
||||
|
||||
; Float8E4M3FN normal: 0_0111_000 = 1.0
|
||||
define float @from_f8e4m3fn_normal() {
|
||||
; CHECK-LABEL: from_f8e4m3fn_normal:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 1.0
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 56, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN NaN: 0_1111_111
|
||||
define float @from_f8e4m3fn_nan() {
|
||||
; CHECK-LABEL: from_f8e4m3fn_nan:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x7fc00000
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 127, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN not-NaN: 0_1111_110 = 448
|
||||
; Despite exp=all-ones, this is a valid finite number (max value)
|
||||
define float @from_f8e4m3fn_max() {
|
||||
; CHECK-LABEL: from_f8e4m3fn_max:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x43e00000
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 126, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN not-NaN: 0_1111_101 = 416
|
||||
; exp=all-ones but mant!=all-ones so this is finite
|
||||
define float @from_f8e4m3fn_not_nan() {
|
||||
; CHECK-LABEL: from_f8e4m3fn_not_nan:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x43d00000
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 125, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN zero: 0_0000_000 = +0.0
|
||||
define float @from_f8e4m3fn_zero() {
|
||||
; CHECK-LABEL: from_f8e4m3fn_zero:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 0, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN denorm: 0_0000_001 = 2^(-9)
|
||||
define float @from_f8e4m3fn_denorm() {
|
||||
; CHECK-LABEL: from_f8e4m3fn_denorm:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3b000000
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 1, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN runtime arg test
|
||||
define float @from_f8e4m3fn_dynamic(i8 %x) {
|
||||
; CHECK-LABEL: from_f8e4m3fn_dynamic:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
||||
; CHECK-NEXT: v_and_b32_e32 v1, 7, v0
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 24, v0
|
||||
; CHECK-NEXT: v_bfe_u32 v0, v0, 3, 4
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v1
|
||||
; CHECK-NEXT: v_and_b32_e32 v3, 0x80000000, v3
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v0
|
||||
; CHECK-NEXT: v_or3_b32 v2, v4, v3, v2
|
||||
; CHECK-NEXT: v_ffbh_u32_e32 v4, v1
|
||||
; CHECK-NEXT: v_sub_u32_e32 v5, 31, v4
|
||||
; CHECK-NEXT: v_lshlrev_b32_e64 v5, v5, 1
|
||||
; CHECK-NEXT: v_xor_b32_e32 v5, v1, v5
|
||||
; CHECK-NEXT: v_add_u32_e32 v6, -8, v4
|
||||
; CHECK-NEXT: v_sub_u32_e32 v4, 0x95, v4
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v5, v6, v5
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v4
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
|
||||
; CHECK-NEXT: v_add_u32_e32 v2, 0x3c000000, v2
|
||||
; CHECK-NEXT: v_or3_b32 v4, v3, v4, v5
|
||||
; CHECK-NEXT: s_and_b64 vcc, s[4:5], vcc
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
|
||||
; CHECK-NEXT: v_or_b32_e32 v4, v0, v1
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 15, v0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x7fc00000
|
||||
; CHECK-NEXT: s_and_b64 vcc, s[4:5], vcc
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 %x, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E3M2FN (FiniteOnly)
|
||||
; Layout: sign(1) exp(3) mant(2), bias=3, maxExp=4
|
||||
; No Inf, no NaN. All bit patterns are finite.
|
||||
|
||||
; Float6E3M2FN normal: 0_011_00 = 1.0
|
||||
define float @from_f6e3m2fn_normal() {
|
||||
; CHECK-LABEL: from_f6e3m2fn_normal:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 1.0
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 12, metadata !"Float6E3M2FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E3M2FN max: 0_111_11 = 28.0
|
||||
define float @from_f6e3m2fn_max() {
|
||||
; CHECK-LABEL: from_f6e3m2fn_max:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x41e00000
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 31, metadata !"Float6E3M2FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E3M2FN denorm: 0_000_01 = 0.0625
|
||||
define float @from_f6e3m2fn_denorm() {
|
||||
; CHECK-LABEL: from_f6e3m2fn_denorm:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3d800000
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 1, metadata !"Float6E3M2FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E3M2FN zero: 0_000_00 = +0.0
|
||||
define float @from_f6e3m2fn_zero() {
|
||||
; CHECK-LABEL: from_f6e3m2fn_zero:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 0, metadata !"Float6E3M2FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E3M2FN negative: 1_011_00 = -1.0
|
||||
define float @from_f6e3m2fn_neg() {
|
||||
; CHECK-LABEL: from_f6e3m2fn_neg:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, -1.0
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 -20, metadata !"Float6E3M2FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E3M2FN runtime arg test
|
||||
define float @from_f6e3m2fn_dynamic(i6 %x) {
|
||||
; CHECK-LABEL: from_f6e3m2fn_dynamic:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
||||
; CHECK-NEXT: v_and_b32_e32 v1, 3, v0
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 26, v0
|
||||
; CHECK-NEXT: v_bfe_u32 v0, v0, 2, 3
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 21, v1
|
||||
; CHECK-NEXT: v_and_b32_e32 v3, 0x80000000, v3
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v0
|
||||
; CHECK-NEXT: v_or3_b32 v2, v4, v3, v2
|
||||
; CHECK-NEXT: v_ffbh_u32_e32 v4, v1
|
||||
; CHECK-NEXT: v_sub_u32_e32 v5, 31, v4
|
||||
; CHECK-NEXT: v_lshlrev_b32_e64 v5, v5, 1
|
||||
; CHECK-NEXT: v_xor_b32_e32 v5, v1, v5
|
||||
; CHECK-NEXT: v_add_u32_e32 v6, -8, v4
|
||||
; CHECK-NEXT: v_sub_u32_e32 v4, 0x9a, v4
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v5, v6, v5
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v4
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
|
||||
; CHECK-NEXT: v_add_u32_e32 v2, 0x3e000000, v2
|
||||
; CHECK-NEXT: v_or3_b32 v4, v3, v4, v5
|
||||
; CHECK-NEXT: s_and_b64 vcc, s[4:5], vcc
|
||||
; CHECK-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 %x, metadata !"Float6E3M2FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E2M3FN (FiniteOnly)
|
||||
; Layout: sign(1) exp(2) mant(3), bias=1, maxExp=2
|
||||
; No Inf, no NaN. All bit patterns are finite.
|
||||
|
||||
; Float6E2M3FN normal: 0_01_000 = 1.0
|
||||
define float @from_f6e2m3fn_normal() {
|
||||
; CHECK-LABEL: from_f6e2m3fn_normal:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 1.0
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 8, metadata !"Float6E2M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E2M3FN max: 0_11_111 = 7.5
|
||||
define float @from_f6e2m3fn_max() {
|
||||
; CHECK-LABEL: from_f6e2m3fn_max:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x40f00000
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 31, metadata !"Float6E2M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E2M3FN denorm: 0_00_001 = 0.125
|
||||
define float @from_f6e2m3fn_denorm() {
|
||||
; CHECK-LABEL: from_f6e2m3fn_denorm:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3e000000
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 1, metadata !"Float6E2M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E2M3FN zero: 0_00_000 = +0.0
|
||||
define float @from_f6e2m3fn_zero() {
|
||||
; CHECK-LABEL: from_f6e2m3fn_zero:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 0, metadata !"Float6E2M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E2M3FN runtime arg test
|
||||
define float @from_f6e2m3fn_dynamic(i6 %x) {
|
||||
; CHECK-LABEL: from_f6e2m3fn_dynamic:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
||||
; CHECK-NEXT: v_and_b32_e32 v1, 7, v0
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 26, v0
|
||||
; CHECK-NEXT: v_bfe_u32 v0, v0, 3, 2
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v1
|
||||
; CHECK-NEXT: v_and_b32_e32 v3, 0x80000000, v3
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v0
|
||||
; CHECK-NEXT: v_or3_b32 v2, v4, v3, v2
|
||||
; CHECK-NEXT: v_ffbh_u32_e32 v4, v1
|
||||
; CHECK-NEXT: v_sub_u32_e32 v5, 31, v4
|
||||
; CHECK-NEXT: v_lshlrev_b32_e64 v5, v5, 1
|
||||
; CHECK-NEXT: v_xor_b32_e32 v5, v1, v5
|
||||
; CHECK-NEXT: v_add_u32_e32 v6, -8, v4
|
||||
; CHECK-NEXT: v_sub_u32_e32 v4, 0x9b, v4
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v5, v6, v5
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v4
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
|
||||
; CHECK-NEXT: v_add_u32_e32 v2, 0.5, v2
|
||||
; CHECK-NEXT: v_or3_b32 v4, v3, v4, v5
|
||||
; CHECK-NEXT: s_and_b64 vcc, s[4:5], vcc
|
||||
; CHECK-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 %x, metadata !"Float6E2M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float4E2M1FN (FiniteOnly)
|
||||
; Layout: sign(1) exp(2) mant(1), bias=1, maxExp=2
|
||||
; No Inf, no NaN.
|
||||
|
||||
; Float4E2M1FN normal: 0_01_0 = 1.0
|
||||
define float @from_f4e2m1fn_normal() {
|
||||
; CHECK-LABEL: from_f4e2m1fn_normal:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 1.0
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 2, metadata !"Float4E2M1FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float4E2M1FN denorm: 0_00_1 = 0.5
|
||||
define float @from_f4e2m1fn_denorm() {
|
||||
; CHECK-LABEL: from_f4e2m1fn_denorm:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0.5
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 1, metadata !"Float4E2M1FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float4E2M1FN max: 0_11_1 = 6.0
|
||||
define float @from_f4e2m1fn_max() {
|
||||
; CHECK-LABEL: from_f4e2m1fn_max:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x40c00000
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 7, metadata !"Float4E2M1FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float4E2M1FN runtime arg test
|
||||
define float @from_f4e2m1fn_dynamic(i4 %x) {
|
||||
; CHECK-LABEL: from_f4e2m1fn_dynamic:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_and_b32_e32 v1, 0xffff, v0
|
||||
; CHECK-NEXT: v_and_b32_e32 v2, 1, v1
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 28, v1
|
||||
; CHECK-NEXT: v_bfe_u32 v1, v1, 1, 2
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 22, v2
|
||||
; CHECK-NEXT: v_and_b32_e32 v4, 0x80000000, v4
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v5, 23, v1
|
||||
; CHECK-NEXT: v_or3_b32 v3, v5, v4, v3
|
||||
; CHECK-NEXT: v_ffbh_u32_e32 v5, v2
|
||||
; CHECK-NEXT: v_sub_u32_e32 v6, 31, v5
|
||||
; CHECK-NEXT: v_lshlrev_b32_e64 v6, v6, 1
|
||||
; CHECK-NEXT: v_xor_b32_e32 v6, v2, v6
|
||||
; CHECK-NEXT: v_add_u32_e32 v7, -8, v5
|
||||
; CHECK-NEXT: v_sub_u32_e32 v5, 0x9d, v5
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v6, v7, v6
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v5, 23, v5
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
|
||||
; CHECK-NEXT: v_add_u32_e32 v3, 0.5, v3
|
||||
; CHECK-NEXT: v_or3_b32 v5, v4, v5, v6
|
||||
; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
|
||||
; CHECK-NEXT: v_or_b32_e32 v1, v1, v2
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 %x, metadata !"Float4E2M1FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 to f16: 1.0
|
||||
define half @from_f8e5m2_to_f16() {
|
||||
; CHECK-LABEL: from_f8e5m2_to_f16:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call half @llvm.convert.from.arbitrary.fp.f16.i8(i8 60, metadata !"Float8E5M2")
|
||||
ret half %r
|
||||
}
|
||||
|
||||
; Float8E5M2 to f64: 1.0
|
||||
define double @from_f8e5m2_to_f64() {
|
||||
; CHECK-LABEL: from_f8e5m2_to_f64:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0x3ff00000
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call double @llvm.convert.from.arbitrary.fp.f64.i8(i8 60, metadata !"Float8E5M2")
|
||||
ret double %r
|
||||
}
|
||||
|
||||
; Vector test: Float4E2M1FN <4 x i4> -> <4 x float>
|
||||
define <4 x float> @fp4_to_f32_vec(<4 x i4> %x) {
|
||||
; CHECK-LABEL: fp4_to_f32_vec:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_and_b32_e32 v4, 0xffff, v0
|
||||
; CHECK-NEXT: v_and_b32_e32 v5, 1, v4
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v7, 28, v4
|
||||
; CHECK-NEXT: v_bfe_u32 v4, v4, 1, 2
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v5
|
||||
; CHECK-NEXT: v_and_b32_e32 v7, 0x80000000, v7
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v4
|
||||
; CHECK-NEXT: v_or3_b32 v6, v8, v7, v6
|
||||
; CHECK-NEXT: v_ffbh_u32_e32 v8, v5
|
||||
; CHECK-NEXT: v_sub_u32_e32 v9, 31, v8
|
||||
; CHECK-NEXT: v_lshlrev_b32_e64 v9, v9, 1
|
||||
; CHECK-NEXT: v_xor_b32_e32 v9, v5, v9
|
||||
; CHECK-NEXT: v_add_u32_e32 v10, -8, v8
|
||||
; CHECK-NEXT: v_sub_u32_e32 v8, 0x9d, v8
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v9, v10, v9
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v8
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
|
||||
; CHECK-NEXT: v_add_u32_e32 v6, 0.5, v6
|
||||
; CHECK-NEXT: v_or3_b32 v8, v7, v8, v9
|
||||
; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
|
||||
; CHECK-NEXT: v_or_b32_e32 v4, v4, v5
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
||||
; CHECK-NEXT: v_and_b32_e32 v4, 0xffff, v1
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
|
||||
; CHECK-NEXT: v_and_b32_e32 v5, 1, v4
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v7, 28, v4
|
||||
; CHECK-NEXT: v_bfe_u32 v4, v4, 1, 2
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v5
|
||||
; CHECK-NEXT: v_and_b32_e32 v7, 0x80000000, v7
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v4
|
||||
; CHECK-NEXT: v_or3_b32 v6, v8, v7, v6
|
||||
; CHECK-NEXT: v_ffbh_u32_e32 v8, v5
|
||||
; CHECK-NEXT: v_sub_u32_e32 v9, 31, v8
|
||||
; CHECK-NEXT: v_lshlrev_b32_e64 v9, v9, 1
|
||||
; CHECK-NEXT: v_xor_b32_e32 v9, v5, v9
|
||||
; CHECK-NEXT: v_add_u32_e32 v10, -8, v8
|
||||
; CHECK-NEXT: v_sub_u32_e32 v8, 0x9d, v8
|
||||
; CHECK-NEXT: v_and_b32_e32 v1, 1, v1
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v9, v10, v9
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v8
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v1
|
||||
; CHECK-NEXT: v_add_u32_e32 v6, 0.5, v6
|
||||
; CHECK-NEXT: v_or3_b32 v8, v7, v8, v9
|
||||
; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
|
||||
; CHECK-NEXT: v_or_b32_e32 v4, v4, v5
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v1, v6, v8, vcc
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
||||
; CHECK-NEXT: v_and_b32_e32 v4, 0xffff, v2
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
|
||||
; CHECK-NEXT: v_and_b32_e32 v5, 1, v4
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v7, 28, v4
|
||||
; CHECK-NEXT: v_bfe_u32 v4, v4, 1, 2
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v5
|
||||
; CHECK-NEXT: v_and_b32_e32 v7, 0x80000000, v7
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v4
|
||||
; CHECK-NEXT: v_or3_b32 v6, v8, v7, v6
|
||||
; CHECK-NEXT: v_ffbh_u32_e32 v8, v5
|
||||
; CHECK-NEXT: v_sub_u32_e32 v9, 31, v8
|
||||
; CHECK-NEXT: v_lshlrev_b32_e64 v9, v9, 1
|
||||
; CHECK-NEXT: v_xor_b32_e32 v9, v5, v9
|
||||
; CHECK-NEXT: v_add_u32_e32 v10, -8, v8
|
||||
; CHECK-NEXT: v_sub_u32_e32 v8, 0x9d, v8
|
||||
; CHECK-NEXT: v_and_b32_e32 v2, 1, v2
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v9, v10, v9
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v8
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v2
|
||||
; CHECK-NEXT: v_add_u32_e32 v6, 0.5, v6
|
||||
; CHECK-NEXT: v_or3_b32 v8, v7, v8, v9
|
||||
; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
|
||||
; CHECK-NEXT: v_or_b32_e32 v4, v4, v5
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v8, vcc
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
||||
; CHECK-NEXT: v_and_b32_e32 v4, 0xffff, v3
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
|
||||
; CHECK-NEXT: v_and_b32_e32 v5, 1, v4
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v7, 28, v4
|
||||
; CHECK-NEXT: v_bfe_u32 v4, v4, 1, 2
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v5
|
||||
; CHECK-NEXT: v_and_b32_e32 v7, 0x80000000, v7
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v4
|
||||
; CHECK-NEXT: v_or3_b32 v6, v8, v7, v6
|
||||
; CHECK-NEXT: v_ffbh_u32_e32 v8, v5
|
||||
; CHECK-NEXT: v_sub_u32_e32 v9, 31, v8
|
||||
; CHECK-NEXT: v_lshlrev_b32_e64 v9, v9, 1
|
||||
; CHECK-NEXT: v_xor_b32_e32 v9, v5, v9
|
||||
; CHECK-NEXT: v_add_u32_e32 v10, -8, v8
|
||||
; CHECK-NEXT: v_sub_u32_e32 v8, 0x9d, v8
|
||||
; CHECK-NEXT: v_and_b32_e32 v3, 1, v3
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v9, v10, v9
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v8
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v3
|
||||
; CHECK-NEXT: v_add_u32_e32 v6, 0.5, v6
|
||||
; CHECK-NEXT: v_or3_b32 v8, v7, v8, v9
|
||||
; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
|
||||
; CHECK-NEXT: v_or_b32_e32 v4, v4, v5
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v3, v6, v8, vcc
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%r = call <4 x float> @llvm.convert.from.arbitrary.fp.v4f32.v4i4(<4 x i4> %x, metadata !"Float4E2M1FN")
|
||||
ret <4 x float> %r
|
||||
}
|
||||
761
llvm/test/CodeGen/NVPTX/arbitrary-fp-to-float.ll
Normal file
761
llvm/test/CodeGen/NVPTX/arbitrary-fp-to-float.ll
Normal file
@ -0,0 +1,761 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
|
||||
; RUN: llc < %s -mtriple=nvptx64-unknown-unknown | FileCheck %s
|
||||
|
||||
; Test llvm.convert.from.arbitrary.fp intrinsic expansion.
|
||||
|
||||
declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata)
|
||||
declare float @llvm.convert.from.arbitrary.fp.f32.i6(i6, metadata)
|
||||
declare float @llvm.convert.from.arbitrary.fp.f32.i4(i4, metadata)
|
||||
declare <4 x float> @llvm.convert.from.arbitrary.fp.v4f32.v4i4(<4 x i4>, metadata)
|
||||
|
||||
declare half @llvm.convert.from.arbitrary.fp.f16.i8(i8, metadata)
|
||||
declare double @llvm.convert.from.arbitrary.fp.f64.i8(i8, metadata)
|
||||
|
||||
; Float8E5M2
|
||||
; Layout: sign(1) exp(5) mant(2), bias=15
|
||||
; Supports: Inf, NaN, signed zero, denormals
|
||||
|
||||
; Float8E5M2 normal: 0_01111_00 = 1.0
|
||||
define float @from_f8e5m2_normal() {
|
||||
; CHECK-LABEL: from_f8e5m2_normal(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 1065353216;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 60, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 zero: 0_00000_00 = +0.0
|
||||
define float @from_f8e5m2_zero() {
|
||||
; CHECK-LABEL: from_f8e5m2_zero(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 0;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 0, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 negative zero: 1_00000_00 = -0.0
|
||||
define float @from_f8e5m2_neg_zero() {
|
||||
; CHECK-LABEL: from_f8e5m2_neg_zero(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], -2147483648;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 -128, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 denorm: 0_00000_01 = 2^(-16)
|
||||
define float @from_f8e5m2_denorm() {
|
||||
; CHECK-LABEL: from_f8e5m2_denorm(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 931135488;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 1, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 +Inf: 0_11111_00
|
||||
define float @from_f8e5m2_inf() {
|
||||
; CHECK-LABEL: from_f8e5m2_inf(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 2139095040;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 124, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 NaN: 0_11111_01
|
||||
define float @from_f8e5m2_nan() {
|
||||
; CHECK-LABEL: from_f8e5m2_nan(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 2143289344;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 125, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 max: 0_11110_11 = 57344
|
||||
define float @from_f8e5m2_max() {
|
||||
; CHECK-LABEL: from_f8e5m2_max(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 1197473792;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 123, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 negative: 1_01111_00 = -1.0
|
||||
define float @from_f8e5m2_neg() {
|
||||
; CHECK-LABEL: from_f8e5m2_neg(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], -1082130432;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 -68, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 runtime arg test
|
||||
define float @from_f8e5m2_dynamic(i8 %x) {
|
||||
; CHECK-LABEL: from_f8e5m2_dynamic(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<6>;
|
||||
; CHECK-NEXT: .reg .b32 %r<31>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %r1, [from_f8e5m2_dynamic_param_0];
|
||||
; CHECK-NEXT: shl.b32 %r2, %r1, 24;
|
||||
; CHECK-NEXT: and.b32 %r3, %r2, -2147483648;
|
||||
; CHECK-NEXT: and.b32 %r4, %r1, 3;
|
||||
; CHECK-NEXT: clz.b32 %r5, %r4;
|
||||
; CHECK-NEXT: sub.s32 %r6, 142, %r5;
|
||||
; CHECK-NEXT: shl.b32 %r7, %r6, 23;
|
||||
; CHECK-NEXT: or.b32 %r8, %r3, %r7;
|
||||
; CHECK-NEXT: sub.s32 %r9, 31, %r5;
|
||||
; CHECK-NEXT: mov.b32 %r10, 1;
|
||||
; CHECK-NEXT: shl.b32 %r11, %r10, %r9;
|
||||
; CHECK-NEXT: xor.b32 %r12, %r4, %r11;
|
||||
; CHECK-NEXT: add.s32 %r13, %r5, -8;
|
||||
; CHECK-NEXT: shl.b32 %r14, %r12, %r13;
|
||||
; CHECK-NEXT: or.b32 %r15, %r8, %r14;
|
||||
; CHECK-NEXT: bfe.u32 %r16, %r1, 2, 5;
|
||||
; CHECK-NEXT: shl.b32 %r17, %r16, 23;
|
||||
; CHECK-NEXT: or.b32 %r18, %r17, %r3;
|
||||
; CHECK-NEXT: shl.b32 %r19, %r4, 21;
|
||||
; CHECK-NEXT: or.b32 %r20, %r18, %r19;
|
||||
; CHECK-NEXT: add.s32 %r21, %r20, 939524096;
|
||||
; CHECK-NEXT: setp.ne.b32 %p1, %r4, 0;
|
||||
; CHECK-NEXT: selp.b32 %r22, %r15, %r21, %p1;
|
||||
; CHECK-NEXT: setp.eq.b32 %p2, %r16, 0;
|
||||
; CHECK-NEXT: selp.b32 %r23, %r22, %r21, %p2;
|
||||
; CHECK-NEXT: or.b32 %r24, %r16, %r4;
|
||||
; CHECK-NEXT: setp.eq.b32 %p3, %r24, 0;
|
||||
; CHECK-NEXT: selp.b32 %r25, %r3, %r23, %p3;
|
||||
; CHECK-NEXT: setp.eq.b32 %p4, %r4, 0;
|
||||
; CHECK-NEXT: or.b32 %r26, %r3, 2139095040;
|
||||
; CHECK-NEXT: selp.b32 %r27, %r26, %r25, %p4;
|
||||
; CHECK-NEXT: setp.eq.b32 %p5, %r16, 31;
|
||||
; CHECK-NEXT: selp.b32 %r28, %r27, %r25, %p5;
|
||||
; CHECK-NEXT: selp.b32 %r29, 2143289344, %r28, %p1;
|
||||
; CHECK-NEXT: selp.b32 %r30, %r29, %r28, %p5;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r30;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 %x, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN (NanOnly, NanEncoding=AllOnes)
|
||||
; Layout: sign(1) exp(4) mant(3), maxExp=8, minExp=-6, bias=7
|
||||
; Only 0_1111_111 and 1_1111_111 are NaN; all other exp=15 values are finite.
|
||||
|
||||
; Float8E4M3FN normal: 0_0111_000 = 1.0
|
||||
define float @from_f8e4m3fn_normal() {
|
||||
; CHECK-LABEL: from_f8e4m3fn_normal(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 1065353216;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 56, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN NaN: 0_1111_111
|
||||
define float @from_f8e4m3fn_nan() {
|
||||
; CHECK-LABEL: from_f8e4m3fn_nan(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 2143289344;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 127, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN not-NaN: 0_1111_110 = 448
|
||||
; Despite exp=all-ones, this is a valid finite number (max value)
|
||||
define float @from_f8e4m3fn_max() {
|
||||
; CHECK-LABEL: from_f8e4m3fn_max(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 1138753536;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 126, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN not-NaN: 0_1111_101 = 416
|
||||
; exp=all-ones but mant!=all-ones so this is finite
|
||||
define float @from_f8e4m3fn_not_nan() {
|
||||
; CHECK-LABEL: from_f8e4m3fn_not_nan(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 1137704960;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 125, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN zero: 0_0000_000 = +0.0
|
||||
define float @from_f8e4m3fn_zero() {
|
||||
; CHECK-LABEL: from_f8e4m3fn_zero(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 0;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 0, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN denorm: 0_0000_001 = 2^(-9)
|
||||
define float @from_f8e4m3fn_denorm() {
|
||||
; CHECK-LABEL: from_f8e4m3fn_denorm(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 989855744;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 1, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN runtime arg test
|
||||
define float @from_f8e4m3fn_dynamic(i8 %x) {
|
||||
; CHECK-LABEL: from_f8e4m3fn_dynamic(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<6>;
|
||||
; CHECK-NEXT: .reg .b32 %r<28>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %r1, [from_f8e4m3fn_dynamic_param_0];
|
||||
; CHECK-NEXT: shl.b32 %r2, %r1, 24;
|
||||
; CHECK-NEXT: and.b32 %r3, %r2, -2147483648;
|
||||
; CHECK-NEXT: and.b32 %r4, %r1, 7;
|
||||
; CHECK-NEXT: clz.b32 %r5, %r4;
|
||||
; CHECK-NEXT: sub.s32 %r6, 149, %r5;
|
||||
; CHECK-NEXT: shl.b32 %r7, %r6, 23;
|
||||
; CHECK-NEXT: or.b32 %r8, %r3, %r7;
|
||||
; CHECK-NEXT: sub.s32 %r9, 31, %r5;
|
||||
; CHECK-NEXT: mov.b32 %r10, 1;
|
||||
; CHECK-NEXT: shl.b32 %r11, %r10, %r9;
|
||||
; CHECK-NEXT: xor.b32 %r12, %r4, %r11;
|
||||
; CHECK-NEXT: add.s32 %r13, %r5, -8;
|
||||
; CHECK-NEXT: shl.b32 %r14, %r12, %r13;
|
||||
; CHECK-NEXT: or.b32 %r15, %r8, %r14;
|
||||
; CHECK-NEXT: bfe.u32 %r16, %r1, 3, 4;
|
||||
; CHECK-NEXT: shl.b32 %r17, %r16, 23;
|
||||
; CHECK-NEXT: or.b32 %r18, %r17, %r3;
|
||||
; CHECK-NEXT: shl.b32 %r19, %r4, 20;
|
||||
; CHECK-NEXT: or.b32 %r20, %r18, %r19;
|
||||
; CHECK-NEXT: add.s32 %r21, %r20, 1006632960;
|
||||
; CHECK-NEXT: setp.ne.b32 %p1, %r4, 0;
|
||||
; CHECK-NEXT: selp.b32 %r22, %r15, %r21, %p1;
|
||||
; CHECK-NEXT: setp.eq.b32 %p2, %r16, 0;
|
||||
; CHECK-NEXT: selp.b32 %r23, %r22, %r21, %p2;
|
||||
; CHECK-NEXT: or.b32 %r24, %r16, %r4;
|
||||
; CHECK-NEXT: setp.eq.b32 %p3, %r24, 0;
|
||||
; CHECK-NEXT: selp.b32 %r25, %r3, %r23, %p3;
|
||||
; CHECK-NEXT: setp.eq.b32 %p4, %r4, 7;
|
||||
; CHECK-NEXT: selp.b32 %r26, 2143289344, %r25, %p4;
|
||||
; CHECK-NEXT: setp.eq.b32 %p5, %r16, 15;
|
||||
; CHECK-NEXT: selp.b32 %r27, %r26, %r25, %p5;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r27;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 %x, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E3M2FN (FiniteOnly)
|
||||
; Layout: sign(1) exp(3) mant(2), bias=3, maxExp=4
|
||||
; No Inf, no NaN. All bit patterns are finite.
|
||||
|
||||
; Float6E3M2FN normal: 0_011_00 = 1.0
|
||||
define float @from_f6e3m2fn_normal() {
|
||||
; CHECK-LABEL: from_f6e3m2fn_normal(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 1065353216;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 12, metadata !"Float6E3M2FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E3M2FN max: 0_111_11 = 28.0
|
||||
define float @from_f6e3m2fn_max() {
|
||||
; CHECK-LABEL: from_f6e3m2fn_max(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 1105199104;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 31, metadata !"Float6E3M2FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E3M2FN denorm: 0_000_01 = 0.0625
|
||||
define float @from_f6e3m2fn_denorm() {
|
||||
; CHECK-LABEL: from_f6e3m2fn_denorm(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 1031798784;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 1, metadata !"Float6E3M2FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E3M2FN zero: 0_000_00 = +0.0
|
||||
define float @from_f6e3m2fn_zero() {
|
||||
; CHECK-LABEL: from_f6e3m2fn_zero(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 0;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 0, metadata !"Float6E3M2FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E3M2FN negative: 1_011_00 = -1.0
|
||||
define float @from_f6e3m2fn_neg() {
|
||||
; CHECK-LABEL: from_f6e3m2fn_neg(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], -1082130432;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 -20, metadata !"Float6E3M2FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E3M2FN runtime arg test
|
||||
define float @from_f6e3m2fn_dynamic(i6 %x) {
|
||||
; CHECK-LABEL: from_f6e3m2fn_dynamic(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<4>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<5>;
|
||||
; CHECK-NEXT: .reg .b32 %r<26>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [from_f6e3m2fn_dynamic_param_0+1];
|
||||
; CHECK-NEXT: shl.b16 %rs2, %rs1, 8;
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [from_f6e3m2fn_dynamic_param_0];
|
||||
; CHECK-NEXT: or.b16 %rs4, %rs2, %rs3;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r1, %rs4;
|
||||
; CHECK-NEXT: shl.b32 %r2, %r1, 26;
|
||||
; CHECK-NEXT: and.b32 %r3, %r2, -2147483648;
|
||||
; CHECK-NEXT: and.b32 %r4, %r1, 3;
|
||||
; CHECK-NEXT: clz.b32 %r5, %r4;
|
||||
; CHECK-NEXT: sub.s32 %r6, 154, %r5;
|
||||
; CHECK-NEXT: shl.b32 %r7, %r6, 23;
|
||||
; CHECK-NEXT: or.b32 %r8, %r3, %r7;
|
||||
; CHECK-NEXT: sub.s32 %r9, 31, %r5;
|
||||
; CHECK-NEXT: mov.b32 %r10, 1;
|
||||
; CHECK-NEXT: shl.b32 %r11, %r10, %r9;
|
||||
; CHECK-NEXT: xor.b32 %r12, %r4, %r11;
|
||||
; CHECK-NEXT: add.s32 %r13, %r5, -8;
|
||||
; CHECK-NEXT: shl.b32 %r14, %r12, %r13;
|
||||
; CHECK-NEXT: or.b32 %r15, %r8, %r14;
|
||||
; CHECK-NEXT: bfe.u32 %r16, %r1, 2, 3;
|
||||
; CHECK-NEXT: shl.b32 %r17, %r16, 23;
|
||||
; CHECK-NEXT: or.b32 %r18, %r17, %r3;
|
||||
; CHECK-NEXT: shl.b32 %r19, %r4, 21;
|
||||
; CHECK-NEXT: or.b32 %r20, %r18, %r19;
|
||||
; CHECK-NEXT: add.s32 %r21, %r20, 1040187392;
|
||||
; CHECK-NEXT: setp.ne.b32 %p1, %r4, 0;
|
||||
; CHECK-NEXT: selp.b32 %r22, %r15, %r21, %p1;
|
||||
; CHECK-NEXT: setp.eq.b32 %p2, %r16, 0;
|
||||
; CHECK-NEXT: selp.b32 %r23, %r22, %r21, %p2;
|
||||
; CHECK-NEXT: or.b32 %r24, %r16, %r4;
|
||||
; CHECK-NEXT: setp.eq.b32 %p3, %r24, 0;
|
||||
; CHECK-NEXT: selp.b32 %r25, %r3, %r23, %p3;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r25;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 %x, metadata !"Float6E3M2FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E2M3FN (FiniteOnly)
|
||||
; Layout: sign(1) exp(2) mant(3), bias=1, maxExp=2
|
||||
; No Inf, no NaN. All bit patterns are finite.
|
||||
|
||||
; Float6E2M3FN normal: 0_01_000 = 1.0
|
||||
define float @from_f6e2m3fn_normal() {
|
||||
; CHECK-LABEL: from_f6e2m3fn_normal(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 1065353216;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 8, metadata !"Float6E2M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E2M3FN max: 0_11_111 = 7.5
|
||||
define float @from_f6e2m3fn_max() {
|
||||
; CHECK-LABEL: from_f6e2m3fn_max(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 1089470464;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 31, metadata !"Float6E2M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E2M3FN denorm: 0_00_001 = 0.125
|
||||
define float @from_f6e2m3fn_denorm() {
|
||||
; CHECK-LABEL: from_f6e2m3fn_denorm(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 1040187392;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 1, metadata !"Float6E2M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E2M3FN zero: 0_00_000 = +0.0
|
||||
define float @from_f6e2m3fn_zero() {
|
||||
; CHECK-LABEL: from_f6e2m3fn_zero(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 0;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 0, metadata !"Float6E2M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E2M3FN runtime arg test
|
||||
define float @from_f6e2m3fn_dynamic(i6 %x) {
|
||||
; CHECK-LABEL: from_f6e2m3fn_dynamic(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<4>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<5>;
|
||||
; CHECK-NEXT: .reg .b32 %r<26>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [from_f6e2m3fn_dynamic_param_0+1];
|
||||
; CHECK-NEXT: shl.b16 %rs2, %rs1, 8;
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [from_f6e2m3fn_dynamic_param_0];
|
||||
; CHECK-NEXT: or.b16 %rs4, %rs2, %rs3;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r1, %rs4;
|
||||
; CHECK-NEXT: shl.b32 %r2, %r1, 26;
|
||||
; CHECK-NEXT: and.b32 %r3, %r2, -2147483648;
|
||||
; CHECK-NEXT: and.b32 %r4, %r1, 7;
|
||||
; CHECK-NEXT: clz.b32 %r5, %r4;
|
||||
; CHECK-NEXT: sub.s32 %r6, 155, %r5;
|
||||
; CHECK-NEXT: shl.b32 %r7, %r6, 23;
|
||||
; CHECK-NEXT: or.b32 %r8, %r3, %r7;
|
||||
; CHECK-NEXT: sub.s32 %r9, 31, %r5;
|
||||
; CHECK-NEXT: mov.b32 %r10, 1;
|
||||
; CHECK-NEXT: shl.b32 %r11, %r10, %r9;
|
||||
; CHECK-NEXT: xor.b32 %r12, %r4, %r11;
|
||||
; CHECK-NEXT: add.s32 %r13, %r5, -8;
|
||||
; CHECK-NEXT: shl.b32 %r14, %r12, %r13;
|
||||
; CHECK-NEXT: or.b32 %r15, %r8, %r14;
|
||||
; CHECK-NEXT: bfe.u32 %r16, %r1, 3, 2;
|
||||
; CHECK-NEXT: shl.b32 %r17, %r16, 23;
|
||||
; CHECK-NEXT: or.b32 %r18, %r17, %r3;
|
||||
; CHECK-NEXT: shl.b32 %r19, %r4, 20;
|
||||
; CHECK-NEXT: or.b32 %r20, %r18, %r19;
|
||||
; CHECK-NEXT: add.s32 %r21, %r20, 1056964608;
|
||||
; CHECK-NEXT: setp.ne.b32 %p1, %r4, 0;
|
||||
; CHECK-NEXT: selp.b32 %r22, %r15, %r21, %p1;
|
||||
; CHECK-NEXT: setp.eq.b32 %p2, %r16, 0;
|
||||
; CHECK-NEXT: selp.b32 %r23, %r22, %r21, %p2;
|
||||
; CHECK-NEXT: or.b32 %r24, %r16, %r4;
|
||||
; CHECK-NEXT: setp.eq.b32 %p3, %r24, 0;
|
||||
; CHECK-NEXT: selp.b32 %r25, %r3, %r23, %p3;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r25;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 %x, metadata !"Float6E2M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float4E2M1FN (FiniteOnly)
|
||||
; Layout: sign(1) exp(2) mant(1), bias=1, maxExp=2
|
||||
; No Inf, no NaN. All bit patterns are finite.
|
||||
|
||||
; Float4E2M1FN normal: 0_01_0 = 1.0
|
||||
define float @from_f4e2m1fn_normal() {
|
||||
; CHECK-LABEL: from_f4e2m1fn_normal(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 1065353216;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 2, metadata !"Float4E2M1FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float4E2M1FN denorm: 0_00_1 = 0.5
|
||||
define float @from_f4e2m1fn_denorm() {
|
||||
; CHECK-LABEL: from_f4e2m1fn_denorm(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 1056964608;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 1, metadata !"Float4E2M1FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float4E2M1FN max: 0_11_1 = 6.0
|
||||
define float @from_f4e2m1fn_max() {
|
||||
; CHECK-LABEL: from_f4e2m1fn_max(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], 1086324736;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 7, metadata !"Float4E2M1FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float4E2M1FN runtime arg test
|
||||
define float @from_f4e2m1fn_dynamic(i4 %x) {
|
||||
; CHECK-LABEL: from_f4e2m1fn_dynamic(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<4>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<6>;
|
||||
; CHECK-NEXT: .reg .b32 %r<26>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [from_f4e2m1fn_dynamic_param_0+1];
|
||||
; CHECK-NEXT: shl.b16 %rs2, %rs1, 8;
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [from_f4e2m1fn_dynamic_param_0];
|
||||
; CHECK-NEXT: or.b16 %rs4, %rs2, %rs3;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r1, %rs4;
|
||||
; CHECK-NEXT: shl.b32 %r2, %r1, 28;
|
||||
; CHECK-NEXT: and.b32 %r3, %r2, -2147483648;
|
||||
; CHECK-NEXT: and.b32 %r4, %r1, 1;
|
||||
; CHECK-NEXT: clz.b32 %r5, %r4;
|
||||
; CHECK-NEXT: sub.s32 %r6, 157, %r5;
|
||||
; CHECK-NEXT: shl.b32 %r7, %r6, 23;
|
||||
; CHECK-NEXT: or.b32 %r8, %r3, %r7;
|
||||
; CHECK-NEXT: sub.s32 %r9, 31, %r5;
|
||||
; CHECK-NEXT: mov.b32 %r10, 1;
|
||||
; CHECK-NEXT: shl.b32 %r11, %r10, %r9;
|
||||
; CHECK-NEXT: xor.b32 %r12, %r4, %r11;
|
||||
; CHECK-NEXT: add.s32 %r13, %r5, -8;
|
||||
; CHECK-NEXT: shl.b32 %r14, %r12, %r13;
|
||||
; CHECK-NEXT: or.b32 %r15, %r8, %r14;
|
||||
; CHECK-NEXT: bfe.u32 %r16, %r1, 1, 2;
|
||||
; CHECK-NEXT: shl.b32 %r17, %r16, 23;
|
||||
; CHECK-NEXT: or.b32 %r18, %r17, %r3;
|
||||
; CHECK-NEXT: shl.b32 %r19, %r4, 22;
|
||||
; CHECK-NEXT: or.b32 %r20, %r18, %r19;
|
||||
; CHECK-NEXT: add.s32 %r21, %r20, 1056964608;
|
||||
; CHECK-NEXT: and.b16 %rs5, %rs3, 1;
|
||||
; CHECK-NEXT: setp.ne.b16 %p1, %rs5, 0;
|
||||
; CHECK-NEXT: selp.b32 %r22, %r15, %r21, %p1;
|
||||
; CHECK-NEXT: setp.eq.b32 %p2, %r16, 0;
|
||||
; CHECK-NEXT: selp.b32 %r23, %r22, %r21, %p2;
|
||||
; CHECK-NEXT: or.b32 %r24, %r16, %r4;
|
||||
; CHECK-NEXT: setp.eq.b32 %p3, %r24, 0;
|
||||
; CHECK-NEXT: selp.b32 %r25, %r3, %r23, %p3;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r25;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 %x, metadata !"Float4E2M1FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 to f16: 1.0
|
||||
define half @from_f8e5m2_to_f16() {
|
||||
; CHECK-LABEL: from_f8e5m2_to_f16(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b16 [func_retval0], 0x3C00;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call half @llvm.convert.from.arbitrary.fp.f16.i8(i8 60, metadata !"Float8E5M2")
|
||||
ret half %r
|
||||
}
|
||||
|
||||
; Float8E5M2 to f64: 1.0
|
||||
define double @from_f8e5m2_to_f64() {
|
||||
; CHECK-LABEL: from_f8e5m2_to_f64(
|
||||
; CHECK: {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], 4607182418800017408;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call double @llvm.convert.from.arbitrary.fp.f64.i8(i8 60, metadata !"Float8E5M2")
|
||||
ret double %r
|
||||
}
|
||||
|
||||
; Vector test: Float4E2M1FN <4 x i4> -> <4 x float>
|
||||
define <4 x float> @fp4_to_f32_vec(<4 x i4> %x) {
|
||||
; CHECK-LABEL: fp4_to_f32_vec(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<13>;
|
||||
; CHECK-NEXT: .reg .b32 %r<101>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b16 %r1, [fp4_to_f32_vec_param_0+2];
|
||||
; CHECK-NEXT: shl.b32 %r2, %r1, 16;
|
||||
; CHECK-NEXT: ld.param.b16 %r3, [fp4_to_f32_vec_param_0];
|
||||
; CHECK-NEXT: prmt.b32 %r4, %r3, 0, 0x7771U;
|
||||
; CHECK-NEXT: shl.b32 %r5, %r4, 28;
|
||||
; CHECK-NEXT: and.b32 %r6, %r5, -2147483648;
|
||||
; CHECK-NEXT: and.b32 %r7, %r4, 1;
|
||||
; CHECK-NEXT: clz.b32 %r8, %r7;
|
||||
; CHECK-NEXT: sub.s32 %r9, 157, %r8;
|
||||
; CHECK-NEXT: shl.b32 %r10, %r9, 23;
|
||||
; CHECK-NEXT: or.b32 %r11, %r6, %r10;
|
||||
; CHECK-NEXT: sub.s32 %r12, 31, %r8;
|
||||
; CHECK-NEXT: mov.b32 %r13, 1;
|
||||
; CHECK-NEXT: shl.b32 %r14, %r13, %r12;
|
||||
; CHECK-NEXT: xor.b32 %r15, %r7, %r14;
|
||||
; CHECK-NEXT: add.s32 %r16, %r8, -8;
|
||||
; CHECK-NEXT: shl.b32 %r17, %r15, %r16;
|
||||
; CHECK-NEXT: or.b32 %r18, %r11, %r17;
|
||||
; CHECK-NEXT: bfe.u32 %r19, %r4, 1, 2;
|
||||
; CHECK-NEXT: shl.b32 %r20, %r19, 23;
|
||||
; CHECK-NEXT: or.b32 %r21, %r20, %r6;
|
||||
; CHECK-NEXT: shl.b32 %r22, %r7, 22;
|
||||
; CHECK-NEXT: or.b32 %r23, %r21, %r22;
|
||||
; CHECK-NEXT: add.s32 %r24, %r23, 1056964608;
|
||||
; CHECK-NEXT: setp.ne.b32 %p1, %r7, 0;
|
||||
; CHECK-NEXT: selp.b32 %r25, %r18, %r24, %p1;
|
||||
; CHECK-NEXT: setp.eq.b32 %p2, %r19, 0;
|
||||
; CHECK-NEXT: selp.b32 %r26, %r25, %r24, %p2;
|
||||
; CHECK-NEXT: or.b32 %r27, %r19, %r7;
|
||||
; CHECK-NEXT: setp.eq.b32 %p3, %r27, 0;
|
||||
; CHECK-NEXT: selp.b32 %r28, %r6, %r26, %p3;
|
||||
; CHECK-NEXT: prmt.b32 %r29, %r3, 0, 0x7770U;
|
||||
; CHECK-NEXT: shl.b32 %r30, %r29, 28;
|
||||
; CHECK-NEXT: and.b32 %r31, %r30, -2147483648;
|
||||
; CHECK-NEXT: and.b32 %r32, %r29, 1;
|
||||
; CHECK-NEXT: clz.b32 %r33, %r32;
|
||||
; CHECK-NEXT: sub.s32 %r34, 157, %r33;
|
||||
; CHECK-NEXT: shl.b32 %r35, %r34, 23;
|
||||
; CHECK-NEXT: or.b32 %r36, %r31, %r35;
|
||||
; CHECK-NEXT: sub.s32 %r37, 31, %r33;
|
||||
; CHECK-NEXT: shl.b32 %r38, %r13, %r37;
|
||||
; CHECK-NEXT: xor.b32 %r39, %r32, %r38;
|
||||
; CHECK-NEXT: add.s32 %r40, %r33, -8;
|
||||
; CHECK-NEXT: shl.b32 %r41, %r39, %r40;
|
||||
; CHECK-NEXT: or.b32 %r42, %r36, %r41;
|
||||
; CHECK-NEXT: bfe.u32 %r43, %r29, 1, 2;
|
||||
; CHECK-NEXT: shl.b32 %r44, %r43, 23;
|
||||
; CHECK-NEXT: or.b32 %r45, %r44, %r31;
|
||||
; CHECK-NEXT: shl.b32 %r46, %r32, 22;
|
||||
; CHECK-NEXT: or.b32 %r47, %r45, %r46;
|
||||
; CHECK-NEXT: add.s32 %r48, %r47, 1056964608;
|
||||
; CHECK-NEXT: setp.ne.b32 %p4, %r32, 0;
|
||||
; CHECK-NEXT: selp.b32 %r49, %r42, %r48, %p4;
|
||||
; CHECK-NEXT: setp.eq.b32 %p5, %r43, 0;
|
||||
; CHECK-NEXT: selp.b32 %r50, %r49, %r48, %p5;
|
||||
; CHECK-NEXT: or.b32 %r51, %r43, %r32;
|
||||
; CHECK-NEXT: setp.eq.b32 %p6, %r51, 0;
|
||||
; CHECK-NEXT: selp.b32 %r52, %r31, %r50, %p6;
|
||||
; CHECK-NEXT: prmt.b32 %r53, %r2, 0, 0x7773U;
|
||||
; CHECK-NEXT: shl.b32 %r54, %r53, 28;
|
||||
; CHECK-NEXT: and.b32 %r55, %r54, -2147483648;
|
||||
; CHECK-NEXT: and.b32 %r56, %r53, 1;
|
||||
; CHECK-NEXT: clz.b32 %r57, %r56;
|
||||
; CHECK-NEXT: sub.s32 %r58, 157, %r57;
|
||||
; CHECK-NEXT: shl.b32 %r59, %r58, 23;
|
||||
; CHECK-NEXT: or.b32 %r60, %r55, %r59;
|
||||
; CHECK-NEXT: sub.s32 %r61, 31, %r57;
|
||||
; CHECK-NEXT: shl.b32 %r62, %r13, %r61;
|
||||
; CHECK-NEXT: xor.b32 %r63, %r56, %r62;
|
||||
; CHECK-NEXT: add.s32 %r64, %r57, -8;
|
||||
; CHECK-NEXT: shl.b32 %r65, %r63, %r64;
|
||||
; CHECK-NEXT: or.b32 %r66, %r60, %r65;
|
||||
; CHECK-NEXT: bfe.u32 %r67, %r53, 1, 2;
|
||||
; CHECK-NEXT: shl.b32 %r68, %r67, 23;
|
||||
; CHECK-NEXT: or.b32 %r69, %r68, %r55;
|
||||
; CHECK-NEXT: shl.b32 %r70, %r56, 22;
|
||||
; CHECK-NEXT: or.b32 %r71, %r69, %r70;
|
||||
; CHECK-NEXT: add.s32 %r72, %r71, 1056964608;
|
||||
; CHECK-NEXT: setp.ne.b32 %p7, %r56, 0;
|
||||
; CHECK-NEXT: selp.b32 %r73, %r66, %r72, %p7;
|
||||
; CHECK-NEXT: setp.eq.b32 %p8, %r67, 0;
|
||||
; CHECK-NEXT: selp.b32 %r74, %r73, %r72, %p8;
|
||||
; CHECK-NEXT: or.b32 %r75, %r67, %r56;
|
||||
; CHECK-NEXT: setp.eq.b32 %p9, %r75, 0;
|
||||
; CHECK-NEXT: selp.b32 %r76, %r55, %r74, %p9;
|
||||
; CHECK-NEXT: prmt.b32 %r77, %r2, 0, 0x7772U;
|
||||
; CHECK-NEXT: shl.b32 %r78, %r77, 28;
|
||||
; CHECK-NEXT: and.b32 %r79, %r78, -2147483648;
|
||||
; CHECK-NEXT: and.b32 %r80, %r77, 1;
|
||||
; CHECK-NEXT: clz.b32 %r81, %r80;
|
||||
; CHECK-NEXT: sub.s32 %r82, 157, %r81;
|
||||
; CHECK-NEXT: shl.b32 %r83, %r82, 23;
|
||||
; CHECK-NEXT: or.b32 %r84, %r79, %r83;
|
||||
; CHECK-NEXT: sub.s32 %r85, 31, %r81;
|
||||
; CHECK-NEXT: shl.b32 %r86, %r13, %r85;
|
||||
; CHECK-NEXT: xor.b32 %r87, %r80, %r86;
|
||||
; CHECK-NEXT: add.s32 %r88, %r81, -8;
|
||||
; CHECK-NEXT: shl.b32 %r89, %r87, %r88;
|
||||
; CHECK-NEXT: or.b32 %r90, %r84, %r89;
|
||||
; CHECK-NEXT: bfe.u32 %r91, %r77, 1, 2;
|
||||
; CHECK-NEXT: shl.b32 %r92, %r91, 23;
|
||||
; CHECK-NEXT: or.b32 %r93, %r92, %r79;
|
||||
; CHECK-NEXT: shl.b32 %r94, %r80, 22;
|
||||
; CHECK-NEXT: or.b32 %r95, %r93, %r94;
|
||||
; CHECK-NEXT: add.s32 %r96, %r95, 1056964608;
|
||||
; CHECK-NEXT: setp.ne.b32 %p10, %r80, 0;
|
||||
; CHECK-NEXT: selp.b32 %r97, %r90, %r96, %p10;
|
||||
; CHECK-NEXT: setp.eq.b32 %p11, %r91, 0;
|
||||
; CHECK-NEXT: selp.b32 %r98, %r97, %r96, %p11;
|
||||
; CHECK-NEXT: or.b32 %r99, %r91, %r80;
|
||||
; CHECK-NEXT: setp.eq.b32 %p12, %r99, 0;
|
||||
; CHECK-NEXT: selp.b32 %r100, %r79, %r98, %p12;
|
||||
; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r52, %r28, %r100, %r76};
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call <4 x float> @llvm.convert.from.arbitrary.fp.v4f32.v4i4(<4 x i4> %x, metadata !"Float4E2M1FN")
|
||||
ret <4 x float> %r
|
||||
}
|
||||
76
llvm/test/CodeGen/X86/arbitrary-fp-convert-error.ll
Normal file
76
llvm/test/CodeGen/X86/arbitrary-fp-convert-error.ll
Normal file
@ -0,0 +1,76 @@
|
||||
; RUN: split-file %s %t
|
||||
; RUN: not llc < %t/float8e4m3.ll -mtriple=x86_64-unknown-unknown 2>&1 | FileCheck %s --check-prefix=E4M3
|
||||
; RUN: not llc < %t/float8e3m4.ll -mtriple=x86_64-unknown-unknown 2>&1 | FileCheck %s --check-prefix=E3M4
|
||||
; RUN: not llc < %t/float8e5m2fnuz.ll -mtriple=x86_64-unknown-unknown 2>&1 | FileCheck %s --check-prefix=E5M2FNUZ
|
||||
; RUN: not llc < %t/float8e4m3fnuz.ll -mtriple=x86_64-unknown-unknown 2>&1 | FileCheck %s --check-prefix=E4M3FNUZ
|
||||
; RUN: not llc < %t/float8e4m3b11fnuz.ll -mtriple=x86_64-unknown-unknown 2>&1 | FileCheck %s --check-prefix=E4M3B11FNUZ
|
||||
; RUN: not llc < %t/float8e8m0fnu.ll -mtriple=x86_64-unknown-unknown 2>&1 | FileCheck %s --check-prefix=E8M0FNU
|
||||
|
||||
; Test that llvm.convert.from.arbitrary.fp emits an error for formats that pass
|
||||
; verifier validation but are not yet implemented in SelectionDAGBuilder.
|
||||
|
||||
;--- float8e4m3.ll
|
||||
; E4M3: error: convert_from_arbitrary_fp: not implemented format 'Float8E4M3'
|
||||
|
||||
declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata)
|
||||
|
||||
; Float8E4M3 is a valid format name with no expansion yet, so llc is expected
; to fail on this call with the diagnostic listed in this split file.
define float @from_f8e4m3(i8 %v) {
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(
|
||||
i8 %v, metadata !"Float8E4M3")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;--- float8e3m4.ll
|
||||
; E3M4: error: convert_from_arbitrary_fp: not implemented format 'Float8E3M4'
|
||||
|
||||
declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata)
|
||||
|
||||
; Float8E3M4 is a valid format name with no expansion yet, so llc is expected
; to fail on this call with the diagnostic listed in this split file.
define float @from_f8e3m4(i8 %v) {
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(
|
||||
i8 %v, metadata !"Float8E3M4")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;--- float8e5m2fnuz.ll
|
||||
; E5M2FNUZ: error: convert_from_arbitrary_fp: not implemented format 'Float8E5M2FNUZ'
|
||||
|
||||
declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata)
|
||||
|
||||
; Float8E5M2FNUZ is a valid format name with no expansion yet, so llc is
; expected to fail on this call with the diagnostic listed in this split file.
define float @from_f8e5m2fnuz(i8 %v) {
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(
|
||||
i8 %v, metadata !"Float8E5M2FNUZ")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;--- float8e4m3fnuz.ll
|
||||
; E4M3FNUZ: error: convert_from_arbitrary_fp: not implemented format 'Float8E4M3FNUZ'
|
||||
|
||||
declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata)
|
||||
|
||||
; Float8E4M3FNUZ is a valid format name with no expansion yet, so llc is
; expected to fail on this call with the diagnostic listed in this split file.
define float @from_f8e4m3fnuz(i8 %v) {
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(
|
||||
i8 %v, metadata !"Float8E4M3FNUZ")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;--- float8e4m3b11fnuz.ll
|
||||
; E4M3B11FNUZ: error: convert_from_arbitrary_fp: not implemented format 'Float8E4M3B11FNUZ'
|
||||
|
||||
declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata)
|
||||
|
||||
; Float8E4M3B11FNUZ is a valid format name with no expansion yet, so llc is
; expected to fail on this call with the diagnostic listed in this split file.
define float @from_f8e4m3b11fnuz(i8 %v) {
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(
|
||||
i8 %v, metadata !"Float8E4M3B11FNUZ")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;--- float8e8m0fnu.ll
|
||||
; E8M0FNU: error: convert_from_arbitrary_fp: not implemented format 'Float8E8M0FNU'
|
||||
|
||||
declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata)
|
||||
|
||||
; Float8E8M0FNU is a valid format name with no expansion yet, so llc is
; expected to fail on this call with the diagnostic listed in this split file.
define float @from_f8e8m0fnu(i8 %v) {
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(
|
||||
i8 %v, metadata !"Float8E8M0FNU")
|
||||
ret float %r
|
||||
}
|
||||
727
llvm/test/CodeGen/X86/arbitrary-fp-to-float.ll
Normal file
727
llvm/test/CodeGen/X86/arbitrary-fp-to-float.ll
Normal file
@ -0,0 +1,727 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
|
||||
|
||||
; Test llvm.convert.from.arbitrary.fp intrinsic expansion.
|
||||
|
||||
declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata)
|
||||
declare float @llvm.convert.from.arbitrary.fp.f32.i6(i6, metadata)
|
||||
declare float @llvm.convert.from.arbitrary.fp.f32.i4(i4, metadata)
|
||||
declare <4 x float> @llvm.convert.from.arbitrary.fp.v4f32.v4i4(<4 x i4>, metadata)
|
||||
|
||||
declare half @llvm.convert.from.arbitrary.fp.f16.i8(i8, metadata)
|
||||
declare double @llvm.convert.from.arbitrary.fp.f64.i8(i8, metadata)
|
||||
|
||||
; Float8E5M2
|
||||
; Layout: sign(1) exp(5) mant(2), bias=15
|
||||
; Supports: Inf, NaN, signed zero, denormals
|
||||
|
||||
; Float8E5M2 normal: 0_01111_00 = 1.0
|
||||
; Input i8 60 (0x3C = 0_01111_00) is a constant, so the conversion folds and
; only a load of 1.0 is expected.
define float @from_f8e5m2_normal() {
|
||||
; CHECK-LABEL: from_f8e5m2_normal:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 60, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 zero: 0_00000_00 = +0.0
|
||||
; Input i8 0 is a constant; the folded +0.0 result is expected to be
; materialised with xorps rather than a constant load.
define float @from_f8e5m2_zero() {
|
||||
; CHECK-LABEL: from_f8e5m2_zero:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 0, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 negative zero: 1_00000_00 = -0.0
|
||||
; Input i8 -128 is the two's-complement spelling of 0x80 (sign bit only); the
; sign must survive the fold, so a load of -0.0 is expected.
define float @from_f8e5m2_neg_zero() {
|
||||
; CHECK-LABEL: from_f8e5m2_neg_zero:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [-0.0E+0,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 -128, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 denorm: 0_00000_01 = 2^(-16)
|
||||
; Input i8 1 is the smallest Float8E5M2 denormal; folded, it becomes the
; normal f32 value 2^-16 = 1.52587891E-5.
define float @from_f8e5m2_denorm() {
|
||||
; CHECK-LABEL: from_f8e5m2_denorm:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.52587891E-5,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 1, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 +Inf: 0_11111_00
|
||||
; Input i8 124 (0x7C = 0_11111_00): all-ones exponent with a zero mantissa
; folds to a load of +Inf.
define float @from_f8e5m2_inf() {
|
||||
; CHECK-LABEL: from_f8e5m2_inf:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [+Inf,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 124, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 NaN: 0_11111_01
|
||||
; Input i8 125 (0x7D = 0_11111_01): all-ones exponent with a non-zero
; mantissa folds to a load of NaN.
define float @from_f8e5m2_nan() {
|
||||
; CHECK-LABEL: from_f8e5m2_nan:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 125, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 max: 0_11110_11 = 57344
|
||||
; Input i8 123 (0x7B = 0_11110_11) is the largest finite encoding:
; 1.75 * 2^15 = 57344, expected as a folded constant load.
define float @from_f8e5m2_max() {
|
||||
; CHECK-LABEL: from_f8e5m2_max:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [5.7344E+4,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 123, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 negative: 1_01111_00 = -1.0
|
||||
; Input i8 -68 is the two's-complement spelling of 0xBC (1_01111_00), i.e. the
; 1.0 pattern with the sign bit set; a folded load of -1.0 is expected.
define float @from_f8e5m2_neg() {
|
||||
; CHECK-LABEL: from_f8e5m2_neg:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 -68, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 runtime arg test
|
||||
; Runtime input: nothing folds, so the whole expansion appears in the checks.
; Constants worth recognising in the expected assembly:
;   $3, $31 (after >> 2), $-128   mantissa, exponent and sign masks
;   939524096 = 112 << 23         normal-path exponent rebias, 112 = 127 - 15
;   142 - (bsr ^ 31)              f32 biased exponent for a denormal input
;   0x7F800000                    f32 Inf bits, OR'ed onto the shifted sign
;   0x7FC00000                    NaN result (selected without the sign bit)
define float @from_f8e5m2_dynamic(i8 %x) {
|
||||
; CHECK-LABEL: from_f8e5m2_dynamic:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %edi, %edx
|
||||
; CHECK-NEXT: andl $3, %edx
|
||||
; CHECK-NEXT: movl %edx, %ecx
|
||||
; CHECK-NEXT: shll $21, %ecx
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: andl $-128, %eax
|
||||
; CHECK-NEXT: shll $24, %eax
|
||||
; CHECK-NEXT: shrl $2, %edi
|
||||
; CHECK-NEXT: andl $31, %edi
|
||||
; CHECK-NEXT: movl %edi, %esi
|
||||
; CHECK-NEXT: shll $23, %esi
|
||||
; CHECK-NEXT: orl %eax, %esi
|
||||
; CHECK-NEXT: leal 939524096(%rcx,%rsi), %esi
|
||||
; CHECK-NEXT: bsrl %edx, %r8d
|
||||
; CHECK-NEXT: movl %edx, %r9d
|
||||
; CHECK-NEXT: btcl %r8d, %r9d
|
||||
; CHECK-NEXT: xorl $31, %r8d
|
||||
; CHECK-NEXT: leal -8(%r8), %ecx
|
||||
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; CHECK-NEXT: shll %cl, %r9d
|
||||
; CHECK-NEXT: movl $142, %ecx
|
||||
; CHECK-NEXT: subl %r8d, %ecx
|
||||
; CHECK-NEXT: shll $23, %ecx
|
||||
; CHECK-NEXT: orl %eax, %ecx
|
||||
; CHECK-NEXT: orl %r9d, %ecx
|
||||
; CHECK-NEXT: testl %edx, %edx
|
||||
; CHECK-NEXT: sete %dl
|
||||
; CHECK-NEXT: setne %r8b
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: sete %r9b
|
||||
; CHECK-NEXT: testb %r8b, %r9b
|
||||
; CHECK-NEXT: cmovel %esi, %ecx
|
||||
; CHECK-NEXT: testb %dl, %r9b
|
||||
; CHECK-NEXT: cmovnel %eax, %ecx
|
||||
; CHECK-NEXT: orl $2139095040, %eax # imm = 0x7F800000
|
||||
; CHECK-NEXT: cmpl $31, %edi
|
||||
; CHECK-NEXT: sete %sil
|
||||
; CHECK-NEXT: testb %dl, %sil
|
||||
; CHECK-NEXT: cmovel %ecx, %eax
|
||||
; CHECK-NEXT: testb %r8b, %sil
|
||||
; CHECK-NEXT: movl $2143289344, %ecx # imm = 0x7FC00000
|
||||
; CHECK-NEXT: cmovel %eax, %ecx
|
||||
; CHECK-NEXT: movd %ecx, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 %x, metadata !"Float8E5M2")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN (NanOnly, NanEncoding=AllOnes)
|
||||
; Layout: sign(1) exp(4) mant(3), maxExp=8, minExp=-6, bias=7
|
||||
; Only 0_1111_111 and 1_1111_111 are NaN; all other exp=15 values are finite.
|
||||
|
||||
; Float8E4M3FN normal: 0_0111_000 = 1.0
|
||||
; Input i8 56 (0x38 = 0_0111_000) is a constant, so the conversion folds and
; only a load of 1.0 is expected.
define float @from_f8e4m3fn_normal() {
|
||||
; CHECK-LABEL: from_f8e4m3fn_normal:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 56, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN NaN: 0_1111_111
|
||||
; Input i8 127 (0x7F = 0_1111_111): exponent and mantissa both all ones, the
; format's NaN encoding; a folded load of NaN is expected.
define float @from_f8e4m3fn_nan() {
|
||||
; CHECK-LABEL: from_f8e4m3fn_nan:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 127, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN not-NaN: 0_1111_110 = 448
|
||||
; Despite exp=all-ones, this is a valid finite number (max value)
|
||||
; Input i8 126 (0x7E = 0_1111_110): 1.75 * 2^8 = 448, expected as a folded
; constant load rather than Inf or NaN.
define float @from_f8e4m3fn_max() {
|
||||
; CHECK-LABEL: from_f8e4m3fn_max:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.48E+2,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 126, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN not-NaN: 0_1111_101 = 416
|
||||
; exp=all-ones but mant!=all-ones so this is finite
|
||||
; Input i8 125 (0x7D = 0_1111_101): 1.625 * 2^8 = 416, expected as a folded
; constant load.
define float @from_f8e4m3fn_not_nan() {
|
||||
; CHECK-LABEL: from_f8e4m3fn_not_nan:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.16E+2,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 125, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN zero: 0_0000_000 = +0.0
|
||||
; Input i8 0 is a constant; the folded +0.0 result is expected to be
; materialised with xorps rather than a constant load.
define float @from_f8e4m3fn_zero() {
|
||||
; CHECK-LABEL: from_f8e4m3fn_zero:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 0, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN denorm: 0_0000_001 = 2^(-9)
|
||||
; Input i8 1 is the smallest Float8E4M3FN denormal; folded, it becomes the
; normal f32 value 2^-9 = 1.953125E-3.
define float @from_f8e4m3fn_denorm() {
|
||||
; CHECK-LABEL: from_f8e4m3fn_denorm:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.953125E-3,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 1, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E4M3FN runtime arg test
|
||||
; Runtime input: nothing folds, so the whole expansion appears in the checks.
; Constants worth recognising in the expected assembly:
;   $7, $15 (after >> 3), $-128   mantissa, exponent and sign masks
;   1006632960 = 120 << 23        normal-path exponent rebias, 120 = 127 - 7
;   149 - (bsr ^ 31)              f32 biased exponent for a denormal input
;   mantissa == 7 && exponent == 15 selects the NaN result 0x7FC00000
; No Inf constant is involved; the NaN select is the only special-value step.
define float @from_f8e4m3fn_dynamic(i8 %x) {
|
||||
; CHECK-LABEL: from_f8e4m3fn_dynamic:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: andl $7, %eax
|
||||
; CHECK-NEXT: movl %eax, %ecx
|
||||
; CHECK-NEXT: shll $20, %ecx
|
||||
; CHECK-NEXT: movl %edi, %edx
|
||||
; CHECK-NEXT: andl $-128, %edx
|
||||
; CHECK-NEXT: shll $24, %edx
|
||||
; CHECK-NEXT: shrl $3, %edi
|
||||
; CHECK-NEXT: andl $15, %edi
|
||||
; CHECK-NEXT: movl %edi, %esi
|
||||
; CHECK-NEXT: shll $23, %esi
|
||||
; CHECK-NEXT: orl %edx, %esi
|
||||
; CHECK-NEXT: leal 1006632960(%rcx,%rsi), %esi
|
||||
; CHECK-NEXT: bsrl %eax, %r8d
|
||||
; CHECK-NEXT: movl %eax, %r9d
|
||||
; CHECK-NEXT: btcl %r8d, %r9d
|
||||
; CHECK-NEXT: xorl $31, %r8d
|
||||
; CHECK-NEXT: leal -8(%r8), %ecx
|
||||
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; CHECK-NEXT: shll %cl, %r9d
|
||||
; CHECK-NEXT: movl $149, %ecx
|
||||
; CHECK-NEXT: subl %r8d, %ecx
|
||||
; CHECK-NEXT: shll $23, %ecx
|
||||
; CHECK-NEXT: orl %edx, %ecx
|
||||
; CHECK-NEXT: orl %r9d, %ecx
|
||||
; CHECK-NEXT: testl %eax, %eax
|
||||
; CHECK-NEXT: sete %r8b
|
||||
; CHECK-NEXT: setne %r9b
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: sete %r10b
|
||||
; CHECK-NEXT: testb %r9b, %r10b
|
||||
; CHECK-NEXT: cmovel %esi, %ecx
|
||||
; CHECK-NEXT: testb %r8b, %r10b
|
||||
; CHECK-NEXT: cmovnel %edx, %ecx
|
||||
; CHECK-NEXT: cmpl $7, %eax
|
||||
; CHECK-NEXT: sete %al
|
||||
; CHECK-NEXT: cmpl $15, %edi
|
||||
; CHECK-NEXT: sete %dl
|
||||
; CHECK-NEXT: testb %al, %dl
|
||||
; CHECK-NEXT: movl $2143289344, %eax # imm = 0x7FC00000
|
||||
; CHECK-NEXT: cmovel %ecx, %eax
|
||||
; CHECK-NEXT: movd %eax, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 %x, metadata !"Float8E4M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E3M2FN (FiniteOnly)
|
||||
; Layout: sign(1) exp(3) mant(2), bias=3, maxExp=4
|
||||
; No Inf, no NaN. All bit patterns are finite.
|
||||
|
||||
; Float6E3M2FN normal: 0_011_00 = 1.0
|
||||
; Input i6 12 (0b001100 = 0_011_00) is a constant, so the conversion folds and
; only a load of 1.0 is expected.
define float @from_f6e3m2fn_normal() {
|
||||
; CHECK-LABEL: from_f6e3m2fn_normal:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 12, metadata !"Float6E3M2FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E3M2FN max: 0_111_11 = 28.0
|
||||
; Input i6 31 (0b011111 = 0_111_11): 1.75 * 2^4 = 28, expected as a folded
; constant load.
define float @from_f6e3m2fn_max() {
|
||||
; CHECK-LABEL: from_f6e3m2fn_max:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [2.8E+1,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 31, metadata !"Float6E3M2FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E3M2FN denorm: 0_000_01 = 0.0625
|
||||
; Input i6 1 is the smallest Float6E3M2FN denormal; folded, it becomes the
; normal f32 value 2^-4 = 0.0625.
define float @from_f6e3m2fn_denorm() {
|
||||
; CHECK-LABEL: from_f6e3m2fn_denorm:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [6.25E-2,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 1, metadata !"Float6E3M2FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E3M2FN zero: 0_000_00 = +0.0
|
||||
; Input i6 0 is a constant; the folded +0.0 result is expected to be
; materialised with xorps rather than a constant load.
define float @from_f6e3m2fn_zero() {
|
||||
; CHECK-LABEL: from_f6e3m2fn_zero:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 0, metadata !"Float6E3M2FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E3M2FN negative: 1_011_00 = -1.0
|
||||
; Input i6 -20 is the two's-complement spelling of 44 (0b101100 = 1_011_00),
; i.e. the 1.0 pattern with the sign bit set; a folded load of -1.0 is expected.
define float @from_f6e3m2fn_neg() {
|
||||
; CHECK-LABEL: from_f6e3m2fn_neg:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 -20, metadata !"Float6E3M2FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E3M2FN runtime arg test
|
||||
; Runtime input: nothing folds, so the whole expansion appears in the checks.
; Constants worth recognising in the expected assembly:
;   $3, $7 (after >> 2), $-32     mantissa, exponent and sign masks
;   1040187392 = 124 << 23        normal-path exponent rebias, 124 = 127 - 3
;   154 - (bsr ^ 31)              f32 biased exponent for a denormal input
; The sequence ends right after the signed-zero select: no Inf or NaN
; constants appear.
define float @from_f6e3m2fn_dynamic(i6 %x) {
|
||||
; CHECK-LABEL: from_f6e3m2fn_dynamic:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %edi, %edx
|
||||
; CHECK-NEXT: andl $3, %edx
|
||||
; CHECK-NEXT: movl %edx, %ecx
|
||||
; CHECK-NEXT: shll $21, %ecx
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: andl $-32, %eax
|
||||
; CHECK-NEXT: shll $26, %eax
|
||||
; CHECK-NEXT: shrl $2, %edi
|
||||
; CHECK-NEXT: andl $7, %edi
|
||||
; CHECK-NEXT: movl %edi, %esi
|
||||
; CHECK-NEXT: shll $23, %esi
|
||||
; CHECK-NEXT: orl %eax, %esi
|
||||
; CHECK-NEXT: leal 1040187392(%rcx,%rsi), %esi
|
||||
; CHECK-NEXT: bsrl %edx, %r8d
|
||||
; CHECK-NEXT: movl %edx, %r9d
|
||||
; CHECK-NEXT: btcl %r8d, %r9d
|
||||
; CHECK-NEXT: xorl $31, %r8d
|
||||
; CHECK-NEXT: leal -8(%r8), %ecx
|
||||
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; CHECK-NEXT: shll %cl, %r9d
|
||||
; CHECK-NEXT: movl $154, %ecx
|
||||
; CHECK-NEXT: subl %r8d, %ecx
|
||||
; CHECK-NEXT: shll $23, %ecx
|
||||
; CHECK-NEXT: orl %eax, %ecx
|
||||
; CHECK-NEXT: orl %r9d, %ecx
|
||||
; CHECK-NEXT: testl %edx, %edx
|
||||
; CHECK-NEXT: sete %dl
|
||||
; CHECK-NEXT: setne %r8b
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: sete %dil
|
||||
; CHECK-NEXT: testb %r8b, %dil
|
||||
; CHECK-NEXT: cmovel %esi, %ecx
|
||||
; CHECK-NEXT: testb %dl, %dil
|
||||
; CHECK-NEXT: cmovnel %eax, %ecx
|
||||
; CHECK-NEXT: movd %ecx, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 %x, metadata !"Float6E3M2FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E2M3FN (FiniteOnly)
|
||||
; Layout: sign(1) exp(2) mant(3), bias=1, maxExp=2
|
||||
; No Inf, no NaN. All bit patterns are finite.
|
||||
|
||||
; Float6E2M3FN normal: 0_01_000 = 1.0
|
||||
; Input i6 8 (0b001000 = 0_01_000) is a constant, so the conversion folds and
; only a load of 1.0 is expected.
define float @from_f6e2m3fn_normal() {
|
||||
; CHECK-LABEL: from_f6e2m3fn_normal:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 8, metadata !"Float6E2M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E2M3FN max: 0_11_111 = 7.5
|
||||
; Input i6 31 (0b011111 = 0_11_111): 1.875 * 2^2 = 7.5, expected as a folded
; constant load.
define float @from_f6e2m3fn_max() {
|
||||
; CHECK-LABEL: from_f6e2m3fn_max:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [7.5E+0,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 31, metadata !"Float6E2M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E2M3FN denorm: 0_00_001 = 0.125
|
||||
; Input i6 1 is the smallest Float6E2M3FN denormal; folded, it becomes the
; normal f32 value 2^-3 = 0.125.
define float @from_f6e2m3fn_denorm() {
|
||||
; CHECK-LABEL: from_f6e2m3fn_denorm:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.25E-1,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 1, metadata !"Float6E2M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E2M3FN zero: 0_00_000 = +0.0
|
||||
; Input i6 0 is a constant; the folded +0.0 result is expected to be
; materialised with xorps rather than a constant load.
define float @from_f6e2m3fn_zero() {
|
||||
; CHECK-LABEL: from_f6e2m3fn_zero:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 0, metadata !"Float6E2M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float6E2M3FN runtime arg test
|
||||
; Runtime input: nothing folds, so the whole expansion appears in the checks.
; Constants worth recognising in the expected assembly:
;   $7, $3 (after >> 3), $-32     mantissa, exponent and sign masks
;   1056964608 = 126 << 23        normal-path exponent rebias, 126 = 127 - 1
;   155 - (bsr ^ 31)              f32 biased exponent for a denormal input
; The sequence ends right after the signed-zero select: no Inf or NaN
; constants appear.
define float @from_f6e2m3fn_dynamic(i6 %x) {
|
||||
; CHECK-LABEL: from_f6e2m3fn_dynamic:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %edi, %edx
|
||||
; CHECK-NEXT: andl $7, %edx
|
||||
; CHECK-NEXT: movl %edx, %ecx
|
||||
; CHECK-NEXT: shll $20, %ecx
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: andl $-32, %eax
|
||||
; CHECK-NEXT: shll $26, %eax
|
||||
; CHECK-NEXT: shrl $3, %edi
|
||||
; CHECK-NEXT: andl $3, %edi
|
||||
; CHECK-NEXT: movl %edi, %esi
|
||||
; CHECK-NEXT: shll $23, %esi
|
||||
; CHECK-NEXT: orl %eax, %esi
|
||||
; CHECK-NEXT: leal 1056964608(%rcx,%rsi), %esi
|
||||
; CHECK-NEXT: bsrl %edx, %r8d
|
||||
; CHECK-NEXT: movl %edx, %r9d
|
||||
; CHECK-NEXT: btcl %r8d, %r9d
|
||||
; CHECK-NEXT: xorl $31, %r8d
|
||||
; CHECK-NEXT: leal -8(%r8), %ecx
|
||||
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; CHECK-NEXT: shll %cl, %r9d
|
||||
; CHECK-NEXT: movl $155, %ecx
|
||||
; CHECK-NEXT: subl %r8d, %ecx
|
||||
; CHECK-NEXT: shll $23, %ecx
|
||||
; CHECK-NEXT: orl %eax, %ecx
|
||||
; CHECK-NEXT: orl %r9d, %ecx
|
||||
; CHECK-NEXT: testl %edx, %edx
|
||||
; CHECK-NEXT: sete %dl
|
||||
; CHECK-NEXT: setne %r8b
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: sete %dil
|
||||
; CHECK-NEXT: testb %r8b, %dil
|
||||
; CHECK-NEXT: cmovel %esi, %ecx
|
||||
; CHECK-NEXT: testb %dl, %dil
|
||||
; CHECK-NEXT: cmovnel %eax, %ecx
|
||||
; CHECK-NEXT: movd %ecx, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 %x, metadata !"Float6E2M3FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float4E2M1FN (FiniteOnly)
|
||||
; Layout: sign(1) exp(2) mant(1), bias=1, maxExp=2
|
||||
; No Inf, no NaN. All bit patterns are finite.
|
||||
|
||||
; Float4E2M1FN normal: 0_01_0 = 1.0
|
||||
; Input i4 2 (0b0010 = 0_01_0) is a constant, so the conversion folds and only
; a load of 1.0 is expected.
define float @from_f4e2m1fn_normal() {
|
||||
; CHECK-LABEL: from_f4e2m1fn_normal:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 2, metadata !"Float4E2M1FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float4E2M1FN denorm: 0_00_1 = 0.5
|
||||
; Input i4 1 is the only non-zero Float4E2M1FN denormal; folded, it becomes
; the normal f32 value 0.5.
define float @from_f4e2m1fn_denorm() {
|
||||
; CHECK-LABEL: from_f4e2m1fn_denorm:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [5.0E-1,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 1, metadata !"Float4E2M1FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float4E2M1FN max: 0_11_1 = 6.0
|
||||
; Input i4 7 (0b0111 = 0_11_1): 1.5 * 2^2 = 6, expected as a folded constant
; load.
define float @from_f4e2m1fn_max() {
|
||||
; CHECK-LABEL: from_f4e2m1fn_max:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [6.0E+0,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 7, metadata !"Float4E2M1FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float4E2M1FN runtime arg test
|
||||
; Runtime input: nothing folds, so the whole expansion appears in the checks.
; Constants worth recognising in the expected assembly:
;   $1, $3 (after >> 1), $-8      mantissa, exponent and sign masks
;   1056964608 = 126 << 23        normal-path exponent rebias, 126 = 127 - 1
;   157 - (bsr ^ 31)              f32 biased exponent for a denormal input
; The sequence ends right after the signed-zero select: no Inf or NaN
; constants appear.
define float @from_f4e2m1fn_dynamic(i4 %x) {
|
||||
; CHECK-LABEL: from_f4e2m1fn_dynamic:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %edi, %edx
|
||||
; CHECK-NEXT: andl $1, %edx
|
||||
; CHECK-NEXT: movl %edx, %ecx
|
||||
; CHECK-NEXT: shll $22, %ecx
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: andl $-8, %eax
|
||||
; CHECK-NEXT: shll $28, %eax
|
||||
; CHECK-NEXT: shrl %edi
|
||||
; CHECK-NEXT: andl $3, %edi
|
||||
; CHECK-NEXT: movl %edi, %esi
|
||||
; CHECK-NEXT: shll $23, %esi
|
||||
; CHECK-NEXT: orl %eax, %esi
|
||||
; CHECK-NEXT: leal 1056964608(%rcx,%rsi), %esi
|
||||
; CHECK-NEXT: bsrl %edx, %r8d
|
||||
; CHECK-NEXT: movl %edx, %r9d
|
||||
; CHECK-NEXT: btcl %r8d, %r9d
|
||||
; CHECK-NEXT: xorl $31, %r8d
|
||||
; CHECK-NEXT: leal -8(%r8), %ecx
|
||||
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; CHECK-NEXT: shll %cl, %r9d
|
||||
; CHECK-NEXT: movl $157, %ecx
|
||||
; CHECK-NEXT: subl %r8d, %ecx
|
||||
; CHECK-NEXT: shll $23, %ecx
|
||||
; CHECK-NEXT: orl %eax, %ecx
|
||||
; CHECK-NEXT: orl %r9d, %ecx
|
||||
; CHECK-NEXT: testl %edx, %edx
|
||||
; CHECK-NEXT: sete %dl
|
||||
; CHECK-NEXT: setne %r8b
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: sete %dil
|
||||
; CHECK-NEXT: testb %r8b, %dil
|
||||
; CHECK-NEXT: cmovel %esi, %ecx
|
||||
; CHECK-NEXT: testb %dl, %dil
|
||||
; CHECK-NEXT: cmovnel %eax, %ecx
|
||||
; CHECK-NEXT: movd %ecx, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 %x, metadata !"Float4E2M1FN")
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Float8E5M2 to f16: 1.0
|
||||
; f16 destination with the constant 1.0 pattern (i8 60): the folded result is
; expected to be loaded from a constant-pool entry with pinsrw.
define half @from_f8e5m2_to_f16() {
|
||||
; CHECK-LABEL: from_f8e5m2_to_f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%r = call half @llvm.convert.from.arbitrary.fp.f16.i8(i8 60, metadata !"Float8E5M2")
|
||||
ret half %r
|
||||
}
|
||||
|
||||
; Float8E5M2 to f64: 1.0
|
||||
; f64 destination with the constant 1.0 pattern (i8 60): the conversion folds
; to a movsd load of 1.0.
define double @from_f8e5m2_to_f64() {
|
||||
; CHECK-LABEL: from_f8e5m2_to_f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: retq
|
||||
%r = call double @llvm.convert.from.arbitrary.fp.f64.i8(i8 60, metadata !"Float8E5M2")
|
||||
ret double %r
|
||||
}
|
||||
|
||||
declare bfloat @llvm.convert.from.arbitrary.fp.bf16.i8(i8, metadata)
|
||||
|
||||
; Float8E5M2 to bf16: 1.0
|
||||
; bf16 has: sign(1) exp(8) mant(7), bias=127
|
||||
; bf16 destination with the constant 1.0 pattern (i8 60): the expected code
; loads f32 1.0 and narrows it through a __truncsfbf2 libcall, so a stack
; adjustment (pushq/popq) surrounds the call.
define bfloat @from_f8e5m2_to_bf16() {
|
||||
; CHECK-LABEL: from_f8e5m2_to_bf16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: callq __truncsfbf2@PLT
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
%r = call bfloat @llvm.convert.from.arbitrary.fp.bf16.i8(i8 60, metadata !"Float8E5M2")
|
||||
ret bfloat %r
|
||||
}
|
||||
|
||||
; Vector test: Float4E2M1FN <4 x i4> -> <4 x float>
|
||||
; The expected code handles the vector one lane at a time. Each lane is moved
; to a GPR (movd, preceded by pshufd for lanes 3, 2 and 1) and run through the
; Float4E2M1FN sequence: mantissa mask $1, exponent mask $3 after a shift by
; one, sign mask $-8, rebias 1056964608 = 126 << 23, denormal base 157.
; The four results are recombined with punpckldq and punpcklqdq.
define <4 x float> @fp4_to_f32_vec(<4 x i4> %x) {
|
||||
; CHECK-LABEL: fp4_to_f32_vec:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
|
||||
; CHECK-NEXT: movd %xmm1, %esi
|
||||
; CHECK-NEXT: movl %esi, %edi
|
||||
; CHECK-NEXT: andl $1, %edi
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: shll $22, %eax
|
||||
; CHECK-NEXT: movl %esi, %edx
|
||||
; CHECK-NEXT: andl $-8, %edx
|
||||
; CHECK-NEXT: shll $28, %edx
|
||||
; CHECK-NEXT: shrl %esi
|
||||
; CHECK-NEXT: andl $3, %esi
|
||||
; CHECK-NEXT: movl %esi, %ecx
|
||||
; CHECK-NEXT: shll $23, %ecx
|
||||
; CHECK-NEXT: orl %edx, %ecx
|
||||
; CHECK-NEXT: leal 1056964608(%rax,%rcx), %r8d
|
||||
; CHECK-NEXT: bsrl %edi, %r9d
|
||||
; CHECK-NEXT: movl %edi, %r10d
|
||||
; CHECK-NEXT: btcl %r9d, %r10d
|
||||
; CHECK-NEXT: xorl $31, %r9d
|
||||
; CHECK-NEXT: leal -8(%r9), %ecx
|
||||
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; CHECK-NEXT: shll %cl, %r10d
|
||||
; CHECK-NEXT: movl $157, %eax
|
||||
; CHECK-NEXT: movl $157, %ecx
|
||||
; CHECK-NEXT: subl %r9d, %ecx
|
||||
; CHECK-NEXT: shll $23, %ecx
|
||||
; CHECK-NEXT: orl %edx, %ecx
|
||||
; CHECK-NEXT: orl %r10d, %ecx
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: sete %dil
|
||||
; CHECK-NEXT: setne %r9b
|
||||
; CHECK-NEXT: testl %esi, %esi
|
||||
; CHECK-NEXT: sete %sil
|
||||
; CHECK-NEXT: testb %r9b, %sil
|
||||
; CHECK-NEXT: cmovel %r8d, %ecx
|
||||
; CHECK-NEXT: testb %dil, %sil
|
||||
; CHECK-NEXT: cmovnel %edx, %ecx
|
||||
; CHECK-NEXT: movd %ecx, %xmm1
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: movd %xmm2, %esi
|
||||
; CHECK-NEXT: movl %esi, %edi
|
||||
; CHECK-NEXT: andl $1, %edi
|
||||
; CHECK-NEXT: movl %edi, %ecx
|
||||
; CHECK-NEXT: shll $22, %ecx
|
||||
; CHECK-NEXT: movl %esi, %edx
|
||||
; CHECK-NEXT: andl $-8, %edx
|
||||
; CHECK-NEXT: shll $28, %edx
|
||||
; CHECK-NEXT: shrl %esi
|
||||
; CHECK-NEXT: andl $3, %esi
|
||||
; CHECK-NEXT: movl %esi, %r8d
|
||||
; CHECK-NEXT: shll $23, %r8d
|
||||
; CHECK-NEXT: orl %edx, %r8d
|
||||
; CHECK-NEXT: leal 1056964608(%rcx,%r8), %r8d
|
||||
; CHECK-NEXT: bsrl %edi, %r9d
|
||||
; CHECK-NEXT: movl %edi, %r10d
|
||||
; CHECK-NEXT: btcl %r9d, %r10d
|
||||
; CHECK-NEXT: xorl $31, %r9d
|
||||
; CHECK-NEXT: leal -8(%r9), %ecx
|
||||
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; CHECK-NEXT: shll %cl, %r10d
|
||||
; CHECK-NEXT: movl $157, %ecx
|
||||
; CHECK-NEXT: subl %r9d, %ecx
|
||||
; CHECK-NEXT: shll $23, %ecx
|
||||
; CHECK-NEXT: orl %edx, %ecx
|
||||
; CHECK-NEXT: orl %r10d, %ecx
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: sete %dil
|
||||
; CHECK-NEXT: setne %r9b
|
||||
; CHECK-NEXT: testl %esi, %esi
|
||||
; CHECK-NEXT: sete %sil
|
||||
; CHECK-NEXT: testb %r9b, %sil
|
||||
; CHECK-NEXT: cmovel %r8d, %ecx
|
||||
; CHECK-NEXT: testb %dil, %sil
|
||||
; CHECK-NEXT: cmovnel %edx, %ecx
|
||||
; CHECK-NEXT: movd %ecx, %xmm2
|
||||
; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; CHECK-NEXT: movd %xmm0, %esi
|
||||
; CHECK-NEXT: movl %esi, %edi
|
||||
; CHECK-NEXT: andl $1, %edi
|
||||
; CHECK-NEXT: movl %edi, %ecx
|
||||
; CHECK-NEXT: shll $22, %ecx
|
||||
; CHECK-NEXT: movl %esi, %edx
|
||||
; CHECK-NEXT: andl $-8, %edx
|
||||
; CHECK-NEXT: shll $28, %edx
|
||||
; CHECK-NEXT: shrl %esi
|
||||
; CHECK-NEXT: andl $3, %esi
|
||||
; CHECK-NEXT: movl %esi, %r8d
|
||||
; CHECK-NEXT: shll $23, %r8d
|
||||
; CHECK-NEXT: orl %edx, %r8d
|
||||
; CHECK-NEXT: leal 1056964608(%rcx,%r8), %r8d
|
||||
; CHECK-NEXT: bsrl %edi, %r9d
|
||||
; CHECK-NEXT: movl %edi, %r10d
|
||||
; CHECK-NEXT: btcl %r9d, %r10d
|
||||
; CHECK-NEXT: xorl $31, %r9d
|
||||
; CHECK-NEXT: leal -8(%r9), %ecx
|
||||
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; CHECK-NEXT: shll %cl, %r10d
|
||||
; CHECK-NEXT: movl $157, %ecx
|
||||
; CHECK-NEXT: subl %r9d, %ecx
|
||||
; CHECK-NEXT: shll $23, %ecx
|
||||
; CHECK-NEXT: orl %edx, %ecx
|
||||
; CHECK-NEXT: orl %r10d, %ecx
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: sete %dil
|
||||
; CHECK-NEXT: setne %r9b
|
||||
; CHECK-NEXT: testl %esi, %esi
|
||||
; CHECK-NEXT: sete %sil
|
||||
; CHECK-NEXT: testb %r9b, %sil
|
||||
; CHECK-NEXT: cmovel %r8d, %ecx
|
||||
; CHECK-NEXT: testb %dil, %sil
|
||||
; CHECK-NEXT: cmovnel %edx, %ecx
|
||||
; CHECK-NEXT: movd %ecx, %xmm1
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
|
||||
; CHECK-NEXT: movd %xmm0, %esi
|
||||
; CHECK-NEXT: movl %esi, %edi
|
||||
; CHECK-NEXT: andl $1, %edi
|
||||
; CHECK-NEXT: movl %edi, %ecx
|
||||
; CHECK-NEXT: shll $22, %ecx
|
||||
; CHECK-NEXT: movl %esi, %edx
|
||||
; CHECK-NEXT: andl $-8, %edx
|
||||
; CHECK-NEXT: shll $28, %edx
|
||||
; CHECK-NEXT: shrl %esi
|
||||
; CHECK-NEXT: andl $3, %esi
|
||||
; CHECK-NEXT: movl %esi, %r8d
|
||||
; CHECK-NEXT: shll $23, %r8d
|
||||
; CHECK-NEXT: orl %edx, %r8d
|
||||
; CHECK-NEXT: leal 1056964608(%rcx,%r8), %r8d
|
||||
; CHECK-NEXT: bsrl %edi, %r9d
|
||||
; CHECK-NEXT: movl %edi, %r10d
|
||||
; CHECK-NEXT: btcl %r9d, %r10d
|
||||
; CHECK-NEXT: xorl $31, %r9d
|
||||
; CHECK-NEXT: leal -8(%r9), %ecx
|
||||
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; CHECK-NEXT: shll %cl, %r10d
|
||||
; CHECK-NEXT: subl %r9d, %eax
|
||||
; CHECK-NEXT: shll $23, %eax
|
||||
; CHECK-NEXT: orl %edx, %eax
|
||||
; CHECK-NEXT: orl %r10d, %eax
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: sete %cl
|
||||
; CHECK-NEXT: setne %dil
|
||||
; CHECK-NEXT: testl %esi, %esi
|
||||
; CHECK-NEXT: sete %sil
|
||||
; CHECK-NEXT: testb %dil, %sil
|
||||
; CHECK-NEXT: cmovel %r8d, %eax
|
||||
; CHECK-NEXT: testb %cl, %sil
|
||||
; CHECK-NEXT: cmovnel %edx, %eax
|
||||
; CHECK-NEXT: movd %eax, %xmm0
|
||||
; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
||||
; CHECK-NEXT: movdqa %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%r = call <4 x float> @llvm.convert.from.arbitrary.fp.v4f32.v4i4(<4 x i4> %x, metadata !"Float4E2M1FN")
|
||||
ret <4 x float> %r
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user