llvm-project/clang/utils/TableGen/NeonEmitter.cpp
Jon Roelofs a0fcb50bf9
[ARM] Improve arm_neon.h header diagnostic when included on unsupported targets (#147817)
The footgun here was that the preprocessor diagnostic that looks for
__ARM_FP would fire when included on targets like x86_64, but the
suggestion it gives in that case is totally bogus. Avoid giving bad
advice, by first checking whether we're being built for an appropriate
target, and only then do the soft-fp check.

rdar://155449666
2025-07-11 10:21:13 -07:00

2824 lines
88 KiB
C++

//===-- NeonEmitter.cpp - Generate arm_neon.h for use with clang ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend is responsible for emitting arm_neon.h, which includes
// a declaration and definition of each function specified by the ARM NEON
// compiler interface. See ARM document DUI0348B.
//
// Each NEON instruction is implemented in terms of 1 or more functions which
// are suffixed with the element type of the input vectors. Functions may be
// implemented in terms of generic vector operations such as +, *, -, etc. or
// by calling a __builtin_-prefixed function which will be handled by clang's
// CodeGen library.
//
// Additional validation code can be generated by this file when runHeader() is
// called, rather than the normal run() entry point.
//
// See also the documentation in include/clang/Basic/arm_neon.td.
//
//===----------------------------------------------------------------------===//
#include "TableGenBackends.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/AArch64ImmCheck.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/SetTheory.h"
#include "llvm/TableGen/StringToOffsetTable.h"
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <map>
#include <optional>
#include <set>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
using namespace llvm;
namespace {
// While globals are generally bad, this one allows us to perform assertions
// liberally and somehow still trace them back to the def they indirectly
// came from.
static const Record *CurrentRecord = nullptr;

/// Report \p Str as a fatal error when \p Assertion is false, attaching the
/// source location of the record currently being processed (if any) so the
/// failure can be traced back to its .td definition.
static void assert_with_loc(bool Assertion, const std::string &Str) {
  if (Assertion)
    return;
  if (CurrentRecord)
    PrintFatalError(CurrentRecord->getLoc(), Str);
  else
    PrintFatalError(Str);
}
/// ClassKind describes how much type information an intrinsic's mangled-name
/// suffix carries (see Intrinsic::getInstTypeCode for the encoding).
enum ClassKind {
  ClassNone,
  ClassI,     // generic integer instruction, e.g., "i8" suffix
  ClassS,     // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,     // width-specific instruction, e.g., "8" suffix
  ClassV,     // void-suffix instruction, no suffix
  ClassB,     // bitcast arguments with enum argument to specify type
  ClassL,     // Logical instructions which are op instructions
              // but we need to not emit any suffix for in our
              // tests.
  ClassNoTest // Instructions which we do not test since they are
              // not TRUE instructions.
};
/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins. These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace NeonTypeFlags {
// Bit layout: the low 4 bits hold an EltType value; bit 4 marks an unsigned
// element type; bit 5 marks a 128-bit ("quad") vector.
enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 };

// Element-type codes stored in the low bits of a Neon type flag.
enum EltType {
  Int8,
  Int16,
  Int32,
  Int64,
  Poly8,
  Poly16,
  Poly64,
  Poly128,
  Float16,
  Float32,
  Float64,
  BFloat16,
  MFloat8
};
} // end namespace NeonTypeFlags
class NeonEmitter;
//===----------------------------------------------------------------------===//
// TypeSpec
//===----------------------------------------------------------------------===//
/// A TypeSpec is just a simple wrapper around a string, but gets its own type
/// for strong typing purposes.
///
/// A TypeSpec can be used to create a type.
class TypeSpec : public std::string {
public:
  /// Split a string of concatenated type specifiers into individual
  /// TypeSpecs.
  ///
  /// Each spec is zero or more modifier characters followed by a single
  /// lowercase base-type character, which terminates the spec; e.g. "QUcs"
  /// yields {"QUc", "s"}.
  static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) {
    std::vector<TypeSpec> Ret;
    TypeSpec Acc;
    for (char I : Str.str()) {
      // Cast to unsigned char: passing a negative char value to islower()
      // is undefined behavior.
      if (islower(static_cast<unsigned char>(I))) {
        // A lowercase character ends the current spec.
        Acc.push_back(I);
        Ret.push_back(TypeSpec(Acc));
        Acc.clear();
      } else {
        Acc.push_back(I);
      }
    }
    return Ret;
  }
};
//===----------------------------------------------------------------------===//
// Type
//===----------------------------------------------------------------------===//
/// A Type. Not much more to say here.
class Type {
private:
  TypeSpec TS;

  enum TypeKind { Void, Float, SInt, UInt, Poly, BFloat16, MFloat8, FPM };
  // In-class member initializers keep the two constructors from duplicating
  // (and potentially drifting out of sync on) an eight-member init list.
  TypeKind Kind = Void;
  bool Immediate = false, Constant = false, Pointer = false;

  // ScalarForMangling and NoManglingQ are really not suited to live here as
  // they are not related to the type. But they live in the TypeSpec (not the
  // prototype), so this is really the only place to store them.
  bool ScalarForMangling = false, NoManglingQ = false;

  unsigned Bitwidth = 0, ElementBitwidth = 0, NumVectors = 0;

public:
  /// Construct the "void" type.
  Type() = default;

  /// Construct a concrete type from a typespec plus prototype modifiers.
  Type(TypeSpec TS, StringRef CharMods) : TS(std::move(TS)) {
    applyModifiers(CharMods);
  }

  /// Returns a type representing "void".
  static Type getVoid() { return Type(); }

  // Types compare equal iff their C string representations are identical.
  bool operator==(const Type &Other) const { return str() == Other.str(); }
  bool operator!=(const Type &Other) const { return !operator==(Other); }

  //
  // Query functions
  //
  bool isScalarForMangling() const { return ScalarForMangling; }
  bool noManglingQ() const { return NoManglingQ; }

  bool isPointer() const { return Pointer; }
  bool isValue() const { return !isVoid() && !isPointer(); }
  bool isScalar() const { return isValue() && NumVectors == 0; }
  bool isVector() const { return isValue() && NumVectors > 0; }
  bool isConstPointer() const { return Constant; }
  bool isFloating() const { return Kind == Float; }
  bool isInteger() const { return Kind == SInt || Kind == UInt; }
  bool isPoly() const { return Kind == Poly; }
  bool isSigned() const { return Kind == SInt; }
  bool isImmediate() const { return Immediate; }
  bool isFloat() const { return isFloating() && ElementBitwidth == 32; }
  bool isDouble() const { return isFloating() && ElementBitwidth == 64; }
  bool isHalf() const { return isFloating() && ElementBitwidth == 16; }
  bool isChar() const { return ElementBitwidth == 8; }
  bool isShort() const { return isInteger() && ElementBitwidth == 16; }
  bool isInt() const { return isInteger() && ElementBitwidth == 32; }
  bool isLong() const { return isInteger() && ElementBitwidth == 64; }
  bool isVoid() const { return Kind == Void; }
  bool isBFloat16() const { return Kind == BFloat16; }
  bool isMFloat8() const { return Kind == MFloat8; }
  bool isFPM() const { return Kind == FPM; }
  unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
  unsigned getSizeInBits() const { return Bitwidth; }
  unsigned getElementSizeInBits() const { return ElementBitwidth; }
  unsigned getNumVectors() const { return NumVectors; }

  //
  // Mutator functions
  //
  void makeUnsigned() {
    assert(!isVoid() && "not a potentially signed type");
    Kind = UInt;
  }
  void makeSigned() {
    assert(!isVoid() && "not a potentially signed type");
    Kind = SInt;
  }

  // Turn this into an integer of the given width; drops any immediate flag.
  void makeInteger(unsigned ElemWidth, bool Sign) {
    assert(!isVoid() && "converting void to int probably not useful");
    Kind = Sign ? SInt : UInt;
    Immediate = false;
    ElementBitwidth = ElemWidth;
  }

  // Turn this into a signed immediate integer of the given width.
  void makeImmediate(unsigned ElemWidth) {
    Kind = SInt;
    Immediate = true;
    ElementBitwidth = ElemWidth;
  }

  void makeScalar() {
    Bitwidth = ElementBitwidth;
    NumVectors = 0;
  }

  void makeOneVector() {
    assert(isVector());
    NumVectors = 1;
  }

  void make32BitElement() {
    assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!");
    ElementBitwidth = 32;
  }

  void doubleLanes() {
    assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
    Bitwidth = 128;
  }

  void halveLanes() {
    assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!");
    Bitwidth = 64;
  }

  /// Return the C string representation of a type, which is the typename
  /// defined in stdint.h or arm_neon.h.
  std::string str() const;

  /// Return the string representation of a type, which is an encoded
  /// string for passing to the BUILTIN() macro in Builtins.def.
  std::string builtin_str() const;

  /// Return the value in NeonTypeFlags for this type.
  unsigned getNeonEnum() const;

  /// Parse a type from a stdint.h or arm_neon.h typedef name,
  /// for example uint32x2_t or int64_t.
  static Type fromTypedefName(StringRef Name);

private:
  /// Creates the type based on the typespec string in TS.
  /// Sets "Quad" to true if the "Q" or "H" modifiers were
  /// seen. This is needed by applyModifier as some modifiers
  /// only take effect if the type size was changed by "Q" or "H".
  void applyTypespec(bool &Quad);
  /// Applies prototype modifiers to the type.
  void applyModifiers(StringRef Mods);
};
//===----------------------------------------------------------------------===//
// Variable
//===----------------------------------------------------------------------===//
/// A variable is a simple class that just has a type and a name.
class Variable {
Type T;
std::string N;
public:
Variable() : T(Type::getVoid()) {}
Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {}
Type getType() const { return T; }
std::string getName() const { return "__" + N; }
};
//===----------------------------------------------------------------------===//
// Intrinsic
//===----------------------------------------------------------------------===//
/// The main grunt class. This represents an instantiation of an intrinsic with
/// a particular typespec and prototype.
class Intrinsic {
  /// The Record this intrinsic was created from.
  const Record *R;
  /// The unmangled name.
  std::string Name;
  /// The input and output typespecs. InTS == OutTS except when
  /// CartesianProductWith is non-empty - this is the case for vreinterpret.
  TypeSpec OutTS, InTS;
  /// The base class kind. Most intrinsics use ClassS, which has full type
  /// info for integers (s32/u32). Some use ClassI, which doesn't care about
  /// signedness (i32), while some (ClassB) have no type at all, only a width
  /// (32).
  ClassKind CK;
  /// The list of DAGs for the body. May be empty, in which case we should
  /// emit a builtin call.
  const ListInit *Body;
  /// The architectural ifdef guard.
  std::string ArchGuard;
  /// The architectural target() guard.
  std::string TargetGuard;
  /// Set if the Unavailable bit is 1. This means we don't generate a body,
  /// just an "unavailable" attribute on a declaration.
  bool IsUnavailable;
  /// Is this intrinsic safe for big-endian? or does it need its arguments
  /// reversing?
  bool BigEndianSafe;

  /// The types of return value [0] and parameters [1..].
  std::vector<Type> Types;
  /// Range checks for immediate arguments, kept sorted by argument index.
  SmallVector<ImmCheck, 2> ImmChecks;
  /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls.
  int PolymorphicKeyType;
  /// The local variables defined.
  std::map<std::string, Variable, std::less<>> Variables;
  /// NeededEarly - set if any other intrinsic depends on this intrinsic.
  bool NeededEarly;
  /// UseMacro - set if we should implement using a macro or unset for a
  /// function.
  bool UseMacro;
  /// The set of intrinsics that this intrinsic uses/requires.
  std::set<Intrinsic *> Dependencies;
  /// The "base type", which is Type('d', OutTS). InBaseType is only
  /// different if CartesianProductWith is non-empty (for vreinterpret).
  Type BaseType, InBaseType;
  /// The return variable.
  Variable RetVar;
  /// A postfix to apply to every variable. Defaults to "".
  std::string VariablePostfix;

  /// The emitter that owns this intrinsic; used to look up dependencies.
  NeonEmitter &Emitter;
  /// Accumulates the generated source text for this intrinsic.
  std::stringstream OS;

  /// An intrinsic is trivially big-endian safe if it was marked as such, or
  /// if no operand or result is a vector with more than one element (nothing
  /// to lane-reverse).
  bool isBigEndianSafe() const {
    if (BigEndianSafe)
      return true;

    for (const auto &T : Types){
      if (T.isVector() && T.getNumElements() > 1)
        return false;
    }
    return true;
  }

public:
  Intrinsic(const Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
            TypeSpec InTS, ClassKind CK, const ListInit *Body,
            NeonEmitter &Emitter, StringRef ArchGuard, StringRef TargetGuard,
            bool IsUnavailable, bool BigEndianSafe)
      : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body),
        ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()),
        IsUnavailable(IsUnavailable), BigEndianSafe(BigEndianSafe),
        PolymorphicKeyType(0), NeededEarly(false), UseMacro(false),
        BaseType(OutTS, "."), InBaseType(InTS, "."), Emitter(Emitter) {
    // Modify the TypeSpec per-argument to get a concrete Type, and create
    // known variables for each.
    // Types[0] is the return value.
    unsigned Pos = 0;
    Types.emplace_back(OutTS, getNextModifiers(Proto, Pos));
    StringRef Mods = getNextModifiers(Proto, Pos);
    while (!Mods.empty()) {
      Types.emplace_back(InTS, Mods);
      // '!' marks the parameter whose type drives polymorphic resolution.
      if (Mods.contains('!'))
        PolymorphicKeyType = Types.size() - 1;

      Mods = getNextModifiers(Proto, Pos);
    }

    for (const auto &Type : Types) {
      // If this builtin takes an immediate argument, we need to #define it
      // rather than use a standard declaration, so that SemaChecking can
      // range check the immediate passed by the user.
      //
      // Pointer arguments need to use macros to avoid hiding aligned
      // attributes from the pointer type.
      //
      // It is not permitted to pass or return an __fp16 by value, so
      // intrinsics taking a scalar float16_t must be implemented as macros.
      if (Type.isImmediate() || Type.isPointer() ||
          (Type.isScalar() && Type.isHalf()))
        UseMacro = true;
    }

    // Collect the immediate-argument range checks declared on the record.
    int ArgIdx, Kind, TypeArgIdx;
    for (const Record *I : R->getValueAsListOfDefs("ImmChecks")) {
      unsigned EltSizeInBits = 0, VecSizeInBits = 0;

      ArgIdx = I->getValueAsInt("ImmArgIdx");
      TypeArgIdx = I->getValueAsInt("TypeContextArgIdx");
      Kind = I->getValueAsDef("Kind")->getValueAsInt("Value");

      assert((ArgIdx >= 0 && Kind >= 0) &&
             "ImmArgIdx and Kind must be nonnegative");

      if (TypeArgIdx >= 0) {
        Type ContextType = getParamType(TypeArgIdx);
        // Element size cannot be set for intrinsics that map to polymorphic
        // builtins.
        if (CK != ClassB)
          EltSizeInBits = ContextType.getElementSizeInBits();
        VecSizeInBits = ContextType.getSizeInBits();
      }

      ImmChecks.emplace_back(ArgIdx, Kind, EltSizeInBits, VecSizeInBits);
    }
    sort(ImmChecks.begin(), ImmChecks.end(),
         [](const ImmCheck &a, const ImmCheck &b) {
           return a.getImmArgIdx() < b.getImmArgIdx();
         }); // Sort for comparison with other intrinsics which map to the
             // same builtin
  }

  /// Get the Record that this intrinsic is based off.
  const Record *getRecord() const { return R; }
  /// Get the set of Intrinsics that this intrinsic calls.
  /// this is the set of immediate dependencies, NOT the
  /// transitive closure.
  const std::set<Intrinsic *> &getDependencies() const { return Dependencies; }
  /// Get the architectural guard string (#ifdef).
  std::string getArchGuard() const { return ArchGuard; }
  /// Get the target() attribute guard string.
  std::string getTargetGuard() const { return TargetGuard; }
  /// Get the immediate-argument range checks, sorted by argument index.
  ArrayRef<ImmCheck> getImmChecks() const { return ImmChecks; }
  /// Get the non-mangled name.
  std::string getName() const { return Name; }

  /// Return true if the intrinsic takes an immediate operand.
  bool hasImmediate() const {
    return any_of(Types, [](const Type &T) { return T.isImmediate(); });
  }

  // Return if the supplied argument is an immediate.
  // Note: idx is a parameter index; Types[0] is the return type.
  bool isArgImmediate(unsigned idx) const {
    return Types[idx + 1].isImmediate();
  }

  unsigned getNumParams() const { return Types.size() - 1; }
  Type getReturnType() const { return Types[0]; }
  Type getParamType(unsigned I) const { return Types[I + 1]; }
  Type getBaseType() const { return BaseType; }
  Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; }

  /// Return true if the prototype has a scalar argument.
  bool protoHasScalar() const;

  /// Return the index that parameter PIndex will sit at
  /// in a generated function call. This is often just PIndex,
  /// but may not be as things such as multiple-vector operands
  /// and sret parameters need to be taken into account.
  unsigned getGeneratedParamIdx(unsigned PIndex) {
    unsigned Idx = 0;
    if (getReturnType().getNumVectors() > 1)
      // Multiple vectors are passed as sret.
      ++Idx;

    // Multi-vector parameters occupy one slot per vector.
    for (unsigned I = 0; I < PIndex; ++I)
      Idx += std::max(1U, getParamType(I).getNumVectors());

    return Idx;
  }

  bool hasBody() const { return Body && !Body->empty(); }

  void setNeededEarly() { NeededEarly = true; }

  bool operator<(const Intrinsic &Other) const {
    // Sort lexicographically on a three-tuple (ArchGuard, TargetGuard, Name)
    return std::tie(ArchGuard, TargetGuard, Name) <
           std::tie(Other.ArchGuard, Other.TargetGuard, Other.Name);
  }

  ClassKind getClassKind(bool UseClassBIfScalar = false) {
    if (UseClassBIfScalar && !protoHasScalar())
      return ClassB;
    return CK;
  }

  /// Return the name, mangled with type information.
  /// If ForceClassS is true, use ClassS (u32/s32) instead
  /// of the intrinsic's own type class.
  std::string getMangledName(bool ForceClassS = false) const;
  /// Return the type code for a builtin function call.
  std::string getInstTypeCode(Type T, ClassKind CK) const;
  /// Return the type string for a BUILTIN() macro in Builtins.def.
  std::string getBuiltinTypeStr();

  /// Generate the intrinsic, returning code.
  std::string generate();
  /// Perform type checking and populate the dependency graph, but
  /// don't generate code yet.
  void indexBody();

private:
  StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const;
  std::string mangleName(std::string Name, ClassKind CK) const;

  void initVariables();
  std::string replaceParamsIn(std::string S);

  void emitBodyAsBuiltinCall();

  void generateImpl(bool ReverseArguments,
                    StringRef NamePrefix, StringRef CallPrefix);
  void emitReturn();
  void emitBody(StringRef CallPrefix);
  void emitShadowedArgs();
  void emitArgumentReversal();
  void emitReturnVarDecl();
  void emitReturnReversal();
  void emitReverseVariable(Variable &Dest, Variable &Src);
  void emitNewLine();
  void emitClosingBrace();
  void emitOpeningBrace();
  void emitPrototype(StringRef NamePrefix);

  /// Emits the intrinsic's body by walking the Operation DAGs attached to
  /// the record; each emitDag* handles one DAG operator.
  class DagEmitter {
    Intrinsic &Intr;
    StringRef CallPrefix;

  public:
    DagEmitter(Intrinsic &Intr, StringRef CallPrefix) :
      Intr(Intr), CallPrefix(CallPrefix) {
    }
    std::pair<Type, std::string> emitDagArg(const Init *Arg,
                                            std::string ArgName);
    std::pair<Type, std::string> emitDagSaveTemp(const DagInit *DI);
    std::pair<Type, std::string> emitDagSplat(const DagInit *DI);
    std::pair<Type, std::string> emitDagDup(const DagInit *DI);
    std::pair<Type, std::string> emitDagDupTyped(const DagInit *DI);
    std::pair<Type, std::string> emitDagShuffle(const DagInit *DI);
    std::pair<Type, std::string> emitDagCast(const DagInit *DI, bool IsBitCast);
    std::pair<Type, std::string> emitDagCall(const DagInit *DI,
                                             bool MatchMangledName);
    std::pair<Type, std::string> emitDagNameReplace(const DagInit *DI);
    std::pair<Type, std::string> emitDagLiteral(const DagInit *DI);
    std::pair<Type, std::string> emitDagOp(const DagInit *DI);
    std::pair<Type, std::string> emitDag(const DagInit *DI);
  };
};
//===----------------------------------------------------------------------===//
// NeonEmitter
//===----------------------------------------------------------------------===//
class NeonEmitter {
const RecordKeeper &Records;
DenseMap<const Record *, ClassKind> ClassMap;
std::map<std::string, std::deque<Intrinsic>, std::less<>> IntrinsicMap;
unsigned UniqueNumber;
void createIntrinsic(const Record *R, SmallVectorImpl<Intrinsic *> &Out);
void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs);
void genStreamingSVECompatibleList(raw_ostream &OS,
SmallVectorImpl<Intrinsic *> &Defs);
void genOverloadTypeCheckCode(raw_ostream &OS,
SmallVectorImpl<Intrinsic *> &Defs);
bool areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA,
const ArrayRef<ImmCheck> ChecksB);
void genIntrinsicRangeCheckCode(raw_ostream &OS,
SmallVectorImpl<Intrinsic *> &Defs);
public:
/// Called by Intrinsic - this attempts to get an intrinsic that takes
/// the given types as arguments.
Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types,
std::optional<std::string> MangledName);
/// Called by Intrinsic - returns a globally-unique number.
unsigned getUniqueNumber() { return UniqueNumber++; }
NeonEmitter(const RecordKeeper &R) : Records(R), UniqueNumber(0) {
const Record *SI = R.getClass("SInst");
const Record *II = R.getClass("IInst");
const Record *WI = R.getClass("WInst");
const Record *VI = R.getClass("VInst");
const Record *SOpI = R.getClass("SOpInst");
const Record *IOpI = R.getClass("IOpInst");
const Record *WOpI = R.getClass("WOpInst");
const Record *LOpI = R.getClass("LOpInst");
const Record *NoTestOpI = R.getClass("NoTestOpInst");
ClassMap[SI] = ClassS;
ClassMap[II] = ClassI;
ClassMap[WI] = ClassW;
ClassMap[VI] = ClassV;
ClassMap[SOpI] = ClassS;
ClassMap[IOpI] = ClassI;
ClassMap[WOpI] = ClassW;
ClassMap[LOpI] = ClassL;
ClassMap[NoTestOpI] = ClassNoTest;
}
// Emit arm_neon.h.inc
void run(raw_ostream &o);
// Emit arm_fp16.h.inc
void runFP16(raw_ostream &o);
// Emit arm_bf16.h.inc
void runBF16(raw_ostream &o);
void runVectorTypes(raw_ostream &o);
// Emit all the __builtin prototypes used in arm_neon.h, arm_fp16.h and
// arm_bf16.h
void runHeader(raw_ostream &o);
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// Type implementation
//===----------------------------------------------------------------------===//
/// Render this type as the typedef name used in stdint.h / arm_neon.h,
/// e.g. "uint32x2_t", "float16x8x2_t" or "int8_t const *".
std::string Type::str() const {
  if (isVoid())
    return "void";
  if (isFPM())
    return "fpm_t";

  // Base element name: poly/float/bfloat/mfloat, otherwise a (possibly
  // unsigned) integer.
  std::string Name;
  if (isPoly())
    Name = "poly";
  else if (isFloating())
    Name = "float";
  else if (isBFloat16())
    Name = "bfloat";
  else if (isMFloat8())
    Name = "mfloat";
  else
    Name = isSigned() ? "int" : "uint";

  Name += utostr(ElementBitwidth);
  if (isVector())
    Name += "x" + utostr(getNumElements());
  if (NumVectors > 1)
    Name += "x" + utostr(NumVectors);
  Name += "_t";

  if (Constant)
    Name += " const";
  if (Pointer)
    Name += " *";

  return Name;
}
// Encode this type as the character string used by the BUILTIN() macros in
// Builtins.def (e.g. "V8Sc" for an int8x8_t of explicitly signed chars).
std::string Type::builtin_str() const {
  std::string S;
  if (isVoid())
    return "v";

  if (isPointer()) {
    // All pointers are void pointers.
    S = "v";
    if (isConstPointer())
      S += "C";
    S += "*";
    return S;
  } else if (isInteger())
    switch (ElementBitwidth) {
    case 8: S += "c"; break;
    case 16: S += "s"; break;
    case 32: S += "i"; break;
    case 64: S += "Wi"; break;
    case 128: S += "LLLi"; break;
    default: llvm_unreachable("Unhandled case!");
    }
  else if (isBFloat16()) {
    assert(ElementBitwidth == 16 && "BFloat16 can only be 16 bits");
    S += "y";
  } else if (isMFloat8()) {
    assert(ElementBitwidth == 8 && "MFloat8 can only be 8 bits");
    S += "m";
  } else if (isFPM()) {
    S += "UWi";
  } else
    switch (ElementBitwidth) {
    case 16: S += "h"; break;
    case 32: S += "f"; break;
    case 64: S += "d"; break;
    default: llvm_unreachable("Unhandled case!");
    }

  // FIXME: is the explicit signedness/unsignedness prefixing below actually
  // necessary?
  if (isChar() && !isPointer() && isSigned())
    // Make chars explicitly signed.
    S = "S" + S;
  else if (isInteger() && !isSigned())
    S = "U" + S;

  // Constant indices are "int", but have the "constant expression" modifier.
  if (isImmediate()) {
    assert(isInteger() && isSigned());
    S = "I" + S;
  }

  if (isScalar())
    return S;

  // Vectors: one "V<lanes><elt>" group per constituent vector.
  std::string Ret;
  for (unsigned I = 0; I < NumVectors; ++I)
    Ret += "V" + utostr(getNumElements()) + S;

  return Ret;
}
// Encode this type as a NeonTypeFlags value: an EltType in the low bits,
// plus the Quad and Unsigned flag bits where applicable.
unsigned Type::getNeonEnum() const {
  // Addend is the offset of this bitwidth from the 8-bit entry of each
  // EltType family (Int8, Poly8, ...).
  unsigned Addend;
  switch (ElementBitwidth) {
  case 8: Addend = 0; break;
  case 16: Addend = 1; break;
  case 32: Addend = 2; break;
  case 64: Addend = 3; break;
  case 128: Addend = 4; break;
  default: llvm_unreachable("Unhandled element bitwidth!");
  }

  unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend;
  if (isPoly()) {
    // Adjustment needed because Poly32 doesn't exist.
    if (Addend >= 2)
      --Addend;
    Base = (unsigned)NeonTypeFlags::Poly8 + Addend;
  }
  if (isFloating()) {
    assert(Addend != 0 && "Float8 doesn't exist!");
    Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1);
  }

  if (isBFloat16()) {
    assert(Addend == 1 && "BFloat16 is only 16 bit");
    Base = (unsigned)NeonTypeFlags::BFloat16;
  }

  if (isMFloat8()) {
    Base = (unsigned)NeonTypeFlags::MFloat8;
  }

  // Flag bits on top of the element-type value.
  if (Bitwidth == 128)
    Base |= (unsigned)NeonTypeFlags::QuadFlag;
  if (isInteger() && !isSigned())
    Base |= (unsigned)NeonTypeFlags::UnsignedFlag;

  return Base;
}
/// Parse a stdint.h / arm_neon.h typedef name such as "uint32x2_t",
/// "float16x8x2_t" or "int64_t" into a Type.
Type Type::fromTypedefName(StringRef Name) {
  Type T;
  T.Kind = SInt;
  if (Name.consume_front("u"))
    T.Kind = UInt;

  if (Name.consume_front("float")) {
    T.Kind = Float;
  } else if (Name.consume_front("poly")) {
    T.Kind = Poly;
  } else if (Name.consume_front("bfloat")) {
    T.Kind = BFloat16;
  } else if (Name.consume_front("mfloat")) {
    T.Kind = MFloat8;
  } else {
    assert(Name.starts_with("int"));
    Name = Name.drop_front(3);
  }

  // Parse a leading run of decimal digits off Name and consume it,
  // returning its value (0 if there are no digits). Shared by the
  // element-width, lane-count and vector-count parses below.
  auto ConsumeNumber = [&Name]() {
    unsigned Len = 0;
    // Cast to unsigned char: isdigit() on a negative char value is UB.
    while (Len < Name.size() && isdigit(static_cast<unsigned char>(Name[Len])))
      ++Len;
    unsigned Value = 0;
    Name.substr(0, Len).getAsInteger(10, Value);
    Name = Name.drop_front(Len);
    return Value;
  };

  T.ElementBitwidth = ConsumeNumber();
  T.Bitwidth = T.ElementBitwidth;
  T.NumVectors = 1;

  if (Name.consume_front("x")) {
    // Vector: "x<lanes>" gives the total bitwidth.
    unsigned NumLanes = ConsumeNumber();
    T.Bitwidth = T.ElementBitwidth * NumLanes;
  } else {
    // Was scalar.
    T.NumVectors = 0;
  }

  // Optional second "x<count>" for 2/3/4-vector struct types.
  if (Name.consume_front("x"))
    T.NumVectors = ConsumeNumber();

  assert(Name.starts_with("_t") && "Malformed typedef!");
  return T;
}
// Initialize this type from the typespec string in TS. Each character
// either sets a modifier (S/H/Q/P/U) or selects the base element type;
// float cases deliberately fall through to the integer case of the same
// width to pick up the bitwidth.
void Type::applyTypespec(bool &Quad) {
  std::string S = TS;
  ScalarForMangling = false;
  Kind = SInt;
  ElementBitwidth = ~0U;  // Sentinel: must be set by a base-type character.
  NumVectors = 1;

  for (char I : S) {
    switch (I) {
    case 'S':
      ScalarForMangling = true;
      break;
    case 'H':
      // Quad-width operand that must not get the 'q' name mangling.
      NoManglingQ = true;
      Quad = true;
      break;
    case 'Q':
      Quad = true;
      break;
    case 'P':
      Kind = Poly;
      break;
    case 'U':
      Kind = UInt;
      break;
    case 'c':
      ElementBitwidth = 8;
      break;
    case 'h':
      Kind = Float;
      [[fallthrough]];
    case 's':
      ElementBitwidth = 16;
      break;
    case 'f':
      Kind = Float;
      [[fallthrough]];
    case 'i':
      ElementBitwidth = 32;
      break;
    case 'd':
      Kind = Float;
      [[fallthrough]];
    case 'l':
      ElementBitwidth = 64;
      break;
    case 'k':
      ElementBitwidth = 128;
      // Poly doesn't have a 128x1 type.
      if (isPoly())
        NumVectors = 0;
      break;
    case 'b':
      Kind = BFloat16;
      ElementBitwidth = 16;
      break;
    case 'm':
      Kind = MFloat8;
      ElementBitwidth = 8;
      break;
    default:
      llvm_unreachable("Unhandled type code!");
    }
  }
  assert(ElementBitwidth != ~0U && "Bad element bitwidth!");

  // Total width is fixed by the Quad flag: 128-bit "q" vectors or 64-bit.
  Bitwidth = Quad ? 128 : 64;
}
// Apply a prototype modifier string (from the intrinsic's Proto field) on
// top of the base type produced by applyTypespec. Each character tweaks
// one property: kind, element width, vector count, pointer/const-ness, etc.
void Type::applyModifiers(StringRef Mods) {
  bool AppliedQuad = false;
  applyTypespec(AppliedQuad);

  for (char Mod : Mods) {
    switch (Mod) {
    case '.':
      // No-op: use the base type unchanged.
      break;
    case 'v':
      Kind = Void;
      break;
    case 'S':
      Kind = SInt;
      break;
    case 'U':
      Kind = UInt;
      break;
    case 'B':
      Kind = BFloat16;
      ElementBitwidth = 16;
      break;
    case 'F':
      Kind = Float;
      break;
    case 'P':
      Kind = Poly;
      break;
    case 'V':
      // fpm_t: a scalar 64-bit FP-mode operand, replacing the base type
      // entirely.
      Kind = FPM;
      Bitwidth = ElementBitwidth = 64;
      NumVectors = 0;
      Immediate = Constant = Pointer = false;
      ScalarForMangling = NoManglingQ = true;
      break;
    case '>':
      // Double the element width.
      assert(ElementBitwidth < 128);
      ElementBitwidth *= 2;
      break;
    case '<':
      // Halve the element width.
      assert(ElementBitwidth > 8);
      ElementBitwidth /= 2;
      break;
    case '1':
      // Scalar (no vectors).
      NumVectors = 0;
      break;
    case '2':
      NumVectors = 2;
      break;
    case '3':
      NumVectors = 3;
      break;
    case '4':
      NumVectors = 4;
      break;
    case '*':
      Pointer = true;
      break;
    case 'c':
      Constant = true;
      break;
    case 'Q':
      // Force quad (128-bit) width.
      Bitwidth = 128;
      break;
    case 'q':
      // Force double-register (64-bit) width.
      Bitwidth = 64;
      break;
    case 'I':
      // Immediate i32 operand.
      Kind = SInt;
      ElementBitwidth = Bitwidth = 32;
      NumVectors = 0;
      Immediate = true;
      break;
    case 'p':
      // Demote poly to its unsigned-integer equivalent.
      if (isPoly())
        Kind = UInt;
      break;
    case '!':
      // Key type, handled elsewhere.
      break;
    default:
      llvm_unreachable("Unhandled character!");
    }
  }
}
//===----------------------------------------------------------------------===//
// Intrinsic implementation
//===----------------------------------------------------------------------===//
/// Return the next modifier spec in Proto starting at Pos, advancing Pos
/// past it. A spec is either a single character or a parenthesized group;
/// an empty StringRef signals the end of the prototype.
StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const {
  if (Pos == Proto.size())
    return StringRef();

  // Single-character modifier spec.
  if (Proto[Pos] != '(') {
    StringRef Single = Proto.substr(Pos, 1);
    ++Pos;
    return Single;
  }

  // Parenthesized group: everything between '(' and the matching ')'.
  size_t GroupStart = Pos + 1;
  size_t GroupEnd = Proto.find(')', GroupStart);
  assert_with_loc(GroupEnd != StringRef::npos, "unmatched modifier group paren");
  Pos = GroupEnd + 1;
  return Proto.slice(GroupStart, GroupEnd);
}
/// Return the type-suffix code for T under mangling class CK, e.g. "s8",
/// "u32", "i16", "f32", "bf16" — or "" when the class carries no type info.
std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
  // ClassB "neon" intrinsics are fully polymorphic: no type suffix at all.
  if (CK == ClassB && TargetGuard == "neon")
    return "";
  // Void-suffix instructions never get a type code either.
  if (this->CK == ClassV)
    return "";

  if (T.isBFloat16())
    return "bf16";
  if (T.isMFloat8())
    return "mf8";

  char TypeCode;
  if (T.isPoly())
    TypeCode = 'p';
  else if (T.isInteger())
    TypeCode = T.isSigned() ? 's' : 'u';
  else
    TypeCode = 'f';

  // ClassI erases signedness and poly-ness: s/u/p all collapse to 'i'.
  if (CK == ClassI &&
      (TypeCode == 's' || TypeCode == 'u' || TypeCode == 'p'))
    TypeCode = 'i';

  // Note: the original code re-tested (ClassB && "neon") here and carried an
  // always-true printNumber flag; both were dead after the early return
  // above and have been removed.
  return std::string(1, TypeCode) + utostr(T.getElementSizeInBits());
}
// Build the Builtins.def type string for this intrinsic: return type first,
// then each parameter, each normalized (poly -> unsigned int, vectors of
// ClassB -> 8-bit elements, etc.) before being encoded via builtin_str().
std::string Intrinsic::getBuiltinTypeStr() {
  ClassKind LocalCK = getClassKind(true);
  std::string S;

  Type RetT = getReturnType();
  // Width-only / generic-integer classes drop signedness from scalar
  // integer returns.
  if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() &&
      !RetT.isFloating() && !RetT.isBFloat16() && !RetT.isMFloat8())
    RetT.makeInteger(RetT.getElementSizeInBits(), false);

  // Since the return value must be one type, return a vector type of the
  // appropriate width which we will bitcast. An exception is made for
  // returning structs of 2, 3, or 4 vectors which are returned in a
  // sret-like fashion, storing them to a pointer arg.
  if (RetT.getNumVectors() > 1) {
    S += "vv*"; // void result with void* first argument
  } else {
    if (RetT.isPoly())
      RetT.makeInteger(RetT.getElementSizeInBits(), false);
    if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned())
      RetT.makeSigned();

    if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar())
      // Cast to vector of 8-bit elements.
      RetT.makeInteger(8, true);

    S += RetT.builtin_str();
  }

  // Encode each parameter with the same normalizations.
  for (unsigned I = 0; I < getNumParams(); ++I) {
    Type T = getParamType(I);
    if (T.isPoly())
      T.makeInteger(T.getElementSizeInBits(), false);

    if (LocalCK == ClassB && !T.isScalar())
      T.makeInteger(8, true);
    // Halves always get converted to 8-bit elements.
    if (T.isHalf() && T.isVector() && !T.isScalarForMangling())
      T.makeInteger(8, true);

    if (LocalCK == ClassI && T.isInteger())
      T.makeSigned();

    if (isArgImmediate(I))
      T.makeImmediate(32);

    S += T.builtin_str();
  }

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (LocalCK == ClassB)
    S += "i";

  return S;
}
/// Return the name mangled with type information. With ForceClassS the
/// full-type ClassS (u32/s32) suffix is used regardless of this intrinsic's
/// own class.
std::string Intrinsic::getMangledName(bool ForceClassS) const {
  // Check if the prototype has a scalar operand with the type of the vector
  // elements. If not, bitcasting the args will take care of arg checking.
  // The actual signedness etc. will be taken care of with special enums.
  ClassKind LocalCK = protoHasScalar() ? CK : ClassB;
  if (ForceClassS)
    LocalCK = ClassS;
  return mangleName(Name, LocalCK);
}
// Produce the suffixed name for this intrinsic: append the type code (and
// the input type code for reinterprets), then splice 'q' (quad) and any
// scalar-suffix letter in front of the first '_' so they land before
// _lane/_n/_xN parts.
std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const {
  std::string typeCode = getInstTypeCode(BaseType, LocalCK);
  std::string S = Name;

  // These conversion intrinsics already encode both types in their name.
  if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" ||
      Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32" ||
      Name == "vcvt_f32_bf16")
    return Name;

  if (!typeCode.empty()) {
    // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN.
    if (Name.size() >= 3 && isdigit(Name.back()) &&
        Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_')
      S.insert(S.length() - 3, "_" + typeCode);
    else
      S += "_" + typeCode;
  }

  if (BaseType != InBaseType) {
    // A reinterpret - out the input base type at the end.
    S += "_" + getInstTypeCode(InBaseType, LocalCK);
  }

  if (LocalCK == ClassB && TargetGuard == "neon")
    S += "_v";

  // Insert a 'q' before the first '_' character so that it ends up before
  // _lane or _n on vector-scalar operations.
  // NOTE(review): both insertions below assume S contains a '_' by this
  // point (find() returning npos would make insert() throw) — presumably
  // guaranteed by the intrinsic naming scheme; confirm against arm_neon.td.
  if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) {
    size_t Pos = S.find('_');
    S.insert(Pos, "q");
  }

  // Scalar-for-mangling types additionally get a size-letter suffix
  // (b/h/s/d) inserted at the same position.
  char Suffix = '\0';
  if (BaseType.isScalarForMangling()) {
    switch (BaseType.getElementSizeInBits()) {
    case 8: Suffix = 'b'; break;
    case 16: Suffix = 'h'; break;
    case 32: Suffix = 's'; break;
    case 64: Suffix = 'd'; break;
    default: llvm_unreachable("Bad suffix!");
    }
  }
  if (Suffix != '\0') {
    size_t Pos = S.find('_');
    S.insert(Pos, &Suffix, 1);
  }

  return S;
}
std::string Intrinsic::replaceParamsIn(std::string S) {
  // Substitute every "$Name" token in S with the (postfixed) name of the
  // corresponding entry in the Variables map.
  //
  // Improvements over the previous version: find('$') is computed once per
  // iteration instead of twice, the Variables map is probed once instead of
  // twice, the scan for the end of the variable name is explicitly bounded
  // (rather than relying on std::string's terminating NUL), and the char is
  // cast to unsigned char before isalpha() to avoid UB on negative chars.
  size_t Pos;
  while ((Pos = S.find('$')) != std::string::npos) {
    // The variable name is the maximal run of alphabetic characters
    // following the '$'.
    size_t End = Pos + 1;
    while (End < S.size() && isalpha(static_cast<unsigned char>(S[End])))
      ++End;

    std::string VarName = S.substr(Pos + 1, End - Pos - 1);
    auto It = Variables.find(VarName);
    assert_with_loc(It != Variables.end(), "Variable not defined!");
    S.replace(Pos, End - Pos, It->second.getName());
  }

  return S;
}
void Intrinsic::initVariables() {
  Variables.clear();

  // Register one named variable ("p0", "p1", ...) per parameter type.
  // Types[0] is the return type, so parameters begin at index 1.
  for (unsigned Idx = 1, E = Types.size(); Idx < E; ++Idx) {
    std::string ParamName = "p";
    ParamName.push_back(static_cast<char>('0' + (Idx - 1)));
    Variables[ParamName] = Variable(Types[Idx], ParamName + VariablePostfix);
  }
  RetVar = Variable(Types[0], "ret" + VariablePostfix);
}
void Intrinsic::emitPrototype(StringRef NamePrefix) {
  // Macro-style intrinsics start with "#define"; function-style ones get the
  // __ai specifier, an optional target attribute and the return type.
  if (UseMacro) {
    OS << "#define ";
  } else {
    OS << "__ai ";
    if (!TargetGuard.empty())
      OS << "__attribute__((target(\"" << TargetGuard << "\"))) ";
    OS << Types[0].str() << " ";
  }

  OS << NamePrefix.str() << mangleName(Name, ClassS) << "(";

  // Parameters are named p0, p1, ... and were registered by initVariables().
  for (unsigned Idx = 0, E = getNumParams(); Idx != E; ++Idx) {
    if (Idx)
      OS << ", ";

    std::string ParamName = "p";
    ParamName.push_back('0' + Idx);
    assert(Variables.find(ParamName) != Variables.end());
    Variable &V = Variables[ParamName];

    // Macro parameters are untyped; function parameters carry their type.
    if (!UseMacro)
      OS << V.getType().str() << " ";
    OS << V.getName();
  }

  OS << ")";
}
void Intrinsic::emitOpeningBrace() {
  // Macro bodies are wrapped in a GNU statement expression so they can
  // yield a value.
  OS << (UseMacro ? " __extension__ ({" : " {");
  emitNewLine();
}
void Intrinsic::emitClosingBrace() {
  // Close the statement expression for macros, the plain body otherwise.
  OS << (UseMacro ? "})" : "}");
}
void Intrinsic::emitNewLine() {
  // Every line of a macro definition needs a backslash continuation.
  OS << (UseMacro ? " \\\n" : "\n");
}
void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) {
  // Emit "Dest = __builtin_shufflevector(Src, Src, __lane_reverse_S_E);",
  // once for a plain vector or once per subvector for multi-vector
  // (struct-of-vectors) types. The shuffle-index emission was previously
  // duplicated across the two branches; factor it into a single lambda.
  auto EmitShuffle = [&](const std::string &DstExpr,
                         const std::string &SrcExpr) {
    OS << " " << DstExpr << " = "
       << "__builtin_shufflevector(" << SrcExpr << ", "
       << SrcExpr << ", __lane_reverse_"
       << Dest.getType().getSizeInBits() << "_"
       << Dest.getType().getElementSizeInBits() << ");";
    emitNewLine();
  };

  if (Dest.getType().getNumVectors() > 1) {
    // Multi-vector: reverse each .val[K] member individually.
    emitNewLine();
    for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K)
      EmitShuffle(Dest.getName() + ".val[" + utostr(K) + "]",
                  Src.getName() + ".val[" + utostr(K) + "]");
  } else {
    EmitShuffle(Dest.getName(), Src.getName());
  }
}
void Intrinsic::emitArgumentReversal() {
if (isBigEndianSafe())
return;
// Reverse all vector arguments.
for (unsigned I = 0; I < getNumParams(); ++I) {
std::string Name = "p" + utostr(I);
std::string NewName = "rev" + utostr(I);
Variable &V = Variables[Name];
Variable NewV(V.getType(), NewName + VariablePostfix);
if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1)
continue;
OS << " " << NewV.getType().str() << " " << NewV.getName() << ";";
emitReverseVariable(NewV, V);
V = NewV;
}
}
void Intrinsic::emitReturnVarDecl() {
  assert(RetVar.getType() == Types[0]);
  // Void intrinsics produce no value, so no "ret" variable is declared.
  if (RetVar.getType().isVoid())
    return;
  OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";";
  emitNewLine();
}
void Intrinsic::emitReturnReversal() {
if (isBigEndianSafe())
return;
if (!getReturnType().isVector() || getReturnType().isVoid() ||
getReturnType().getNumElements() == 1)
return;
emitReverseVariable(RetVar, RetVar);
}
void Intrinsic::emitShadowedArgs() {
// Macro arguments are not type-checked like inline function arguments,
// so assign them to local temporaries to get the right type checking.
if (!UseMacro)
return;
for (unsigned I = 0; I < getNumParams(); ++I) {
// Do not create a temporary for an immediate argument.
// That would defeat the whole point of using a macro!
if (getParamType(I).isImmediate())
continue;
// Do not create a temporary for pointer arguments. The input
// pointer may have an alignment hint.
if (getParamType(I).isPointer())
continue;
std::string Name = "p" + utostr(I);
assert(Variables.find(Name) != Variables.end());
Variable &V = Variables[Name];
std::string NewName = "s" + utostr(I);
Variable V2(V.getType(), NewName + VariablePostfix);
OS << " " << V2.getType().str() << " " << V2.getName() << " = "
<< V.getName() << ";";
emitNewLine();
V = V2;
}
}
bool Intrinsic::protoHasScalar() const {
return any_of(Types,
[](const Type &T) { return T.isScalar() && !T.isImmediate(); });
}
void Intrinsic::emitBodyAsBuiltinCall() {
  // Build one statement calling __builtin_neon_<mangled>, bit-casting the
  // arguments and result as needed, and emit it into OS.
  std::string S;

  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
  // sret-like argument.
  bool SRet = getReturnType().getNumVectors() >= 2;

  StringRef N = Name;
  ClassKind LocalCK = CK;
  if (!protoHasScalar())
    LocalCK = ClassB;

  // Non-void, non-sret results come back from the builtin and are bit-cast
  // to the declared return type; the matching ")" is appended further down.
  if (!getReturnType().isVoid() && !SRet)
    S += "__builtin_bit_cast(" + RetVar.getType().str() + ", ";

  S += "__builtin_neon_" + mangleName(std::string(N), LocalCK) + "(";

  // The sret destination is passed by address as the first argument.
  if (SRet)
    S += "&" + RetVar.getName() + ", ";

  for (unsigned I = 0; I < getNumParams(); ++I) {
    Variable &V = Variables["p" + utostr(I)];
    Type T = V.getType();

    // Handle multiple-vector values specially, emitting each subvector as an
    // argument to the builtin.
    if (T.getNumVectors() > 1) {
      // Check if an explicit cast is needed.
      std::string Cast;
      if (LocalCK == ClassB) {
        Type T2 = T;
        T2.makeOneVector();
        T2.makeInteger(8, /*Sign=*/true);
        Cast = "__builtin_bit_cast(" + T2.str() + ", ";
      }
      // Either "val[J], " (no cast) or "cast(val[J]), " (cast opened above).
      for (unsigned J = 0; J < T.getNumVectors(); ++J)
        S += Cast + V.getName() + ".val[" + utostr(J) + "]" +
             (Cast.empty() ? ", " : "), ");
      continue;
    }

    std::string Arg = V.getName();
    Type CastToType = T;

    // Check if an explicit cast is needed.
    if (CastToType.isVector()) {
      // Polymorphic (ClassB) arguments and half-float vectors are passed to
      // the builtin as signed 8-bit integer vectors; for ClassI, integer
      // vectors are passed as their signed variant.
      if (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling())) {
        CastToType.makeInteger(8, true);
        Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
      } else if (LocalCK == ClassI) {
        if (CastToType.isInteger()) {
          CastToType.makeSigned();
          Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
        }
      }
    }

    S += Arg + ", ";
  }

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (getClassKind(true) == ClassB) {
    S += utostr(getPolymorphicKeyType().getNeonEnum());
  } else {
    // Remove extraneous ", ".
    S.pop_back();
    S.pop_back();
  }

  // Close the bit-cast opened for non-void, non-sret results.
  if (!getReturnType().isVoid() && !SRet)
    S += ")";
  S += ");";

  std::string RetExpr;
  if (!SRet && !RetVar.getType().isVoid())
    RetExpr = RetVar.getName() + " = ";

  OS << " " << RetExpr << S;
  emitNewLine();
}
void Intrinsic::emitBody(StringRef CallPrefix) {
  // With no Operation DAGs the body is simply a call to the builtin.
  if (!Body || Body->empty()) {
    emitBodyAsBuiltinCall();
    return;
  }

  // Render each element of the Ops list into a line of code: string elements
  // are emitted verbatim (after $-substitution), DAG elements are lowered by
  // the DagEmitter.
  std::vector<std::string> Lines;
  for (auto *Elt : Body->getElements()) {
    if (const auto *Str = dyn_cast<StringInit>(Elt)) {
      Lines.push_back(replaceParamsIn(Str->getAsString()));
    } else if (const auto *Dag = dyn_cast<DagInit>(Elt)) {
      DagEmitter DE(*this, CallPrefix);
      Lines.push_back(DE.emitDag(Dag).second + ";");
    }
  }

  assert(!Lines.empty() && "Empty def?");

  // The value of the last statement becomes the intrinsic's return value.
  if (!RetVar.getType().isVoid())
    Lines.back().insert(0, RetVar.getName() + " = ");

  for (auto &Line : Lines) {
    OS << " " << Line;
    emitNewLine();
  }
}
void Intrinsic::emitReturn() {
  if (RetVar.getType().isVoid())
    return;
  // A statement expression yields the value of its last expression, so a
  // macro body just names the variable; functions use an explicit return.
  OS << (UseMacro ? " " : " return ") << RetVar.getName() << ";";
  emitNewLine();
}
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(const DagInit *DI) {
// At this point we should only be seeing a def.
const DefInit *DefI = cast<DefInit>(DI->getOperator());
std::string Op = DefI->getAsString();
if (Op == "cast" || Op == "bitcast")
return emitDagCast(DI, Op == "bitcast");
if (Op == "shuffle")
return emitDagShuffle(DI);
if (Op == "dup")
return emitDagDup(DI);
if (Op == "dup_typed")
return emitDagDupTyped(DI);
if (Op == "splat")
return emitDagSplat(DI);
if (Op == "save_temp")
return emitDagSaveTemp(DI);
if (Op == "op")
return emitDagOp(DI);
if (Op == "call" || Op == "call_mangled")
return emitDagCall(DI, Op == "call_mangled");
if (Op == "name_replace")
return emitDagNameReplace(DI);
if (Op == "literal")
return emitDagLiteral(DI);
assert_with_loc(false, "Unknown operation!");
return std::make_pair(Type::getVoid(), "");
}
std::pair<Type, std::string>
Intrinsic::DagEmitter::emitDagOp(const DagInit *DI) {
std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
if (DI->getNumArgs() == 2) {
// Unary op.
std::pair<Type, std::string> R =
emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
return std::make_pair(R.first, Op + R.second);
} else {
assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!");
std::pair<Type, std::string> R1 =
emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
std::pair<Type, std::string> R2 =
emitDagArg(DI->getArg(2), std::string(DI->getArgNameStr(2)));
assert_with_loc(R1.first == R2.first, "Argument type mismatch!");
return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second);
}
}
std::pair<Type, std::string>
Intrinsic::DagEmitter::emitDagCall(const DagInit *DI, bool MatchMangledName) {
  // (call name, args...) / (call_mangled name, args...) -> a call to another
  // intrinsic. The callee is overload-resolved against the evaluated argument
  // types; call_mangled additionally requires its mangled name to match.
  std::vector<Type> Types;
  std::vector<std::string> Values;
  // Evaluate the call arguments first; their types drive overload lookup.
  for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
    std::pair<Type, std::string> R =
        emitDagArg(DI->getArg(I + 1), std::string(DI->getArgNameStr(I + 1)));
    Types.push_back(R.first);
    Values.push_back(R.second);
  }

  // Look up the called intrinsic. The name is either a literal string or a
  // nested DAG (e.g. name_replace) that produces the name.
  std::string N;
  if (const auto *SI = dyn_cast<StringInit>(DI->getArg(0)))
    N = SI->getAsUnquotedString();
  else
    N = emitDagArg(DI->getArg(0), "").second;
  std::optional<std::string> MangledName;
  if (MatchMangledName) {
    // "laneq" callers resolve against the 'q'-suffixed callee name.
    if (Intr.getRecord()->getValueAsString("Name").contains("laneq"))
      N += "q";
    MangledName = Intr.mangleName(N, ClassS);
  }
  Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types, MangledName);

  // Make sure the callee is known as an early def.
  Callee.setNeededEarly();
  Intr.Dependencies.insert(&Callee);

  // Now create the call itself. Callees that are not big-endian safe are
  // invoked through the CallPrefix variant (e.g. "__noswap_").
  std::string S;
  if (!Callee.isBigEndianSafe())
    S += CallPrefix.str();
  S += Callee.getMangledName(true) + "(";
  for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
    if (I != 0)
      S += ", ";
    S += Values[I];
  }
  S += ")";

  return std::make_pair(Callee.getReturnType(), S);
}
std::pair<Type, std::string>
Intrinsic::DagEmitter::emitDagCast(const DagInit *DI, bool IsBitCast) {
  // (cast MOD* VAL) -> cast VAL to type given by MOD.
  // The value is the last argument; every preceding argument is a type
  // modifier, applied left-to-right starting from the value's own type.
  std::pair<Type, std::string> R =
      emitDagArg(DI->getArg(DI->getNumArgs() - 1),
                 std::string(DI->getArgNameStr(DI->getNumArgs() - 1)));
  Type castToType = R.first;
  for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) {

    // MOD can take several forms:
    //  1. $X - take the type of parameter / variable X.
    //  2. The value "R" - take the type of the return type.
    //  3. a type string
    //  4. The value "U" or "S" to switch the signedness.
    //  5. The value "H" or "D" to half or double the bitwidth.
    //  6. The value "8" to convert to 8-bit (signed) integer lanes.
    //  7. The value "32" to convert to 32-bit element lanes.
    if (!DI->getArgNameStr(ArgIdx).empty()) {
      // Form 1: named argument - adopt the referenced variable's type.
      assert_with_loc(Intr.Variables.find(DI->getArgNameStr(ArgIdx)) !=
                      Intr.Variables.end(),
                      "Variable not found");
      castToType =
          Intr.Variables[std::string(DI->getArgNameStr(ArgIdx))].getType();
    } else {
      const auto *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx));
      assert_with_loc(SI, "Expected string type or $Name for cast type");

      if (SI->getAsUnquotedString() == "R") {
        castToType = Intr.getReturnType();
      } else if (SI->getAsUnquotedString() == "U") {
        castToType.makeUnsigned();
      } else if (SI->getAsUnquotedString() == "S") {
        castToType.makeSigned();
      } else if (SI->getAsUnquotedString() == "H") {
        castToType.halveLanes();
      } else if (SI->getAsUnquotedString() == "D") {
        castToType.doubleLanes();
      } else if (SI->getAsUnquotedString() == "8") {
        castToType.makeInteger(8, true);
      } else if (SI->getAsUnquotedString() == "32") {
        castToType.make32BitElement();
      } else {
        // Form 3: anything else is treated as a typedef name.
        castToType = Type::fromTypedefName(SI->getAsUnquotedString());
        assert_with_loc(!castToType.isVoid(), "Unknown typedef");
      }
    }
  }

  // A bitcast reinterprets the bits; a plain cast is a C-style conversion.
  std::string S;
  if (IsBitCast)
    S = "__builtin_bit_cast(" + castToType.str() + ", " + R.second + ")";
  else
    S = "(" + castToType.str() + ")(" + R.second + ")";

  return std::make_pair(castToType, S);
}
std::pair<Type, std::string>
Intrinsic::DagEmitter::emitDagShuffle(const DagInit *DI) {
  // See the documentation in arm_neon.td for a description of these operators.
  // The local SetTheory operators below compute the list of shuffle indices
  // (records named "svN") for the generated __builtin_shufflevector call.

  // Keeps only the first half of the evaluated index list.
  class LowHalf : public SetTheory::Operator {
  public:
    void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
               ArrayRef<SMLoc> Loc) override {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
      Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2));
    }
  };

  // Keeps only the second half of the evaluated index list.
  class HighHalf : public SetTheory::Operator {
  public:
    void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
               ArrayRef<SMLoc> Loc) override {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
      Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end());
    }
  };

  // Reverses the index list in groups of (VectorSize / ElementSize) lanes;
  // the first DAG argument gives the vector size in bits.
  class Rev : public SetTheory::Operator {
    unsigned ElementSize;

  public:
    Rev(unsigned ElementSize) : ElementSize(ElementSize) {}

    void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
               ArrayRef<SMLoc> Loc) override {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc);

      int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue();
      VectorSize /= ElementSize;

      std::vector<const Record *> Revved;
      for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) {
        for (int LI = VectorSize - 1; LI >= 0; --LI) {
          Revved.push_back(Elts2[VI + LI]);
        }
      }

      Elts.insert_range(Revved);
    }
  };

  // Expands "mask0" to indices [0, N) (first input vector) and "mask1" to
  // indices [N, 2N) (second input vector), where N is the lane count.
  class MaskExpander : public SetTheory::Expander {
    unsigned N;

  public:
    MaskExpander(unsigned N) : N(N) {}

    void expand(SetTheory &ST, const Record *R,
                SetTheory::RecSet &Elts) override {
      unsigned Addend = 0;
      if (R->getName() == "mask0")
        Addend = 0;
      else if (R->getName() == "mask1")
        Addend = N;
      else
        return;
      for (unsigned I = 0; I < N; ++I)
        Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend)));
    }
  };

  // (shuffle arg1, arg2, sequence)
  std::pair<Type, std::string> Arg1 =
      emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
  std::pair<Type, std::string> Arg2 =
      emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
  assert_with_loc(Arg1.first == Arg2.first,
                  "Different types in arguments to shuffle!");

  // Evaluate the index sequence with the operators/expander defined above.
  SetTheory ST;
  SetTheory::RecSet Elts;
  ST.addOperator("lowhalf", std::make_unique<LowHalf>());
  ST.addOperator("highhalf", std::make_unique<HighHalf>());
  ST.addOperator("rev",
                 std::make_unique<Rev>(Arg1.first.getElementSizeInBits()));
  ST.addExpander("MaskExpand",
                 std::make_unique<MaskExpander>(Arg1.first.getNumElements()));
  ST.evaluate(DI->getArg(2), Elts, {});

  // Each resulting record is named "sv<index>"; strip the prefix to get the
  // numeric shuffle index.
  std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second;
  for (auto &E : Elts) {
    StringRef Name = E->getName();
    assert_with_loc(Name.starts_with("sv"),
                    "Incorrect element kind in shuffle mask!");
    S += ", " + Name.drop_front(2).str();
  }
  S += ")";

  // Recalculate the return type - the shuffle may have halved or doubled it.
  Type T(Arg1.first);
  if (Elts.size() > T.getNumElements()) {
    assert_with_loc(
        Elts.size() == T.getNumElements() * 2,
        "Can only double or half the number of elements in a shuffle!");
    T.doubleLanes();
  } else if (Elts.size() < T.getNumElements()) {
    assert_with_loc(
        Elts.size() == T.getNumElements() / 2,
        "Can only double or half the number of elements in a shuffle!");
    T.halveLanes();
  }

  return std::make_pair(T, S);
}
std::pair<Type, std::string>
Intrinsic::DagEmitter::emitDagDup(const DagInit *DI) {
assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument");
std::pair<Type, std::string> A =
emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument");
Type T = Intr.getBaseType();
assert_with_loc(T.isVector(), "dup() used but default type is scalar!");
std::string S = "(" + T.str() + ") {";
for (unsigned I = 0; I < T.getNumElements(); ++I) {
if (I != 0)
S += ", ";
S += A.second;
}
S += "}";
return std::make_pair(T, S);
}
std::pair<Type, std::string>
Intrinsic::DagEmitter::emitDagDupTyped(const DagInit *DI) {
assert_with_loc(DI->getNumArgs() == 2, "dup_typed() expects two arguments");
std::pair<Type, std::string> B =
emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
assert_with_loc(B.first.isScalar(),
"dup_typed() requires a scalar as the second argument");
Type T;
// If the type argument is a constant string, construct the type directly.
if (const auto *SI = dyn_cast<StringInit>(DI->getArg(0))) {
T = Type::fromTypedefName(SI->getAsUnquotedString());
assert_with_loc(!T.isVoid(), "Unknown typedef");
} else
T = emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))).first;
assert_with_loc(T.isVector(), "dup_typed() used but target type is scalar!");
std::string S = "(" + T.str() + ") {";
for (unsigned I = 0; I < T.getNumElements(); ++I) {
if (I != 0)
S += ", ";
S += B.second;
}
S += "}";
return std::make_pair(T, S);
}
std::pair<Type, std::string>
Intrinsic::DagEmitter::emitDagSplat(const DagInit *DI) {
assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments");
std::pair<Type, std::string> A =
emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
std::pair<Type, std::string> B =
emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
assert_with_loc(B.first.isScalar(),
"splat() requires a scalar int as the second argument");
std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second;
for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) {
S += ", " + B.second;
}
S += ")";
return std::make_pair(Intr.getBaseType(), S);
}
std::pair<Type, std::string>
Intrinsic::DagEmitter::emitDagSaveTemp(const DagInit *DI) {
assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments");
std::pair<Type, std::string> A =
emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
assert_with_loc(!A.first.isVoid(),
"Argument to save_temp() must have non-void type!");
std::string N = std::string(DI->getArgNameStr(0));
assert_with_loc(!N.empty(),
"save_temp() expects a name as the first argument");
auto [It, Inserted] =
Intr.Variables.try_emplace(N, A.first, N + Intr.VariablePostfix);
assert_with_loc(Inserted, "Variable already defined!");
std::string S = A.first.str() + " " + It->second.getName() + " = " + A.second;
return std::make_pair(Type::getVoid(), S);
}
std::pair<Type, std::string>
Intrinsic::DagEmitter::emitDagNameReplace(const DagInit *DI) {
  // (name_replace "from", "to") -> this intrinsic's name with the first
  // occurrence of "from" replaced by "to" (used to derive callee names).
  assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!");
  std::string From = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
  std::string To = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();

  std::string NewName = Intr.Name;
  size_t Pos = NewName.find(From);
  assert_with_loc(Pos != std::string::npos,
                  "name should contain '" + From + "'!");
  NewName.replace(Pos, From.size(), To);

  return std::make_pair(Type::getVoid(), NewName);
}
std::pair<Type, std::string>
Intrinsic::DagEmitter::emitDagLiteral(const DagInit *DI) {
  // (literal "type", "value") -> emit "value" verbatim with the named type.
  const auto *TypeStr = cast<StringInit>(DI->getArg(0));
  const auto *ValueStr = cast<StringInit>(DI->getArg(1));
  return std::make_pair(Type::fromTypedefName(TypeStr->getAsUnquotedString()),
                        ValueStr->getAsUnquotedString());
}
std::pair<Type, std::string>
Intrinsic::DagEmitter::emitDagArg(const Init *Arg, std::string ArgName) {
  // A named argument refers to one of the intrinsic's variables; an unnamed
  // argument must be a nested DAG to evaluate recursively.
  if (!ArgName.empty()) {
    assert_with_loc(!Arg->isComplete(),
                    "Arguments must either be DAGs or names, not both!");
    auto It = Intr.Variables.find(ArgName);
    assert_with_loc(It != Intr.Variables.end(), "Variable not defined!");
    return std::make_pair(It->second.getType(), It->second.getName());
  }

  assert(Arg && "Neither ArgName nor Arg?!");
  const auto *DI = dyn_cast<DagInit>(Arg);
  assert_with_loc(DI, "Arguments must either be DAGs or names!");

  return emitDag(DI);
}
std::string Intrinsic::generate() {
  // Big-endian-safe intrinsics need only a single definition, shared by
  // both endiannesses.
  if (isBigEndianSafe()) {
    generateImpl(false, "", "");
    return OS.str();
  }

  // Little endian intrinsics are simple and don't require any argument
  // swapping.
  OS << "#ifdef __LITTLE_ENDIAN__\n";
  generateImpl(false, "", "");
  OS << "#else\n";

  // Big endian intrinsics must behave "as-if" their vector operands were
  // loaded by LDR (AArch64), VLDR (64-bit vectors on AArch32) or VLDM
  // (128-bit vectors on AArch32), while we actually load as-if LD1/VLD1.
  // So all vector arguments are lane-reversed on entry and the return value
  // reversed on exit, and any sub-intrinsic calls go through the __noswap_
  // variants so lanes are not re-swapped.
  generateImpl(true, "", "__noswap_");

  // If other intrinsics call this one, also provide the non-swapping
  // __noswap_ variant they target.
  if (NeededEarly)
    generateImpl(false, "__noswap_", "__noswap_");
  OS << "#endif\n\n";

  return OS.str();
}
void Intrinsic::generateImpl(bool ReverseArguments,
                             StringRef NamePrefix, StringRef CallPrefix) {
  // Emit one complete definition (or unavailable declaration) of this
  // intrinsic into OS, with the given name prefix for the definition and
  // call prefix for any sub-intrinsic calls in its body.
  CurrentRecord = R;

  // If we call a macro, our local variables may be corrupted due to
  // lack of proper lexical scoping. So, add a globally unique postfix
  // to every variable.
  //
  // indexBody() should have set up the Dependencies set by now.
  for (auto *I : Dependencies)
    if (I->UseMacro) {
      VariablePostfix = "_" + utostr(Emitter.getUniqueNumber());
      break;
    }

  initVariables();

  emitPrototype(NamePrefix);

  if (IsUnavailable) {
    // Unavailable intrinsics get a declaration only, marked unavailable.
    OS << " __attribute__((unavailable));";
  } else {
    emitOpeningBrace();
    // Emit return variable declaration first as to not trigger
    // -Wdeclaration-after-statement.
    emitReturnVarDecl();
    // Macro arguments are copied into typed locals before any use.
    emitShadowedArgs();
    // Big-endian path: reverse vector argument lanes on entry...
    if (ReverseArguments)
      emitArgumentReversal();
    emitBody(CallPrefix);
    // ...and reverse the result's lanes back before returning.
    if (ReverseArguments)
      emitReturnReversal();
    emitReturn();
    emitClosingBrace();
  }
  OS << "\n";

  CurrentRecord = nullptr;
}
void Intrinsic::indexBody() {
  // Dry-run the body emission so its side effects (populating Variables and
  // Dependencies, flagging callees as needed-early) take place, then discard
  // the generated text.
  CurrentRecord = R;

  initVariables();
  // Emit return variable declaration first as to not trigger
  // -Wdeclaration-after-statement.
  emitReturnVarDecl();
  emitBody("");
  // Throw away the emitted text; only the bookkeeping above matters here.
  OS.str("");

  CurrentRecord = nullptr;
}
//===----------------------------------------------------------------------===//
// NeonEmitter implementation
//===----------------------------------------------------------------------===//
Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types,
                                     std::optional<std::string> MangledName) {
  // Resolve Name plus the given argument types (and, optionally, a required
  // mangled name) to exactly one Intrinsic overload.
  //
  // First, look up the name in the intrinsic map. Do the lookup once and
  // reuse the iterator (the previous version performed the same map lookup
  // twice, once for the assertion and once for the access).
  auto MapIt = IntrinsicMap.find(Name);
  assert_with_loc(MapIt != IntrinsicMap.end(),
                  ("Intrinsic '" + Name + "' not found!").str());
  auto &V = MapIt->second;
  std::vector<Intrinsic *> GoodVec;

  // Create a string to print if we end up failing.
  std::string ErrMsg = "looking up intrinsic '" + Name.str() + "(";
  for (unsigned I = 0; I < Types.size(); ++I) {
    if (I != 0)
      ErrMsg += ", ";
    ErrMsg += Types[I].str();
  }
  ErrMsg += ")'\n";
  ErrMsg += "Available overloads:\n";

  // Now, look through each intrinsic implementation and see if the types are
  // compatible.
  for (auto &I : V) {
    ErrMsg += " - " + I.getReturnType().str() + " " + I.getMangledName();
    ErrMsg += "(";
    for (unsigned A = 0; A < I.getNumParams(); ++A) {
      if (A != 0)
        ErrMsg += ", ";
      ErrMsg += I.getParamType(A).str();
    }
    ErrMsg += ")\n";

    // When a mangled name is requested it must match exactly.
    if (MangledName && MangledName != I.getMangledName(true))
      continue;

    if (I.getNumParams() != Types.size())
      continue;

    // All argument types must match positionally.
    unsigned ArgNum = 0;
    bool MatchingArgumentTypes = all_of(Types, [&](const auto &Type) {
      return Type == I.getParamType(ArgNum++);
    });

    if (MatchingArgumentTypes)
      GoodVec.push_back(&I);
  }

  assert_with_loc(!GoodVec.empty(),
                  "No compatible intrinsic found - " + ErrMsg);
  assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg);

  return *GoodVec.front();
}
void NeonEmitter::createIntrinsic(const Record *R,
                                  SmallVectorImpl<Intrinsic *> &Out) {
  // Instantiate one Intrinsic per type spec (or per valid pair of type
  // specs when a Cartesian product is requested) for the "Inst" record R,
  // store them in IntrinsicMap, and append pointers to Out.
  std::string Name = std::string(R->getValueAsString("Name"));
  std::string Proto = std::string(R->getValueAsString("Prototype"));
  std::string Types = std::string(R->getValueAsString("Types"));
  const Record *OperationRec = R->getValueAsDef("Operation");
  bool BigEndianSafe  = R->getValueAsBit("BigEndianSafe");
  std::string ArchGuard = std::string(R->getValueAsString("ArchGuard"));
  std::string TargetGuard = std::string(R->getValueAsString("TargetGuard"));
  bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
  std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith"));

  // Set the global current record. This allows assert_with_loc to produce
  // decent location information even when highly nested.
  CurrentRecord = R;

  const ListInit *Body = OperationRec->getValueAsListInit("Ops");

  std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types);

  // The class kind (ClassS/ClassI/...) is derived from the record's direct
  // superclass, if any.
  ClassKind CK = ClassNone;
  if (!R->getDirectSuperClasses().empty())
    CK = ClassMap[R->getDirectSuperClasses()[0].first];

  std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
  if (!CartesianProductWith.empty()) {
    // Pair every type spec with every product spec of the same total bit
    // width (skipping identical pairs); used e.g. for reinterpret casts.
    std::vector<TypeSpec> ProductTypeSpecs = TypeSpec::fromTypeSpecs(CartesianProductWith);
    for (auto TS : TypeSpecs) {
      Type DefaultT(TS, ".");
      for (auto SrcTS : ProductTypeSpecs) {
        Type DefaultSrcT(SrcTS, ".");
        if (TS == SrcTS ||
            DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
          continue;
        NewTypeSpecs.push_back(std::make_pair(TS, SrcTS));
      }
    }
  } else {
    // No product requested: each type spec pairs with itself.
    for (auto TS : TypeSpecs) {
      NewTypeSpecs.push_back(std::make_pair(TS, TS));
    }
  }

  // Deduplicate the (sorted) pairs before instantiating.
  sort(NewTypeSpecs);
  NewTypeSpecs.erase(llvm::unique(NewTypeSpecs), NewTypeSpecs.end());
  auto &Entry = IntrinsicMap[Name];

  for (auto &I : NewTypeSpecs) {
    // MFloat8 type is only available on AArch64. If encountered set ArchGuard
    // correctly.
    std::string NewArchGuard = ArchGuard;
    if (Type(I.first, ".").isMFloat8()) {
      if (NewArchGuard.empty()) {
        NewArchGuard = "defined(__aarch64__)";
      } else if (NewArchGuard.find("defined(__aarch64__)") ==
                 std::string::npos) {
        NewArchGuard = "defined(__aarch64__) && (" + NewArchGuard + ")";
      }
    }
    Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this,
                       NewArchGuard, TargetGuard, IsUnavailable, BigEndianSafe);
    Out.push_back(&Entry.back());
  }

  CurrentRecord = nullptr;
}
/// genBuiltinsDef: Generate the builtin infos, checking for unique builtin
/// declarations.
void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
                                 SmallVectorImpl<Intrinsic *> &Defs) {
  // We only want to emit a builtin once, and in order of its name.
  std::map<std::string, Intrinsic *> Builtins;
  llvm::StringToOffsetTable Table;
  // Pre-register the empty string and the "n" (nothrow) attribute string so
  // their offsets are always available in the table.
  Table.GetOrAddStringOffset("");
  Table.GetOrAddStringOffset("n");

  // Intrinsics with a body don't call __builtin_neon_* and need no builtin
  // record; the rest are deduplicated by mangled name, interning the strings
  // that the Builtin::Info entries reference.
  for (auto *Def : Defs) {
    if (Def->hasBody())
      continue;
    if (Builtins.insert({Def->getMangledName(), Def}).second) {
      Table.GetOrAddStringOffset(Def->getMangledName());
      Table.GetOrAddStringOffset(Def->getBuiltinTypeStr());
      Table.GetOrAddStringOffset(Def->getTargetGuard());
    }
  }

  // Enumerator list: one BI__builtin_neon_* value per unique builtin.
  OS << "#ifdef GET_NEON_BUILTIN_ENUMERATORS\n";
  for (const auto &[Name, Def] : Builtins) {
    OS << "  BI__builtin_neon_" << Name << ",\n";
  }
  OS << "#endif // GET_NEON_BUILTIN_ENUMERATORS\n\n";

  // The interned string table itself.
  OS << "#ifdef GET_NEON_BUILTIN_STR_TABLE\n";
  Table.EmitStringTableDef(OS, "BuiltinStrings");
  OS << "#endif // GET_NEON_BUILTIN_STR_TABLE\n\n";

  // Builtin::Info entries: string-table offsets for name, type string,
  // attributes ("n") and target guard.
  OS << "#ifdef GET_NEON_BUILTIN_INFOS\n";
  for (const auto &[Name, Def] : Builtins) {
    OS << "    Builtin::Info{Builtin::Info::StrOffsets{"
       << Table.GetStringOffset(Def->getMangledName()) << " /* "
       << Def->getMangledName() << " */, ";
    OS << Table.GetStringOffset(Def->getBuiltinTypeStr()) << " /* "
       << Def->getBuiltinTypeStr() << " */, ";
    OS << Table.GetStringOffset("n") << " /* n */, ";
    OS << Table.GetStringOffset(Def->getTargetGuard()) << " /* "
       << Def->getTargetGuard() << " */}, ";
    OS << "HeaderDesc::NO_HEADER, ALL_LANGUAGES},\n";
  }
  OS << "#endif // GET_NEON_BUILTIN_INFOS\n\n";
}
void NeonEmitter::genStreamingSVECompatibleList(
    raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
  // Emit, for every unique __builtin_neon_* builtin, a switch case marking
  // it non-streaming for SME streaming-mode diagnostics.
  OS << "#ifdef GET_NEON_STREAMING_COMPAT_FLAG\n";

  std::set<std::string> Emitted;
  for (auto *Def : Defs) {
    // If the def has a body (that is, it has Operation DAGs), it won't call
    // __builtin_neon_* so we don't need to generate a definition for it.
    if (Def->hasBody())
      continue;

    // insert().second is false when the name was already emitted, which
    // gives us the dedup test and the bookkeeping in a single set lookup
    // (the previous version did a find() followed by an insert()).
    std::string Name = Def->getMangledName();
    if (!Emitted.insert(Name).second)
      continue;

    // FIXME: We should make exceptions here for some NEON builtins that are
    // permitted in streaming mode.
    OS << "case NEON::BI__builtin_neon_" << Name
       << ": BuiltinType = ArmNonStreaming; break;\n";
  }
  OS << "#endif\n\n";
}
/// Generate the ARM and AArch64 overloaded type checking code for
/// SemaChecking.cpp, checking for unique builtin declarations.
void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
                                           SmallVectorImpl<Intrinsic *> &Defs) {
  OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";

  // We record each overload check line before emitting because subsequent Inst
  // definitions may extend the number of permitted types (i.e. augment the
  // Mask). Use std::map to avoid sorting the table by hash number.
  struct OverloadInfo {
    uint64_t Mask = 0ULL;            // bitmask of permitted NeonTypeFlags
    int PtrArgNum = 0;               // index of the pointer argument, if any
    bool HasConstPtr = false;        // whether that pointer is const-qualified
    OverloadInfo() = default;
  };
  std::map<std::string, OverloadInfo> OverloadMap;

  for (auto *Def : Defs) {
    // If the def has a body (that is, it has Operation DAGs), it won't call
    // __builtin_neon_* so we don't need to generate a definition for it.
    if (Def->hasBody())
      continue;
    // Functions which have a scalar argument cannot be overloaded, no need to
    // check them if we are emitting the type checking code.
    if (Def->protoHasScalar())
      continue;

    // Record this intrinsic's key type as permitted for the builtin.
    uint64_t Mask = 0ULL;
    Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum();

    // Check if the function has a pointer or const pointer argument.
    // Note: only the last such argument is remembered.
    int PtrArgNum = -1;
    bool HasConstPtr = false;
    for (unsigned I = 0; I < Def->getNumParams(); ++I) {
      const auto &Type = Def->getParamType(I);
      if (Type.isPointer()) {
        PtrArgNum = I;
        HasConstPtr = Type.isConstPointer();
      }
    }

    // For sret builtins, adjust the pointer argument index.
    if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1)
      PtrArgNum += 1;

    std::string Name = Def->getName();
    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
    // vst1_lane, vldap1_lane, and vstl1_lane intrinsics. Using a pointer to
    // the vector element type with one of those operations causes codegen to
    // select an aligned load/store instruction. If you want an unaligned
    // operation, the pointer argument needs to have less alignment than element
    // type, so just accept any pointer type.
    if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane" ||
        Name == "vldap1_lane" || Name == "vstl1_lane") {
      PtrArgNum = -1;
      HasConstPtr = false;
    }

    if (Mask) {
      OverloadInfo &OI = OverloadMap[Def->getMangledName()];
      OI.Mask |= Mask;
      // NOTE(review): bitwise-OR of argument *indices* looks suspicious; it
      // only yields a meaningful value if every overload of a builtin has
      // its pointer argument at the same index (or none, since the field is
      // initialized to 0 rather than -1) — confirm that invariant holds.
      OI.PtrArgNum |= PtrArgNum;
      OI.HasConstPtr = HasConstPtr;
    }
  }

  // Emit one "case ...: mask = ...;" line per builtin, with optional pointer
  // argument information for Sema's pointer-type check.
  for (auto &I : OverloadMap) {
    OverloadInfo &OI = I.second;

    OS << "case NEON::BI__builtin_neon_" << I.first << ": ";
    OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL";
    if (OI.PtrArgNum >= 0)
      OS << "; PtrArgNum = " << OI.PtrArgNum;
    if (OI.HasConstPtr)
      OS << "; HasConstPtr = true";
    OS << "; break;\n";
  }

  OS << "#endif\n\n";
}
inline bool
NeonEmitter::areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA,
const ArrayRef<ImmCheck> ChecksB) {
// If multiple intrinsics map to the same builtin, we must ensure that the
// intended range checks performed in SemaArm.cpp do not contradict each
// other, as these are emitted once per-buitlin.
//
// The arguments to be checked and type of each check to be performed must be
// the same. The element types may differ as they will be resolved
// per-intrinsic as overloaded types by SemaArm.cpp, though the vector sizes
// are not and so must be the same.
bool compat =
std::equal(ChecksA.begin(), ChecksA.end(), ChecksB.begin(), ChecksB.end(),
[](const auto &A, const auto &B) {
return A.getImmArgIdx() == B.getImmArgIdx() &&
A.getKind() == B.getKind() &&
A.getVecSizeInBits() == B.getVecSizeInBits();
});
return compat;
}
void NeonEmitter::genIntrinsicRangeCheckCode(
raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
std::unordered_map<std::string, ArrayRef<ImmCheck>> Emitted;
OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
for (auto &Def : Defs) {
// If the Def has a body (operation DAGs), it is not a __builtin_neon_
if (Def->hasBody() || !Def->hasImmediate())
continue;
// Sorted by immediate argument index
ArrayRef<ImmCheck> Checks = Def->getImmChecks();
auto [It, Inserted] = Emitted.try_emplace(Def->getMangledName(), Checks);
if (!Inserted) {
assert(areRangeChecksCompatible(Checks, It->second) &&
"Neon intrinsics with incompatible immediate range checks cannot "
"share a builtin.");
continue; // Ensure this is emitted only once
}
// Emit builtin's range checks
OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ":\n";
for (const auto &Check : Checks) {
OS << " ImmChecks.emplace_back(" << Check.getImmArgIdx() << ", "
<< Check.getKind() << ", " << Check.getElementSizeInBits() << ", "
<< Check.getVecSizeInBits() << ");\n"
<< " break;\n";
}
}
OS << "#endif\n\n";
}
/// runHeader - Emit a file with sections defining:
/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
/// 2. the SemaChecking code for the type overload checking.
/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
void NeonEmitter::runHeader(raw_ostream &OS) {
SmallVector<Intrinsic *, 128> Defs;
for (const Record *R : Records.getAllDerivedDefinitions("Inst"))
createIntrinsic(R, Defs);
// Generate shared BuiltinsXXX.def
genBuiltinsDef(OS, Defs);
// Generate ARM overloaded type checking code for SemaChecking.cpp
genOverloadTypeCheckCode(OS, Defs);
genStreamingSVECompatibleList(OS, Defs);
// Generate ARM range checking code for shift/lane immediates.
genIntrinsicRangeCheckCode(OS, Defs);
}
static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) {
std::string TypedefTypes(types);
std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);
// Emit vector typedefs.
bool InIfdef = false;
for (auto &TS : TDTypeVec) {
bool IsA64 = false;
Type T(TS, ".");
if (T.isDouble() || T.isMFloat8())
IsA64 = true;
if (InIfdef && !IsA64) {
OS << "#endif\n";
InIfdef = false;
}
if (!InIfdef && IsA64) {
OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
InIfdef = true;
}
if (T.isPoly())
OS << "typedef __attribute__((neon_polyvector_type(";
else
OS << "typedef __attribute__((neon_vector_type(";
Type T2 = T;
T2.makeScalar();
OS << T.getNumElements();
OS << "))) " << T2.str();
OS << " " << T.str() << ";\n";
}
if (InIfdef)
OS << "#endif\n";
OS << "\n";
// Emit struct typedefs.
InIfdef = false;
for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
for (auto &TS : TDTypeVec) {
bool IsA64 = false;
Type T(TS, ".");
if (T.isDouble() || T.isMFloat8())
IsA64 = true;
if (InIfdef && !IsA64) {
OS << "#endif\n";
InIfdef = false;
}
if (!InIfdef && IsA64) {
OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
InIfdef = true;
}
const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0};
Type VT(TS, Mods);
OS << "typedef struct " << VT.str() << " {\n";
OS << " " << T.str() << " val";
OS << "[" << NumMembers << "]";
OS << ";\n} ";
OS << VT.str() << ";\n";
OS << "\n";
}
}
if (InIfdef)
OS << "#endif\n";
}
/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
/// is comprised of type definitions and function declarations.
void NeonEmitter::run(raw_ostream &OS) {
OS << "/*===---- arm_neon.h - ARM Neon intrinsics "
"------------------------------"
"---===\n"
" *\n"
" * Permission is hereby granted, free of charge, to any person "
"obtaining "
"a copy\n"
" * of this software and associated documentation files (the "
"\"Software\"),"
" to deal\n"
" * in the Software without restriction, including without limitation "
"the "
"rights\n"
" * to use, copy, modify, merge, publish, distribute, sublicense, "
"and/or sell\n"
" * copies of the Software, and to permit persons to whom the Software "
"is\n"
" * furnished to do so, subject to the following conditions:\n"
" *\n"
" * The above copyright notice and this permission notice shall be "
"included in\n"
" * all copies or substantial portions of the Software.\n"
" *\n"
" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
"EXPRESS OR\n"
" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
"MERCHANTABILITY,\n"
" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
"SHALL THE\n"
" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
"OTHER\n"
" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
"ARISING FROM,\n"
" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
"DEALINGS IN\n"
" * THE SOFTWARE.\n"
" *\n"
" *===-----------------------------------------------------------------"
"---"
"---===\n"
" */\n\n";
OS << "#ifndef __ARM_NEON_H\n";
OS << "#define __ARM_NEON_H\n\n";
OS << "#if !defined(__arm__) && !defined(__aarch64__) && "
"!defined(__arm64ec__)\n";
OS << "#error \"<arm_neon.h> is intended only for ARM and AArch64 "
"targets\"\n";
OS << "#elif !defined(__ARM_FP)\n";
OS << "#error \"NEON intrinsics not available with the soft-float ABI. "
"Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n";
OS << "#else\n\n";
OS << "#include <stdint.h>\n\n";
OS << "#include <arm_bf16.h>\n";
OS << "#include <arm_vector_types.h>\n";
// For now, signedness of polynomial types depends on target
OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
OS << "typedef uint8_t poly8_t;\n";
OS << "typedef uint16_t poly16_t;\n";
OS << "typedef uint64_t poly64_t;\n";
OS << "typedef __uint128_t poly128_t;\n";
OS << "#else\n";
OS << "typedef int8_t poly8_t;\n";
OS << "typedef int16_t poly16_t;\n";
OS << "typedef int64_t poly64_t;\n";
OS << "#endif\n";
emitNeonTypeDefs("PcQPcPsQPsPlQPl", OS);
OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
"__nodebug__))\n\n";
// Shufflevector arguments lists for endian-swapping vectors for big-endian
// targets. For AArch64, we need to reverse every lane in the vector, but for
// AArch32 we need to reverse the lanes within each 64-bit chunk of the
// vector. The naming convention here is __lane_reverse_<n>_<m>, where <n> is
// the length of the vector in bits, and <m> is length of each lane in bits.
OS << "#if !defined(__LITTLE_ENDIAN__)\n";
OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
OS << "#define __lane_reverse_64_32 1,0\n";
OS << "#define __lane_reverse_64_16 3,2,1,0\n";
OS << "#define __lane_reverse_64_8 7,6,5,4,3,2,1,0\n";
OS << "#define __lane_reverse_128_64 1,0\n";
OS << "#define __lane_reverse_128_32 3,2,1,0\n";
OS << "#define __lane_reverse_128_16 7,6,5,4,3,2,1,0\n";
OS << "#define __lane_reverse_128_8 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0\n";
OS << "#else\n";
OS << "#define __lane_reverse_64_32 1,0\n";
OS << "#define __lane_reverse_64_16 3,2,1,0\n";
OS << "#define __lane_reverse_64_8 7,6,5,4,3,2,1,0\n";
OS << "#define __lane_reverse_128_64 0,1\n";
OS << "#define __lane_reverse_128_32 1,0,3,2\n";
OS << "#define __lane_reverse_128_16 3,2,1,0,7,6,5,4\n";
OS << "#define __lane_reverse_128_8 7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8\n";
OS << "#endif\n";
OS << "#endif\n";
SmallVector<Intrinsic *, 128> Defs;
for (const Record *R : Records.getAllDerivedDefinitions("Inst"))
createIntrinsic(R, Defs);
for (auto *I : Defs)
I->indexBody();
stable_sort(Defs, deref<std::less<>>());
// Only emit a def when its requirements have been met.
// FIXME: This loop could be made faster, but it's fast enough for now.
bool MadeProgress = true;
std::string InGuard;
while (!Defs.empty() && MadeProgress) {
MadeProgress = false;
for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
I != Defs.end(); /*No step*/) {
bool DependenciesSatisfied = true;
for (auto *II : (*I)->getDependencies()) {
if (is_contained(Defs, II))
DependenciesSatisfied = false;
}
if (!DependenciesSatisfied) {
// Try the next one.
++I;
continue;
}
// Emit #endif/#if pair if needed.
if ((*I)->getArchGuard() != InGuard) {
if (!InGuard.empty())
OS << "#endif\n";
InGuard = (*I)->getArchGuard();
if (!InGuard.empty())
OS << "#if " << InGuard << "\n";
}
// Actually generate the intrinsic code.
OS << (*I)->generate();
MadeProgress = true;
I = Defs.erase(I);
}
}
assert(Defs.empty() && "Some requirements were not satisfied!");
if (!InGuard.empty())
OS << "#endif\n";
OS << "\n";
OS << "#undef __ai\n\n";
OS << "#endif /* if !defined(__ARM_NEON) */\n";
OS << "#endif /* ifndef __ARM_FP */\n";
}
/// run - Read the records in arm_fp16.td and output arm_fp16.h. arm_fp16.h
/// is comprised of type definitions and function declarations.
void NeonEmitter::runFP16(raw_ostream &OS) {
OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics "
"------------------------------"
"---===\n"
" *\n"
" * Permission is hereby granted, free of charge, to any person "
"obtaining a copy\n"
" * of this software and associated documentation files (the "
"\"Software\"), to deal\n"
" * in the Software without restriction, including without limitation "
"the rights\n"
" * to use, copy, modify, merge, publish, distribute, sublicense, "
"and/or sell\n"
" * copies of the Software, and to permit persons to whom the Software "
"is\n"
" * furnished to do so, subject to the following conditions:\n"
" *\n"
" * The above copyright notice and this permission notice shall be "
"included in\n"
" * all copies or substantial portions of the Software.\n"
" *\n"
" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
"EXPRESS OR\n"
" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
"MERCHANTABILITY,\n"
" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
"SHALL THE\n"
" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
"OTHER\n"
" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
"ARISING FROM,\n"
" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
"DEALINGS IN\n"
" * THE SOFTWARE.\n"
" *\n"
" *===-----------------------------------------------------------------"
"---"
"---===\n"
" */\n\n";
OS << "#ifndef __ARM_FP16_H\n";
OS << "#define __ARM_FP16_H\n\n";
OS << "#include <stdint.h>\n\n";
OS << "typedef __fp16 float16_t;\n";
OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
"__nodebug__))\n\n";
SmallVector<Intrinsic *, 128> Defs;
for (const Record *R : Records.getAllDerivedDefinitions("Inst"))
createIntrinsic(R, Defs);
for (auto *I : Defs)
I->indexBody();
stable_sort(Defs, deref<std::less<>>());
// Only emit a def when its requirements have been met.
// FIXME: This loop could be made faster, but it's fast enough for now.
bool MadeProgress = true;
std::string InGuard;
while (!Defs.empty() && MadeProgress) {
MadeProgress = false;
for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
I != Defs.end(); /*No step*/) {
bool DependenciesSatisfied = true;
for (auto *II : (*I)->getDependencies()) {
if (is_contained(Defs, II))
DependenciesSatisfied = false;
}
if (!DependenciesSatisfied) {
// Try the next one.
++I;
continue;
}
// Emit #endif/#if pair if needed.
if ((*I)->getArchGuard() != InGuard) {
if (!InGuard.empty())
OS << "#endif\n";
InGuard = (*I)->getArchGuard();
if (!InGuard.empty())
OS << "#if " << InGuard << "\n";
}
// Actually generate the intrinsic code.
OS << (*I)->generate();
MadeProgress = true;
I = Defs.erase(I);
}
}
assert(Defs.empty() && "Some requirements were not satisfied!");
if (!InGuard.empty())
OS << "#endif\n";
OS << "\n";
OS << "#undef __ai\n\n";
OS << "#endif /* __ARM_FP16_H */\n";
}
void NeonEmitter::runVectorTypes(raw_ostream &OS) {
OS << "/*===---- arm_vector_types - ARM vector type "
"------===\n"
" *\n"
" *\n"
" * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
"Exceptions.\n"
" * See https://llvm.org/LICENSE.txt for license information.\n"
" * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
" *\n"
" *===-----------------------------------------------------------------"
"------===\n"
" */\n\n";
OS << "#if !defined(__ARM_NEON_H) && !defined(__ARM_SVE_H)\n";
OS << "#error \"This file should not be used standalone. Please include"
" arm_neon.h or arm_sve.h instead\"\n\n";
OS << "#endif\n";
OS << "#ifndef __ARM_NEON_TYPES_H\n";
OS << "#define __ARM_NEON_TYPES_H\n";
OS << "typedef float float32_t;\n";
OS << "typedef __fp16 float16_t;\n";
OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
OS << "typedef __mfp8 mfloat8_t;\n";
OS << "typedef double float64_t;\n";
OS << "#endif\n\n";
OS << R"(
typedef uint64_t fpm_t;
enum __ARM_FPM_FORMAT { __ARM_FPM_E5M2, __ARM_FPM_E4M3 };
enum __ARM_FPM_OVERFLOW { __ARM_FPM_INFNAN, __ARM_FPM_SATURATE };
static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
__arm_fpm_init(void) {
return 0;
}
static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
__arm_set_fpm_src1_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
return (__fpm & ~7ull) | (fpm_t)__format;
}
static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
__arm_set_fpm_src2_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
return (__fpm & ~0x38ull) | ((fpm_t)__format << 3u);
}
static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
__arm_set_fpm_dst_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
return (__fpm & ~0x1c0ull) | ((fpm_t)__format << 6u);
}
static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
__arm_set_fpm_overflow_mul(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) {
return (__fpm & ~0x4000ull) | ((fpm_t)__behaviour << 14u);
}
static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
__arm_set_fpm_overflow_cvt(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) {
return (__fpm & ~0x8000ull) | ((fpm_t)__behaviour << 15u);
}
static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
__arm_set_fpm_lscale(fpm_t __fpm, uint64_t __scale) {
return (__fpm & ~0x7f0000ull) | (__scale << 16u);
}
static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
__arm_set_fpm_nscale(fpm_t __fpm, int64_t __scale) {
return (__fpm & ~0xff000000ull) | (((fpm_t)__scale & 0xffu) << 24u);
}
static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
__arm_set_fpm_lscale2(fpm_t __fpm, uint64_t __scale) {
return (uint32_t)__fpm | (__scale << 32u);
}
)";
emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlmQmhQhfQfdQd", OS);
emitNeonTypeDefs("bQb", OS);
OS << "#endif // __ARM_NEON_TYPES_H\n";
}
void NeonEmitter::runBF16(raw_ostream &OS) {
OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics "
"-----------------------------------===\n"
" *\n"
" *\n"
" * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
"Exceptions.\n"
" * See https://llvm.org/LICENSE.txt for license information.\n"
" * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
" *\n"
" *===-----------------------------------------------------------------"
"------===\n"
" */\n\n";
OS << "#ifndef __ARM_BF16_H\n";
OS << "#define __ARM_BF16_H\n\n";
OS << "typedef __bf16 bfloat16_t;\n";
OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
"__nodebug__))\n\n";
SmallVector<Intrinsic *, 128> Defs;
for (const Record *R : Records.getAllDerivedDefinitions("Inst"))
createIntrinsic(R, Defs);
for (auto *I : Defs)
I->indexBody();
stable_sort(Defs, deref<std::less<>>());
// Only emit a def when its requirements have been met.
// FIXME: This loop could be made faster, but it's fast enough for now.
bool MadeProgress = true;
std::string InGuard;
while (!Defs.empty() && MadeProgress) {
MadeProgress = false;
for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
I != Defs.end(); /*No step*/) {
bool DependenciesSatisfied = true;
for (auto *II : (*I)->getDependencies()) {
if (is_contained(Defs, II))
DependenciesSatisfied = false;
}
if (!DependenciesSatisfied) {
// Try the next one.
++I;
continue;
}
// Emit #endif/#if pair if needed.
if ((*I)->getArchGuard() != InGuard) {
if (!InGuard.empty())
OS << "#endif\n";
InGuard = (*I)->getArchGuard();
if (!InGuard.empty())
OS << "#if " << InGuard << "\n";
}
// Actually generate the intrinsic code.
OS << (*I)->generate();
MadeProgress = true;
I = Defs.erase(I);
}
}
assert(Defs.empty() && "Some requirements were not satisfied!");
if (!InGuard.empty())
OS << "#endif\n";
OS << "\n";
OS << "#undef __ai\n\n";
OS << "#endif\n";
}
void clang::EmitNeon(const RecordKeeper &Records, raw_ostream &OS) {
NeonEmitter(Records).run(OS);
}
void clang::EmitFP16(const RecordKeeper &Records, raw_ostream &OS) {
NeonEmitter(Records).runFP16(OS);
}
void clang::EmitBF16(const RecordKeeper &Records, raw_ostream &OS) {
NeonEmitter(Records).runBF16(OS);
}
void clang::EmitNeonSema(const RecordKeeper &Records, raw_ostream &OS) {
NeonEmitter(Records).runHeader(OS);
}
void clang::EmitVectorTypes(const RecordKeeper &Records, raw_ostream &OS) {
NeonEmitter(Records).runVectorTypes(OS);
}
void clang::EmitNeonTest(const RecordKeeper &Records, raw_ostream &OS) {
llvm_unreachable("Neon test generation no longer implemented!");
}