llvm-project/clang/utils/TableGen/ClangBuiltinsEmitter.cpp
Chandler Carruth b968fd9502
[StrTable] Mechanically convert NVPTX builtins to use TableGen (#122873)
This switches them to use the common TableGen layer, extending it to
support the missing features needed by the NVPTX backend.

The biggest thing was to build a TableGen system that computes the
cumulative SM and PTX feature sets the same way the macros did. That's
done with some string concatenation tricks in TableGen, but they worked
out pretty neatly and are very comparable in complexity to the macro
version.

Then the actual defines were mapped over using a very hacky Python
script. It was never productionized or intended to work in the future,
but for posterity:

https://gist.github.com/chandlerc/10bdf8fb1312e252b4a501bace184b66

Last but not least, there was a very odd "bug" in one of the converted
builtins' prototype in the TableGen model: it didn't handle uses of `Z`
and `U` both as *qualifiers* of a single type, treating `Z` as its own
`int32_t` type. So my hacky Python script converted `ZUi` into two
types, an `int32_t` and an `unsigned int`. This produced a very wrong
prototype. But the tests caught this nicely and I fixed it manually
rather than trying to improve the Python script as it occurred in
exactly one place I could find.

This should provide direct benefits of allowing future refactorings to
more directly leverage TableGen to express builtins more structurally
rather than textually. It will also make my efforts to move builtins to
string tables significantly more effective for the NVPTX backend where
the X-macro approach resulted in *significantly* less efficient string
tables than other targets due to the long repeated feature strings.
2025-01-27 22:45:37 -08:00

470 lines
15 KiB
C++

//===-- ClangBuiltinsEmitter.cpp - Generate Clang builtins tables ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend emits Clang's builtins tables.
//
//===----------------------------------------------------------------------===//
#include "TableGenBackends.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
using namespace llvm;
namespace {
/// Selects which X-macro a builtin definition is emitted through.
enum class BuiltinType {
  /// Emitted as `BUILTIN(...)`.
  Builtin,
  /// Emitted as `ATOMIC_BUILTIN(...)`.
  AtomicBuiltin,
  /// Emitted as `LIBBUILTIN(...)`; carries a header and a language field.
  LibBuiltin,
  /// Emitted as `LANGBUILTIN(...)`; carries a language field.
  LangBuiltin,
  /// Emitted as `TARGET_BUILTIN(...)`; carries a feature string.
  TargetBuiltin,
  /// Emitted as `TARGET_HEADER_BUILTIN(...)`; carries header, language and
  /// feature fields.
  TargetLibBuiltin,
};
/// Converts the human-readable `Prototype` string of a builtin record into the
/// compact type-encoding string that is printed as the TYPE argument of the
/// emitted builtin macro.
///
/// The whole prototype is parsed in the constructor; malformed input is
/// reported through PrintFatalError at the location of the `Prototype` field.
class PrototypeParser {
public:
  /// \param Substitution type spelling that replaces a leading `T` in the
  ///        prototype; must be non-empty if the prototype uses `T`.
  /// \param Builtin record supplying the `Prototype` string and the
  ///        `EnableOpenCLLong` bit.
  PrototypeParser(StringRef Substitution, const Record *Builtin)
      : Loc(Builtin->getFieldLoc("Prototype")), Substitution(Substitution),
        EnableOpenCLLong(Builtin->getValueAsBit("EnableOpenCLLong")) {
    ParsePrototype(Builtin->getValueAsString("Prototype"));
  }

private:
  /// Trims \p Prototype and parses it, leaving `Type` empty for an empty
  /// prototype.
  void ParsePrototype(StringRef Prototype) {
    Prototype = Prototype.trim();

    // Some builtins don't have an expressible prototype, simply emit an empty
    // string for them.
    if (Prototype.empty()) {
      Type = "";
      return;
    }

    ParseTypes(Prototype);
  }

  /// Parses `<return type>(<param>, <param>, ...)`, appending the encoding of
  /// the return type followed by the encoding of every parameter to `Type`.
  void ParseTypes(StringRef &Prototype) {
    auto ReturnType = Prototype.take_until([](char c) { return c == '('; });

    // Without a '(' the "return type" spans the entire prototype and the
    // drop_front below would slice past the end of the string; diagnose the
    // malformed prototype instead.
    if (ReturnType.size() == Prototype.size())
      PrintFatalError(Loc, "Expected '(' after return type in prototype");

    ParseType(ReturnType);
    Prototype = Prototype.drop_front(ReturnType.size() + 1);
    if (!Prototype.ends_with(")"))
      PrintFatalError(Loc, "Expected closing brace at end of prototype");
    Prototype = Prototype.drop_back();

    // Look through the input parameters.
    const size_t end = Prototype.size();
    for (size_t I = 0; I != end;) {
      const StringRef Current = Prototype.substr(I);
      // Skip any leading space or commas
      if (Current.starts_with(" ") || Current.starts_with(",")) {
        ++I;
        continue;
      }

      // Check if we are in _ExtVector. We do this first because
      // extended vectors are written in template form with the syntax
      // _ExtVector< ..., ...>, so we need to make sure we are not
      // detecting the comma of the template class as a separator for
      // the parameters of the prototype. Note: the assumption is that
      // we cannot have nested _ExtVector.
      if (Current.starts_with("_ExtVector<") ||
          Current.starts_with("_Vector<")) {
        const size_t EndTemplate = Current.find('>', 0);
        // npos + 1 would wrap to 0 and hand an empty string to ParseType;
        // report the unterminated template explicitly.
        if (EndTemplate == StringRef::npos)
          PrintFatalError(Loc, "Expected '>' to close vector type in prototype");
        ParseType(Current.substr(0, EndTemplate + 1));
        // Move the prototype beyond _ExtVector<...>
        I += EndTemplate + 1;
        continue;
      }

      // We know that we are past _ExtVector, therefore the first seen
      // comma is the boundary of a parameter in the prototype. Leading commas
      // were skipped above, so a found comma is never at position 0.
      const size_t CommaPos = Current.find(',', 0);
      if (CommaPos != StringRef::npos) {
        StringRef T = Current.substr(0, CommaPos);
        ParseType(T);
        // Move the prototype beyond the comma.
        I += CommaPos + 1;
        continue;
      }

      // No more commas, parse final parameter.
      ParseType(Current);
      I = end;
    }
  }

  /// Appends the encoding of the single type \p T to `Type`.
  ///
  /// Trailing modifiers (`*`, `const`, `volatile`, `restrict`, `&`) are
  /// encoded after the type they modify; leading modifiers (`long`, `signed`,
  /// `unsigned`, `_Complex`, `_Constant`) are encoded before it. Anything left
  /// must be one of the spellings in the table at the end.
  void ParseType(StringRef T) {
    T = T.trim();

    // Strips a trailing `address_space<N>` from T and returns N, or nullopt if
    // T does not end in '>'.
    auto ConsumeAddrSpace = [&]() -> std::optional<unsigned> {
      T = T.trim();
      if (!T.consume_back(">"))
        return std::nullopt;

      auto Open = T.find_last_of('<');
      if (Open == StringRef::npos)
        PrintFatalError(Loc, "Mismatched angle-brackets in type");

      StringRef ArgStr = T.substr(Open + 1);
      T = T.slice(0, Open);
      if (!T.consume_back("address_space"))
        PrintFatalError(Loc,
                        "Only `address_space<N>` supported as a parameterized "
                        "pointer or reference type qualifier");

      unsigned Number = 0;
      if (ArgStr.getAsInteger(10, Number))
        PrintFatalError(
            Loc, "Expected an integer argument to the address_space qualifier");
      if (Number == 0)
        PrintFatalError(Loc, "No need for a qualifier for address space `0`");
      return Number;
    };

    if (T.consume_back("*")) {
      // Pointers may have an address space qualifier immediately before them.
      std::optional<unsigned> AS = ConsumeAddrSpace();
      ParseType(T);
      Type += "*";
      if (AS)
        Type += std::to_string(*AS);
    } else if (T.consume_back("const")) {
      ParseType(T);
      Type += "C";
    } else if (T.consume_back("volatile")) {
      ParseType(T);
      Type += "D";
    } else if (T.consume_back("restrict")) {
      ParseType(T);
      Type += "R";
    } else if (T.consume_back("&")) {
      // References may have an address space qualifier immediately before them.
      std::optional<unsigned> AS = ConsumeAddrSpace();
      ParseType(T);
      Type += "&";
      if (AS)
        Type += std::to_string(*AS);
    } else if (T.consume_back(")")) {
      // NOTE(review): a stray trailing ')' is dropped and the type is encoded
      // like a reference; the intent is not evident from this file.
      ParseType(T);
      Type += "&";
    } else if (EnableOpenCLLong && T.consume_front("long long")) {
      Type += "O";
      ParseType(T);
    } else if (T.consume_front("long")) {
      Type += "L";
      ParseType(T);
    } else if (T.consume_front("signed")) {
      Type += "S";
      ParseType(T);
    } else if (T.consume_front("unsigned")) {
      Type += "U";
      ParseType(T);
    } else if (T.consume_front("_Complex")) {
      Type += "X";
      ParseType(T);
    } else if (T.consume_front("_Constant")) {
      Type += "I";
      ParseType(T);
    } else if (T.consume_front("T")) {
      // `T` is the template placeholder; encode the substituted type instead.
      if (Substitution.empty())
        PrintFatalError(Loc, "Not a template");
      ParseType(Substitution);
    } else if (auto IsExt = T.consume_front("_ExtVector");
               IsExt || T.consume_front("_Vector")) {
      // Clang extended vector types are mangled as follows:
      //
      // '_ExtVector<' <lanes> ',' <scalar type> '>'

      // Before parsing T(=<scalar type>), make sure the syntax of
      // `_ExtVector<N, T>` is correct...
      if (!T.consume_front("<"))
        PrintFatalError(Loc, "Expected '<' after '_ExtVector'");
      unsigned long long Lanes;
      if (consumeUnsignedInteger(T, 10, Lanes))
        PrintFatalError(Loc, "Expected number of lanes after '_ExtVector<'");
      Type += (IsExt ? "E" : "V") + std::to_string(Lanes);
      if (!T.consume_front(","))
        PrintFatalError(Loc,
                        "Expected ',' after number of lanes in '_ExtVector<'");
      if (!T.consume_back(">"))
        PrintFatalError(
            Loc, "Expected '>' after scalar type in '_ExtVector<N, type>'");

      // ...all good, we can check if we have a valid `<scalar type>`.
      ParseType(T);
    } else {
      auto ReturnTypeVal = StringSwitch<std::string>(T)
                               .Case("__builtin_va_list_ref", "A")
                               .Case("__builtin_va_list", "a")
                               .Case("__float128", "LLd")
                               .Case("__fp16", "h")
                               .Case("__int128_t", "LLLi")
                               .Case("_Float16", "x")
                               .Case("__bf16", "y")
                               .Case("bool", "b")
                               .Case("char", "c")
                               .Case("constant_CFString", "F")
                               .Case("double", "d")
                               .Case("FILE", "P")
                               .Case("float", "f")
                               .Case("id", "G")
                               .Case("int", "i")
                               .Case("int32_t", "Zi")
                               .Case("int64_t", "Wi")
                               .Case("jmp_buf", "J")
                               .Case("msint32_t", "Ni")
                               .Case("msuint32_t", "UNi")
                               .Case("objc_super", "M")
                               .Case("pid_t", "p")
                               .Case("ptrdiff_t", "Y")
                               .Case("SEL", "H")
                               .Case("short", "s")
                               .Case("sigjmp_buf", "SJ")
                               .Case("size_t", "z")
                               .Case("ucontext_t", "K")
                               .Case("uint32_t", "UZi")
                               .Case("uint64_t", "UWi")
                               .Case("void", "v")
                               .Case("wchar_t", "w")
                               .Case("...", ".")
                               .Default("error");
      if (ReturnTypeVal == "error")
        PrintFatalError(Loc, "Unknown Type: " + T);
      Type += ReturnTypeVal;
    }
  }

public:
  /// Writes `, "<encoded prototype>"` to \p OS.
  void Print(raw_ostream &OS) const { OS << ", \"" << Type << '\"'; }

private:
  SMLoc Loc;              ///< Location of the `Prototype` field, for errors.
  StringRef Substitution; ///< Replacement for the `T` placeholder.
  bool EnableOpenCLLong;  ///< Whether `long long` is encoded as "O".
  std::string Type;       ///< Encoded prototype accumulated while parsing.
};
/// Turns the `Header` field of a builtin record into the identifier printed as
/// the HEADER argument of the emitted macro: lowercase letters are uppercased
/// and each of '.', '_', '/' and '-' becomes '_'. Any other character is a
/// fatal error.
class HeaderNameParser {
public:
  /// Builds the identifier from the `Header` string of \p Builtin.
  HeaderNameParser(const Record *Builtin) {
    for (char c : Builtin->getValueAsString("Header")) {
      // The <cctype> classification functions have undefined behaviour when
      // passed a negative value other than EOF, so go through unsigned char.
      const auto UC = static_cast<unsigned char>(c);
      if (std::islower(UC))
        HeaderName += static_cast<char>(std::toupper(UC));
      else if (c == '.' || c == '_' || c == '/' || c == '-')
        HeaderName += '_';
      else
        PrintFatalError(Builtin->getLoc(), "Unexpected header name");
    }
  }

  /// Writes the identifier to \p OS without any surrounding punctuation.
  void Print(raw_ostream &OS) const { OS << HeaderName; }

private:
  std::string HeaderName;
};
/// Writes the double-quoted attribute string of \p Builtin to \p OS.
///
/// The string consists of, in order:
///  - for records deriving from `LibBuiltin`: 'f' when emitted as a
///    LibBuiltin, otherwise 'F' followed by 'E' if the
///    `OnlyBuiltinPrefixedAliasIsConstexpr` bit is set;
///  - 'z' if a `Namespace` is set (only "std" is accepted);
///  - the `Mangling` of every entry in `Attributes`, followed by `:<Index>:`
///    for an `IndexedAttribute` or `<i,j,...>` for a `MultiIndexAttribute`.
///
/// \param Builtin record describing the builtin.
/// \param BT kind the builtin is currently being emitted as.
/// \param OS stream receiving the output.
void PrintAttributes(const Record *Builtin, BuiltinType BT, raw_ostream &OS) {
  OS << '\"';
  if (Builtin->isSubClassOf("LibBuiltin")) {
    if (BT == BuiltinType::LibBuiltin) {
      OS << 'f';
    } else {
      OS << 'F';
      if (Builtin->getValueAsBit("OnlyBuiltinPrefixedAliasIsConstexpr"))
        OS << 'E';
    }
  }

  if (auto NS = Builtin->getValueAsOptionalString("Namespace")) {
    // Name the offending namespace so the diagnostic is actionable.
    if (*NS != "std")
      PrintFatalError(Builtin->getFieldLoc("Namespace"),
                      "Unknown namespace: " + *NS);
    OS << "z";
  }

  for (const auto *Attr : Builtin->getValueAsListOfDefs("Attributes")) {
    OS << Attr->getValueAsString("Mangling");
    if (Attr->isSubClassOf("IndexedAttribute")) {
      OS << ':' << Attr->getValueAsInt("Index") << ':';
    } else if (Attr->isSubClassOf("MultiIndexAttribute")) {
      OS << '<';
      llvm::ListSeparator Sep(",");
      for (int64_t Index : Attr->getValueAsListOfInts("Indices"))
        OS << Sep << Index;
      OS << '>';
    }
  }
  OS << '\"';
}
/// Emits one macro invocation describing \p Builtin under the name
/// \p Spelling.
///
/// The line has the shape
///   MACRO(Spelling, "prototype", "attributes"[, HEADER][, LANGS][, "features"])
/// where the macro name and the optional trailing fields depend on \p BT. An
/// `#undef Spelling` line is emitted first when the record's `RequiresUndef`
/// bit is set.
///
/// \param OS stream receiving the output.
/// \param Substitution template substitution forwarded to the prototype
///        parser.
/// \param Builtin record describing the builtin.
/// \param Spelling full name of the builtin being emitted.
/// \param BT kind to emit the builtin as.
void EmitBuiltinDef(raw_ostream &OS, StringRef Substitution,
                    const Record *Builtin, Twine Spelling, BuiltinType BT) {
  if (Builtin->getValueAsBit("RequiresUndef"))
    OS << "#undef " << Spelling << '\n';

  // Pick the X-macro that matches the builtin kind.
  StringRef MacroName;
  switch (BT) {
  case BuiltinType::Builtin:
    MacroName = "BUILTIN";
    break;
  case BuiltinType::AtomicBuiltin:
    MacroName = "ATOMIC_BUILTIN";
    break;
  case BuiltinType::LibBuiltin:
    MacroName = "LIBBUILTIN";
    break;
  case BuiltinType::LangBuiltin:
    MacroName = "LANGBUILTIN";
    break;
  case BuiltinType::TargetBuiltin:
    MacroName = "TARGET_BUILTIN";
    break;
  case BuiltinType::TargetLibBuiltin:
    MacroName = "TARGET_HEADER_BUILTIN";
    break;
  }
  OS << MacroName;

  // Arguments shared by every kind: name, prototype and attributes.
  OS << "(" << Spelling;
  PrototypeParser{Substitution, Builtin}.Print(OS);
  OS << ", ";
  PrintAttributes(Builtin, BT, OS);

  // Kind-specific trailing arguments, always in header/languages/features
  // order.
  const bool IsTargetLib = BT == BuiltinType::TargetLibBuiltin;
  const bool WantsHeader = IsTargetLib || BT == BuiltinType::LibBuiltin;
  const bool WantsLanguages = WantsHeader || BT == BuiltinType::LangBuiltin;
  const bool WantsFeatures = IsTargetLib || BT == BuiltinType::TargetBuiltin;

  if (WantsHeader) {
    OS << ", ";
    HeaderNameParser{Builtin}.Print(OS);
  }
  if (WantsLanguages)
    OS << ", " << Builtin->getValueAsString("Languages");
  if (WantsFeatures)
    OS << ", \"" << Builtin->getValueAsString("Features") << "\"";

  OS << ")\n";
}
/// The instantiations of a templated builtin: parallel lists of type
/// substitutions and name affixes, plus where the affix is attached.
struct TemplateInsts {
  /// Type spellings substituted for the `T` placeholder, one per
  /// instantiation.
  std::vector<std::string> Substitution;
  /// Name affixes; entry I belongs to Substitution[I].
  std::vector<std::string> Affix;
  /// True if the affix is prepended to the spelling, false if it is appended.
  /// Initialized so a default-constructed instance never holds an
  /// indeterminate value.
  bool IsPrefix = false;
};
/// Reads the `Substitutions`, `Affixes` and `AsPrefix` fields of record \p R
/// into a TemplateInsts.
///
/// Reports a fatal error at the record's location if the two lists differ in
/// length.
TemplateInsts getTemplateInsts(const Record *R) {
  TemplateInsts Result;
  const auto SubstitutionList = R->getValueAsListOfStrings("Substitutions");
  const auto AffixList = R->getValueAsListOfStrings("Affixes");
  Result.IsPrefix = R->getValueAsBit("AsPrefix");

  const size_t NumInsts = SubstitutionList.size();
  if (NumInsts != AffixList.size())
    PrintFatalError(R->getLoc(), "Substitutions and affixes "
                                 "don't have the same lengths");

  // The lists are parallel, so copy them entry by entry.
  for (size_t Idx = 0; Idx != NumInsts; ++Idx) {
    Result.Substitution.emplace_back(SubstitutionList[Idx]);
    Result.Affix.emplace_back(AffixList[Idx]);
  }
  return Result;
}
/// Emits every definition produced by record \p Builtin to \p OS: one per
/// combination of template instantiation and entry in `Spellings`.
///
/// Records that do not derive from `Template` are treated as a single
/// instantiation with an empty substitution and an empty affix. A `LibBuiltin`
/// whose `AddBuiltinPrefixedAlias` bit is set additionally gets a
/// `__builtin_`-prefixed definition, emitted as a plain Builtin ahead of the
/// library one.
void EmitBuiltin(raw_ostream &OS, const Record *Builtin) {
  TemplateInsts Templates = {};
  if (!Builtin->isSubClassOf("Template")) {
    Templates.Affix.emplace_back();
    Templates.Substitution.emplace_back();
  } else {
    Templates = getTemplateInsts(Builtin);
  }

  // Maps the record's class hierarchy onto a builtin kind; the first matching
  // class wins.
  const auto ClassifyRecord = [Builtin]() {
    if (Builtin->isSubClassOf("AtomicBuiltin"))
      return BuiltinType::AtomicBuiltin;
    if (Builtin->isSubClassOf("LangBuiltin"))
      return BuiltinType::LangBuiltin;
    if (Builtin->isSubClassOf("TargetLibBuiltin"))
      return BuiltinType::TargetLibBuiltin;
    if (Builtin->isSubClassOf("TargetBuiltin"))
      return BuiltinType::TargetBuiltin;
    if (Builtin->isSubClassOf("LibBuiltin"))
      return BuiltinType::LibBuiltin;
    return BuiltinType::Builtin;
  };

  for (auto [TypeSubst, NameAffix] :
       zip(Templates.Substitution, Templates.Affix)) {
    for (StringRef Spelling : Builtin->getValueAsListOfStrings("Spellings")) {
      const std::string FullSpelling = Templates.IsPrefix
                                           ? (NameAffix + Spelling).str()
                                           : (Spelling + NameAffix).str();

      const BuiltinType Kind = ClassifyRecord();

      // Library builtins may also be reachable under a __builtin_ name.
      if (Kind == BuiltinType::LibBuiltin &&
          Builtin->getValueAsBit("AddBuiltinPrefixedAlias"))
        EmitBuiltinDef(OS, TypeSubst, Builtin,
                       std::string("__builtin_") + FullSpelling,
                       BuiltinType::Builtin);

      EmitBuiltinDef(OS, TypeSubst, Builtin, FullSpelling, Kind);
    }
  }
}
} // namespace
void clang::EmitClangBuiltins(const RecordKeeper &Records, raw_ostream &OS) {
emitSourceFileHeader("List of builtins that Clang recognizes", OS);
OS << R"c++(
#if defined(BUILTIN) && !defined(LIBBUILTIN)
# define LIBBUILTIN(ID, TYPE, ATTRS, HEADER, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS)
#endif
#if defined(BUILTIN) && !defined(LANGBUILTIN)
# define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS)
#endif
// Some of our atomics builtins are handled by AtomicExpr rather than
// as normal builtin CallExprs. This macro is used for such builtins.
#ifndef ATOMIC_BUILTIN
# define ATOMIC_BUILTIN(ID, TYPE, ATTRS) BUILTIN(ID, TYPE, ATTRS)
#endif
#if defined(BUILTIN) && !defined(TARGET_BUILTIN)
# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
#endif
#if defined(BUILTIN) && !defined(TARGET_HEADER_BUILTIN)
# define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANG, FEATURE) BUILTIN(ID, TYPE, ATTRS)
#endif
)c++";
// AtomicBuiltins are order dependent
// emit them first to make manual checking easier
for (const auto *Builtin : Records.getAllDerivedDefinitions("AtomicBuiltin"))
EmitBuiltin(OS, Builtin);
for (const auto *Builtin : Records.getAllDerivedDefinitions("Builtin")) {
if (Builtin->isSubClassOf("AtomicBuiltin"))
continue;
EmitBuiltin(OS, Builtin);
}
OS << R"c++(
#undef ATOMIC_BUILTIN
#undef BUILTIN
#undef LIBBUILTIN
#undef LANGBUILTIN
#undef TARGET_BUILTIN
#undef TARGET_HEADER_BUILTIN
)c++";
}