llvm-project/llvm/tools/llvm-strings/llvm-strings.cpp
Chandler Carruth dd647e3e60
Rework the Option library to reduce dynamic relocations (#119198)
Apologies for the large change, I looked for ways to break this up and
all of the ones I saw added real complexity. This change focuses on the
option's prefixed names and the array of prefixes. These are present in
every option and the dominant source of dynamic relocations for PIE or
PIC users of LLVM and Clang tooling. In some cases, 100s or 1000s of
them for the Clang driver which has a huge number of options.

This PR addresses this by building a string table and a prefixes table
that can be referenced with indices rather than pointers that require
dynamic relocations. This removes almost 7k dynamic relocations from the
`clang` binary, roughly 8% of the remaining dynamic relocations outside
of vtables. For busy-boxing use cases where many different option tables
are linked into the same binary, the savings add up a bit more.

The string table is a straightforward mechanism, but the prefixes
required some subtlety. They are encoded in a Pascal-string fashion with
a size followed by a sequence of offsets. This works relatively well for
the small realistic prefixes arrays in use.

Lots of code has to change in order to land this though: all of the
option library code has to be updated to use the string table and
prefixes table, and all the users of the options library have to be
updated to correctly instantiate the objects.

Some follow-up patches in the works to provide an abstraction for this
style of code, and to start using the same technique for some of the
other strings here now that the infrastructure is in place.
2024-12-11 15:44:44 -08:00

188 lines
5.3 KiB
C++

//===-- llvm-strings.cpp - Printable String dumping utility ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This program is a utility that works like binutils "strings", that is, it
// prints out printable strings in a binary, object, or archive file.
//
//===----------------------------------------------------------------------===//
#include "Opts.inc"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Object/Binary.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/WithColor.h"
#include <cctype>
#include <string>
using namespace llvm;
using namespace llvm::object;
// File-local option-table plumbing. Opts.inc is re-included several times
// below, each time with a different macro defined, so that the same option
// list expands into a different piece of the table on each inclusion.
namespace {
// Numeric identifiers for the command-line options: one enumerator is produced
// per OPTION entry in Opts.inc via LLVM_MAKE_OPT_ID.
enum ID {
OPT_INVALID = 0, // This is not an option ID.
#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
#include "Opts.inc"
#undef OPTION
};
// Expand the string-table portion of Opts.inc (presumably what defines
// OptionStrTable, which the constructor below passes on — confirm against the
// generated file).
#define OPTTABLE_STR_TABLE_CODE
#include "Opts.inc"
#undef OPTTABLE_STR_TABLE_CODE
// Expand the prefixes-table portion of Opts.inc (presumably what defines
// OptionPrefixesTable — confirm against the generated file).
#define OPTTABLE_PREFIXES_TABLE_CODE
#include "Opts.inc"
#undef OPTTABLE_PREFIXES_TABLE_CODE
using namespace llvm::opt;
// One OptTable::Info record per OPTION entry in Opts.inc, built at compile
// time with LLVM_CONSTRUCT_OPT_INFO.
static constexpr opt::OptTable::Info InfoTable[] = {
#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
#include "Opts.inc"
#undef OPTION
};
// Option table for this tool: wires the generated string table, prefixes
// table and InfoTable into GenericOptTable.
class StringsOptTable : public opt::GenericOptTable {
public:
StringsOptTable()
: GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {
// Turn on grouped-short-option handling and "--" handling for every parse
// performed through this table.
setGroupedShortOptions(true);
setDashDashParsing(true);
}
};
} // namespace
// Name the tool was invoked as. main assigns it from argv[0]; it is used as
// the prefix of error messages and in the --help / --version output.
static StringRef ToolName;
// NOTE(review): main builds its list of inputs from
// Args.getAllArgValues(OPT_INPUT) and never reads this cl::list, so within
// this file it appears to be unused — confirm before removing.
static cl::list<std::string> InputFileNames(cl::Positional,
cl::desc("<input object files>"));
// Shortest run of printable characters that strings() will print. Defaults to
// 4; main overwrites it from OPT_bytes_EQ when that option is present.
static int MinLength = 4;
// When true, strings() prefixes every printed string with "<file name>: ".
static bool PrintFileName;
// Numeric base used for the offset printed in front of each string; `none`
// means no offset is printed at all.
enum radix { none, octal, hexadecimal, decimal };
// Currently selected offset base; main assigns it from OPT_radix_EQ.
static radix Radix;
[[noreturn]] static void reportCmdLineError(const Twine &Message) {
WithColor::error(errs(), ToolName) << Message << "\n";
exit(1);
}
/// Reads the last occurrence of option \p ID from \p Args and stores its
/// value, parsed as an integer with an auto-detected base, into \p Value.
///
/// If the option was not given, \p Value is left untouched. If the text does
/// not parse, or parses to a value that is not strictly positive, a
/// command-line error is reported and the process exits.
template <typename T>
static void parseIntArg(const opt::InputArgList &Args, int ID, T &Value) {
  const opt::Arg *Found = Args.getLastArg(ID);
  if (!Found)
    return;

  const StringRef Text(Found->getValue());
  // The range test is only meaningful once parsing has succeeded, hence the
  // short-circuiting conjunction.
  const bool Accepted = llvm::to_integer(Text, Value, 0) && Value > 0;
  if (Accepted)
    return;

  reportCmdLineError("expected a positive integer, but got '" + Text + "'");
}
/// Scans \p Contents for maximal runs of printable characters and writes every
/// run that is at least MinLength characters long to \p OS, one per line.
///
/// A character belongs to a run when isPrint() accepts it or when it is a
/// horizontal tab. Each emitted line is optionally prefixed with
/// "<FileName>: " (when PrintFileName is set) and with the run's byte offset
/// inside \p Contents, formatted in the base selected by Radix.
///
/// \param OS        stream that receives the output.
/// \param FileName  label printed in front of each string when PrintFileName
///                  is set.
/// \param Contents  raw bytes to scan.
static void strings(raw_ostream &OS, StringRef FileName, StringRef Contents) {
  // The offset is carried as unsigned long long (at least 64 bits) rather
  // than unsigned, so that offsets at or beyond 4 GiB are not silently
  // truncated when printed.
  auto print = [&OS, FileName](unsigned long long Offset, StringRef L) {
    if (L.size() < static_cast<size_t>(MinLength))
      return;
    if (PrintFileName)
      OS << FileName << ": ";
    switch (Radix) {
    case none:
      break;
    case octal:
      OS << format("%7llo ", Offset);
      break;
    case hexadecimal:
      OS << format("%7llx ", Offset);
      break;
    case decimal:
      OS << format("%7llu ", Offset);
      break;
    }
    OS << L << '\n';
  };

  const char *const Begin = Contents.begin();
  const char *const End = Contents.end();
  // Start of the run currently being accumulated, or nullptr between runs.
  const char *RunStart = nullptr;

  for (const char *P = Begin; P < End; ++P) {
    if (isPrint(*P) || *P == '\t') {
      if (RunStart == nullptr)
        RunStart = P;
    } else if (RunStart) {
      // A non-printable byte terminates the current run.
      print(static_cast<unsigned long long>(RunStart - Begin),
            StringRef(RunStart, P - RunStart));
      RunStart = nullptr;
    }
  }

  // Flush a run that extends to the very end of the buffer.
  if (RunStart)
    print(static_cast<unsigned long long>(RunStart - Begin),
          StringRef(RunStart, End - RunStart));
}
/// Entry point: parses the command line, configures the file-level settings
/// (MinLength, PrintFileName, Radix) and runs strings() over every input.
///
/// --help and --version print their text and return 0 immediately. Invalid
/// option values are reported through reportCmdLineError(), which exits the
/// process. When no input is named, "-" is used. An input that
/// cannot be opened is reported on the error stream and skipped; the remaining
/// inputs are still processed.
///
/// \returns 0 / EXIT_SUCCESS once all inputs have been visited.
int main(int argc, char **argv) {
  InitLLVM X(argc, argv);
  BumpPtrAllocator A;
  StringSaver Saver(A);
  StringsOptTable Tbl;
  ToolName = argv[0];
  opt::InputArgList Args =
      Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver,
                    [&](StringRef Msg) { reportCmdLineError(Msg); });

  if (Args.hasArg(OPT_help)) {
    Tbl.printHelp(
        outs(),
        (Twine(ToolName) + " [options] <input object files>").str().c_str(),
        "llvm string dumper");
    // TODO Replace this with OptTable API once it adds extrahelp support.
    outs() << "\nPass @FILE as argument to read options from FILE.\n";
    return 0;
  }
  if (Args.hasArg(OPT_version)) {
    outs() << ToolName << '\n';
    cl::PrintVersionMessage();
    return 0;
  }

  // parseIntArg() exits on any value that is not strictly positive and leaves
  // MinLength at its default otherwise, so MinLength is guaranteed to be > 0
  // from here on; no separate zero check is required.
  parseIntArg(Args, OPT_bytes_EQ, MinLength);
  PrintFileName = Args.hasArg(OPT_print_file_name);

  StringRef R = Args.getLastArgValue(OPT_radix_EQ);
  if (R.empty())
    Radix = none;
  else if (R == "o")
    Radix = octal;
  else if (R == "d")
    Radix = decimal;
  else if (R == "x")
    Radix = hexadecimal;
  else
    reportCmdLineError("--radix value should be one of: '' (no offset), 'o' "
                       "(octal), 'd' (decimal), 'x' (hexadecimal)");

  // Named Inputs so that it does not shadow the file-level InputFileNames.
  std::vector<std::string> Inputs = Args.getAllArgValues(OPT_INPUT);
  if (Inputs.empty())
    Inputs.push_back("-");

  for (const std::string &File : Inputs) {
    ErrorOr<std::unique_ptr<MemoryBuffer>> Buffer =
        MemoryBuffer::getFileOrSTDIN(File, /*IsText=*/true);
    // NOTE(review): a file that fails to open is reported but does not affect
    // the exit status — confirm this is intended.
    if (std::error_code EC = Buffer.getError())
      errs() << File << ": " << EC.message() << '\n';
    else
      strings(llvm::outs(), File == "-" ? "{standard input}" : File,
              Buffer.get()->getMemBufferRef().getBuffer());
  }
  return EXIT_SUCCESS;
}