llvm-project/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp

//===--- FormatStringConverter.cpp - clang-tidy----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Implementation of the FormatStringConverter class which is used to convert
/// printf format strings to C++ std::formatter format strings.
///
//===----------------------------------------------------------------------===//

#include "FormatStringConverter.h"
#include "../utils/FixItHintUtils.h"
#include "clang/AST/Expr.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Tooling/FixIt.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Debug.h"

using namespace clang::ast_matchers;
using namespace clang::analyze_printf;

namespace clang::tidy::utils {
using clang::analyze_format_string::ConversionSpecifier;

/// Is the passed type the actual "char" type, whether that be signed or
/// unsigned, rather than explicit signed char or unsigned char types.
static bool isRealCharType(const clang::QualType &Ty) {
  using namespace clang;
  const Type *DesugaredType = Ty->getUnqualifiedDesugaredType();
  if (const auto *BT = llvm::dyn_cast<BuiltinType>(DesugaredType))
    return (BT->getKind() == BuiltinType::Char_U ||
            BT->getKind() == BuiltinType::Char_S);
  return false;
}

/// If possible, return the text name of the signed type that corresponds to the
/// passed integer type. If the passed type is already signed then its name is
/// just returned. Only supports BuiltinTypes.
static std::optional<std::string>
getCorrespondingSignedTypeName(const clang::QualType &QT) {
  using namespace clang;
  const auto UQT = QT.getUnqualifiedType();
  if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) {
    switch (BT->getKind()) {
    case BuiltinType::UChar:
    case BuiltinType::Char_U:
    case BuiltinType::SChar:
    case BuiltinType::Char_S:
      return "signed char";
    case BuiltinType::UShort:
    case BuiltinType::Short:
      return "short";
    case BuiltinType::UInt:
    case BuiltinType::Int:
      return "int";
    case BuiltinType::ULong:
    case BuiltinType::Long:
      return "long";
    case BuiltinType::ULongLong:
    case BuiltinType::LongLong:
      return "long long";
    default:
      llvm::dbgs() << "Unknown corresponding signed type for BuiltinType '"
                   << QT.getAsString() << "'\n";
      return std::nullopt;
    }
  }

  // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
  // if the argument type does.
  const std::string TypeName = UQT.getAsString();
  StringRef SimplifiedTypeName{TypeName};
  const bool InStd = SimplifiedTypeName.consume_front("std::");
  const StringRef Prefix = InStd ? "std::" : "";

  if (SimplifiedTypeName.starts_with("uint") &&
      SimplifiedTypeName.ends_with("_t"))
    return (Twine(Prefix) + SimplifiedTypeName.drop_front()).str();

  if (SimplifiedTypeName == "size_t")
    return (Twine(Prefix) + "ssize_t").str();

  llvm::dbgs() << "Unknown corresponding signed type for non-BuiltinType '"
               << UQT.getAsString() << "'\n";
  return std::nullopt;
}

/// If possible, return the text name of the unsigned type that corresponds to
/// the passed integer type. If the passed type is already unsigned then its
/// name is just returned. Only supports BuiltinTypes.
static std::optional<std::string>
getCorrespondingUnsignedTypeName(const clang::QualType &QT) {
  using namespace clang;
  const auto UQT = QT.getUnqualifiedType();
  if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) {
    switch (BT->getKind()) {
    case BuiltinType::SChar:
    case BuiltinType::Char_S:
    case BuiltinType::UChar:
    case BuiltinType::Char_U:
      return "unsigned char";
    case BuiltinType::Short:
    case BuiltinType::UShort:
      return "unsigned short";
    case BuiltinType::Int:
    case BuiltinType::UInt:
      return "unsigned int";
    case BuiltinType::Long:
    case BuiltinType::ULong:
      return "unsigned long";
    case BuiltinType::LongLong:
    case BuiltinType::ULongLong:
      return "unsigned long long";
    default:
      llvm::dbgs() << "Unknown corresponding unsigned type for BuiltinType '"
                   << UQT.getAsString() << "'\n";
      return std::nullopt;
    }
  }

  // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
  // if the argument type does.
  const std::string TypeName = UQT.getAsString();
  StringRef SimplifiedTypeName{TypeName};
  const bool InStd = SimplifiedTypeName.consume_front("std::");
  const StringRef Prefix = InStd ? "std::" : "";

  if (SimplifiedTypeName.starts_with("int") &&
      SimplifiedTypeName.ends_with("_t"))
    return (Twine(Prefix) + "u" + SimplifiedTypeName).str();

  if (SimplifiedTypeName == "ssize_t")
    return (Twine(Prefix) + "size_t").str();
  if (SimplifiedTypeName == "ptrdiff_t")
    return (Twine(Prefix) + "size_t").str();

  llvm::dbgs() << "Unknown corresponding unsigned type for non-BuiltinType '"
               << UQT.getAsString() << "'\n";
  return std::nullopt;
}

static std::optional<std::string>
castTypeForArgument(ConversionSpecifier::Kind ArgKind,
                    const clang::QualType &QT) {
  if (ArgKind == ConversionSpecifier::Kind::uArg)
    return getCorrespondingUnsignedTypeName(QT);
  return getCorrespondingSignedTypeName(QT);
}

static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind,
                                 const clang::QualType &ArgType) {
  if (const auto *BT = llvm::dyn_cast<BuiltinType>(ArgType)) {
    // Unadorned char never matches any expected signedness since it
    // could be signed or unsigned.
    const auto ArgTypeKind = BT->getKind();
    if (ArgTypeKind == BuiltinType::Char_U ||
        ArgTypeKind == BuiltinType::Char_S)
      return false;
  }

  if (ArgKind == ConversionSpecifier::Kind::uArg)
    return ArgType->isUnsignedIntegerType();
  return ArgType->isSignedIntegerType();
}

namespace {
AST_MATCHER(clang::QualType, isRealChar) {
  return clang::tidy::utils::isRealCharType(Node);
}
} // namespace

static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode) {
  /// For printf-style functions, the signedness of the type printed is
  /// indicated by the corresponding type in the format string.
  /// std::print will determine the signedness from the type of the
  /// argument. This means that it is necessary to generate a cast in
  /// StrictMode to ensure that the exact behaviour is maintained.
  /// However, for templated functions like absl::PrintF and
  /// fmt::printf, the signedness of the type printed is also taken from
  /// the actual argument like std::print, so such casts are never
  /// necessary. printf-style functions are variadic, whereas templated
  /// ones aren't, so we can use that to distinguish between the two
  /// cases.
  if (StrictMode) {
    const FunctionDecl *FuncDecl = Call->getDirectCallee();
    assert(FuncDecl);
    return FuncDecl->isVariadic();
  }
  return false;
}

FormatStringConverter::FormatStringConverter(
    ASTContext *ContextIn, const CallExpr *Call, unsigned FormatArgOffset,
    const Configuration ConfigIn, const LangOptions &LO, SourceManager &SM,
    Preprocessor &PP)
    : Context(ContextIn), Config(ConfigIn),
      CastMismatchedIntegerTypes(
          castMismatchedIntegerTypes(Call, ConfigIn.StrictMode)),
      Args(Call->getArgs()), NumArgs(Call->getNumArgs()),
      ArgsOffset(FormatArgOffset + 1), LangOpts(LO) {
  assert(ArgsOffset <= NumArgs);
  FormatExpr = llvm::dyn_cast<StringLiteral>(
      Args[FormatArgOffset]->IgnoreUnlessSpelledInSource());

  assert(FormatExpr && FormatExpr->isOrdinary());

  if (const std::optional<StringRef> MaybeMacroName =
          formatStringContainsUnreplaceableMacro(Call, FormatExpr, SM, PP);
      MaybeMacroName) {
    conversionNotPossible(
        ("format string contains unreplaceable macro '" + *MaybeMacroName + "'")
            .str());
    return;
  }

  PrintfFormatString = FormatExpr->getString();

  // Assume that the output will be approximately the same size as the input,
  // but perhaps with a few escapes expanded.
  const size_t EstimatedGrowth = 8;
  StandardFormatString.reserve(PrintfFormatString.size() + EstimatedGrowth);
  StandardFormatString.push_back('\"');

  const bool IsFreeBsdkPrintf = false;

  using clang::analyze_format_string::ParsePrintfString;
  ParsePrintfString(*this, PrintfFormatString.data(),
                    PrintfFormatString.data() + PrintfFormatString.size(),
                    LangOpts, Context->getTargetInfo(), IsFreeBsdkPrintf);
  finalizeFormatText();
}

std::optional<StringRef>
FormatStringConverter::formatStringContainsUnreplaceableMacro(
    const CallExpr *Call, const StringLiteral *FormatExpr, SourceManager &SM,
    Preprocessor &PP) {
  // If a macro invocation surrounds the entire call then we don't want that to
  // inhibit conversion. The whole format string will appear to come from that
  // macro, as will the function call.
  std::optional<StringRef> MaybeSurroundingMacroName;
  if (SourceLocation BeginCallLoc = Call->getBeginLoc();
      BeginCallLoc.isMacroID())
    MaybeSurroundingMacroName =
        Lexer::getImmediateMacroName(BeginCallLoc, SM, PP.getLangOpts());

  for (auto I = FormatExpr->tokloc_begin(), E = FormatExpr->tokloc_end();
       I != E; ++I) {
    const SourceLocation &TokenLoc = *I;
    if (TokenLoc.isMacroID()) {
      const StringRef MacroName =
          Lexer::getImmediateMacroName(TokenLoc, SM, PP.getLangOpts());

      if (MaybeSurroundingMacroName != MacroName) {
        // glibc uses __PRI64_PREFIX and __PRIPTR_PREFIX to define the prefixes
        // for types that change size so we must look for multiple prefixes.
        if (!MacroName.starts_with("PRI") && !MacroName.starts_with("__PRI"))
          return MacroName;

        const SourceLocation TokenSpellingLoc = SM.getSpellingLoc(TokenLoc);
        const OptionalFileEntryRef MaybeFileEntry =
            SM.getFileEntryRefForID(SM.getFileID(TokenSpellingLoc));
        if (!MaybeFileEntry)
          return MacroName;

        HeaderSearch &HS = PP.getHeaderSearchInfo();
        // Check if the file is a system header
        if (!isSystem(HS.getFileDirFlavor(*MaybeFileEntry)) ||
            llvm::sys::path::filename(MaybeFileEntry->getName()) !=
                "inttypes.h")
          return MacroName;
      }
    }
  }
  return std::nullopt;
}

void FormatStringConverter::emitAlignment(const PrintfSpecifier &FS,
                                          std::string &FormatSpec) {
  ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind();

  // We only care about alignment if a field width is specified
  if (FS.getFieldWidth().getHowSpecified() != OptionalAmount::NotSpecified) {
    if (ArgKind == ConversionSpecifier::sArg) {
      // Strings are left-aligned by default with std::format, so we only
      // need to emit an alignment if this one needs to be right aligned.
      if (!FS.isLeftJustified())
        FormatSpec.push_back('>');
    } else {
      // Numbers are right-aligned by default with std::format, so we only
      // need to emit an alignment if this one needs to be left aligned.
      if (FS.isLeftJustified())
        FormatSpec.push_back('<');
    }
  }
}

void FormatStringConverter::emitSign(const PrintfSpecifier &FS,
                                     std::string &FormatSpec) {
  const ConversionSpecifier Spec = FS.getConversionSpecifier();

  // Ignore on something that isn't numeric. For printf it's would be a
  // compile-time warning but ignored at runtime, but for std::format it
  // ought to be a compile-time error.
  if (Spec.isAnyIntArg() || Spec.isDoubleArg()) {
    // + is preferred to ' '
    if (FS.hasPlusPrefix())
      FormatSpec.push_back('+');
    else if (FS.hasSpacePrefix())
      FormatSpec.push_back(' ');
  }
}

void FormatStringConverter::emitAlternativeForm(const PrintfSpecifier &FS,
                                                std::string &FormatSpec) {
  if (FS.hasAlternativeForm()) {
    switch (FS.getConversionSpecifier().getKind()) {
    case ConversionSpecifier::Kind::aArg:
    case ConversionSpecifier::Kind::AArg:
    case ConversionSpecifier::Kind::eArg:
    case ConversionSpecifier::Kind::EArg:
    case ConversionSpecifier::Kind::fArg:
    case ConversionSpecifier::Kind::FArg:
    case ConversionSpecifier::Kind::gArg:
    case ConversionSpecifier::Kind::GArg:
    case ConversionSpecifier::Kind::xArg:
    case ConversionSpecifier::Kind::XArg:
    case ConversionSpecifier::Kind::oArg:
      FormatSpec.push_back('#');
      break;
    default:
      // Alternative forms don't exist for other argument kinds
      break;
    }
  }
}

void FormatStringConverter::emitFieldWidth(const PrintfSpecifier &FS,
                                           std::string &FormatSpec) {
  {
    const OptionalAmount FieldWidth = FS.getFieldWidth();
    switch (FieldWidth.getHowSpecified()) {
    case OptionalAmount::NotSpecified:
      break;
    case OptionalAmount::Constant:
      FormatSpec.append(llvm::utostr(FieldWidth.getConstantAmount()));
      break;
    case OptionalAmount::Arg:
      FormatSpec.push_back('{');
      if (FieldWidth.usesPositionalArg()) {
        // std::format argument identifiers are zero-based, whereas printf
        // ones are one based.
        assert(FieldWidth.getPositionalArgIndex() > 0U);
        FormatSpec.append(llvm::utostr(FieldWidth.getPositionalArgIndex() - 1));
      }
      FormatSpec.push_back('}');
      break;
    case OptionalAmount::Invalid:
      break;
    }
  }
}

void FormatStringConverter::emitPrecision(const PrintfSpecifier &FS,
                                          std::string &FormatSpec) {
  const OptionalAmount FieldPrecision = FS.getPrecision();
  switch (FieldPrecision.getHowSpecified()) {
  case OptionalAmount::NotSpecified:
    break;
  case OptionalAmount::Constant:
    FormatSpec.push_back('.');
    FormatSpec.append(llvm::utostr(FieldPrecision.getConstantAmount()));
    break;
  case OptionalAmount::Arg:
    FormatSpec.push_back('.');
    FormatSpec.push_back('{');
    if (FieldPrecision.usesPositionalArg()) {
      // std::format argument identifiers are zero-based, whereas printf
      // ones are one based.
      assert(FieldPrecision.getPositionalArgIndex() > 0U);
      FormatSpec.append(
          llvm::utostr(FieldPrecision.getPositionalArgIndex() - 1));
    }
    FormatSpec.push_back('}');
    break;
  case OptionalAmount::Invalid:
    break;
  }
}

void FormatStringConverter::maybeRotateArguments(const PrintfSpecifier &FS) {
  unsigned ArgCount = 0;
  const OptionalAmount FieldWidth = FS.getFieldWidth();
  const OptionalAmount FieldPrecision = FS.getPrecision();

  if (FieldWidth.getHowSpecified() == OptionalAmount::Arg &&
      !FieldWidth.usesPositionalArg())
    ++ArgCount;
  if (FieldPrecision.getHowSpecified() == OptionalAmount::Arg &&
      !FieldPrecision.usesPositionalArg())
    ++ArgCount;

  if (ArgCount)
    ArgRotates.emplace_back(FS.getArgIndex() + ArgsOffset, ArgCount);
}

void FormatStringConverter::emitStringArgument(unsigned ArgIndex,
                                               const Expr *Arg) {
  // If the argument is the result of a call to std::string::c_str() or
  // data() with a return type of char then we can remove that call and
  // pass the std::string directly. We don't want to do so if the return
  // type is not a char pointer (though it's unlikely that such code would
  // compile without warnings anyway.) See RedundantStringCStrCheck.

  if (!StringCStrCallExprMatcher) {
    // Lazily create the matcher
    const auto StringDecl = type(hasUnqualifiedDesugaredType(recordType(
        hasDeclaration(cxxRecordDecl(hasName("::std::basic_string"))))));
    const auto StringExpr = expr(
        anyOf(hasType(StringDecl), hasType(qualType(pointsTo(StringDecl)))));

    StringCStrCallExprMatcher =
        cxxMemberCallExpr(
            on(StringExpr.bind("arg")), callee(memberExpr().bind("member")),
            callee(cxxMethodDecl(hasAnyName("c_str", "data"),
                                 returns(pointerType(pointee(isRealChar()))))))
            .bind("call");
  }

  auto CStrMatches = match(*StringCStrCallExprMatcher, *Arg, *Context);
  if (CStrMatches.size() == 1)
    ArgCStrRemovals.push_back(CStrMatches.front());
  else if (Arg->getType()->isPointerType()) {
    const QualType Pointee = Arg->getType()->getPointeeType();
    // printf is happy to print signed char and unsigned char strings, but
    // std::format only likes char strings.
    if (Pointee->isCharType() && !isRealCharType(Pointee))
      ArgFixes.emplace_back(ArgIndex, "reinterpret_cast<const char *>(");
  }
}

bool FormatStringConverter::emitIntegerArgument(
    ConversionSpecifier::Kind ArgKind, const Expr *Arg, unsigned ArgIndex,
    std::string &FormatSpec) {
  const clang::QualType &ArgType = Arg->getType();
  if (ArgType->isBooleanType()) {
    // std::format will print bool as either "true" or "false" by default,
    // but printf prints them as "0" or "1". Be compatible with printf by
    // requesting decimal output.
    FormatSpec.push_back('d');
  } else if (ArgType->isEnumeralType()) {
    // std::format will try to find a specialization to print the enum
    // (and probably fail), whereas printf would have just expected it to
    // be passed as its underlying type. However, printf will have forced
    // the signedness based on the format string, so we need to do the
    // same.
    if (const auto *ET = ArgType->getAs<EnumType>()) {
      if (const std::optional<std::string> MaybeCastType = castTypeForArgument(
              ArgKind,
              ET->getOriginalDecl()->getDefinitionOrSelf()->getIntegerType()))
        ArgFixes.emplace_back(
            ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str());
      else
        return conversionNotPossible(
            (Twine("argument ") + Twine(ArgIndex) + " has unexpected enum type")
                .str());
    }
  } else if (CastMismatchedIntegerTypes &&
             !isMatchingSignedness(ArgKind, ArgType)) {
    // printf will happily print an unsigned type as signed if told to.
    // Even -Wformat doesn't warn for this. std::format will format as
    // unsigned unless we cast it.
    if (const std::optional<std::string> MaybeCastType =
            castTypeForArgument(ArgKind, ArgType))
      ArgFixes.emplace_back(
          ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str());
    else
      return conversionNotPossible(
          (Twine("argument ") + Twine(ArgIndex) + " cannot be cast to " +
           Twine(ArgKind == ConversionSpecifier::Kind::uArg ? "unsigned"
                                                            : "signed") +
           " integer type to match format"
           " specifier and StrictMode is enabled")
              .str());
  } else if (isRealCharType(ArgType) || !ArgType->isIntegerType()) {
    // Only specify integer if the argument is of a different type
    FormatSpec.push_back('d');
  }
  return true;
}

/// Append the corresponding standard format string type fragment to FormatSpec,
/// and store any argument fixes for later application.
/// @returns true on success, false on failure
bool FormatStringConverter::emitType(const PrintfSpecifier &FS, const Expr *Arg,
                                     std::string &FormatSpec) {
  ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind();
  switch (ArgKind) {
  case ConversionSpecifier::Kind::sArg:
    emitStringArgument(FS.getArgIndex() + ArgsOffset, Arg);
    break;
  case ConversionSpecifier::Kind::cArg:
    // The type must be "c" to get a character unless the type is exactly
    // char (whether that be signed or unsigned for the target.)
    if (!isRealCharType(Arg->getType()))
      FormatSpec.push_back('c');
    break;
  case ConversionSpecifier::Kind::dArg:
  case ConversionSpecifier::Kind::iArg:
  case ConversionSpecifier::Kind::uArg:
    if (!emitIntegerArgument(ArgKind, Arg, FS.getArgIndex() + ArgsOffset,
                             FormatSpec))
      return false;
    break;
  case ConversionSpecifier::Kind::pArg: {
    const clang::QualType &ArgType = Arg->getType();
    // std::format knows how to format void pointers and nullptrs
    if (!ArgType->isNullPtrType() && !ArgType->isVoidPointerType())
      ArgFixes.emplace_back(FS.getArgIndex() + ArgsOffset,
                            "static_cast<const void *>(");
    break;
  }
  case ConversionSpecifier::Kind::xArg:
    FormatSpec.push_back('x');
    break;
  case ConversionSpecifier::Kind::XArg:
    FormatSpec.push_back('X');
    break;
  case ConversionSpecifier::Kind::oArg:
    FormatSpec.push_back('o');
    break;
  case ConversionSpecifier::Kind::aArg:
    FormatSpec.push_back('a');
    break;
  case ConversionSpecifier::Kind::AArg:
    FormatSpec.push_back('A');
    break;
  case ConversionSpecifier::Kind::eArg:
    FormatSpec.push_back('e');
    break;
  case ConversionSpecifier::Kind::EArg:
    FormatSpec.push_back('E');
    break;
  case ConversionSpecifier::Kind::fArg:
    FormatSpec.push_back('f');
    break;
  case ConversionSpecifier::Kind::FArg:
    FormatSpec.push_back('F');
    break;
  case ConversionSpecifier::Kind::gArg:
    FormatSpec.push_back('g');
    break;
  case ConversionSpecifier::Kind::GArg:
    FormatSpec.push_back('G');
    break;
  default:
    // Something we don't understand
    return conversionNotPossible((Twine("argument ") +
                                  Twine(FS.getArgIndex() + ArgsOffset) +
                                  " has an unsupported format specifier")
                                     .str());
  }

  return true;
}

/// Append the standard format string equivalent of the passed PrintfSpecifier
/// to StandardFormatString and store any argument fixes for later application.
/// @returns true on success, false on failure
bool FormatStringConverter::convertArgument(const PrintfSpecifier &FS,
                                            const Expr *Arg,
                                            std::string &StandardFormatString) {
  // The specifier must have an associated argument
  assert(FS.consumesDataArgument());

  StandardFormatString.push_back('{');

  if (FS.usesPositionalArg()) {
    // std::format argument identifiers are zero-based, whereas printf ones
    // are one based.
    assert(FS.getPositionalArgIndex() > 0U);
    StandardFormatString.append(llvm::utostr(FS.getPositionalArgIndex() - 1));
  }

  // std::format format argument parts to potentially emit:
  // [[fill]align][sign]["#"]["0"][width]["."precision][type]
  std::string FormatSpec;

  // printf doesn't support specifying the fill character - it's always a
  // space, so we never need to generate one.

  emitAlignment(FS, FormatSpec);
  emitSign(FS, FormatSpec);
  emitAlternativeForm(FS, FormatSpec);

  if (FS.hasLeadingZeros())
    FormatSpec.push_back('0');

  emitFieldWidth(FS, FormatSpec);
  emitPrecision(FS, FormatSpec);
  maybeRotateArguments(FS);

  if (!emitType(FS, Arg, FormatSpec))
    return false;

  if (!FormatSpec.empty()) {
    StandardFormatString.push_back(':');
    StandardFormatString.append(FormatSpec);
  }

  StandardFormatString.push_back('}');
  return true;
}

/// Called for each format specifier by ParsePrintfString.
bool FormatStringConverter::HandlePrintfSpecifier(const PrintfSpecifier &FS,
                                                  const char *StartSpecifier,
                                                  unsigned SpecifierLen,
                                                  const TargetInfo &Target) {

  const size_t StartSpecifierPos = StartSpecifier - PrintfFormatString.data();
  assert(StartSpecifierPos + SpecifierLen <= PrintfFormatString.size());

  // Everything before the specifier needs copying verbatim
  assert(StartSpecifierPos >= PrintfFormatStringPos);

  appendFormatText(StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
                             StartSpecifierPos - PrintfFormatStringPos));

  const ConversionSpecifier::Kind ArgKind =
      FS.getConversionSpecifier().getKind();

  // Skip over specifier
  PrintfFormatStringPos = StartSpecifierPos + SpecifierLen;
  assert(PrintfFormatStringPos <= PrintfFormatString.size());

  FormatStringNeededRewriting = true;

  if (ArgKind == ConversionSpecifier::Kind::nArg) {
    // std::print doesn't do the equivalent of %n
    return conversionNotPossible("'%n' is not supported in format string");
  }

  if (ArgKind == ConversionSpecifier::Kind::PrintErrno) {
    // std::print doesn't support %m. In theory we could insert a
    // strerror(errno) parameter (assuming that libc has a thread-safe
    // implementation, which glibc does), but that would require keeping track
    // of the input and output parameter indices for position arguments too.
    return conversionNotPossible("'%m' is not supported in format string");
  }

  if (ArgKind == ConversionSpecifier::PercentArg) {
    StandardFormatString.push_back('%');
    return true;
  }

  const unsigned ArgIndex = FS.getArgIndex() + ArgsOffset;
  if (ArgIndex >= NumArgs) {
    // Argument index out of range. Give up.
    return conversionNotPossible(
        (Twine("argument index ") + Twine(ArgIndex) + " is out of range")
            .str());
  }

  return convertArgument(FS, Args[ArgIndex]->IgnoreImplicitAsWritten(),
                         StandardFormatString);
}

/// Called at the very end just before applying fixes to capture the last part
/// of the format string.
void FormatStringConverter::finalizeFormatText() {
  appendFormatText(
      StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
                PrintfFormatString.size() - PrintfFormatStringPos));
  PrintfFormatStringPos = PrintfFormatString.size();

  // It's clearer to convert printf("Hello\r\n"); to std::print("Hello\r\n")
  // than to std::println("Hello\r");
  // Use StringRef until C++20 std::string::ends_with() is available.
  const auto StandardFormatStringRef = StringRef(StandardFormatString);
  if (Config.AllowTrailingNewlineRemoval &&
      StandardFormatStringRef.ends_with("\\n") &&
      !StandardFormatStringRef.ends_with("\\\\n") &&
      !StandardFormatStringRef.ends_with("\\r\\n")) {
    UsePrintNewlineFunction = true;
    FormatStringNeededRewriting = true;
    StandardFormatString.erase(StandardFormatString.end() - 2,
                               StandardFormatString.end());
  }

  StandardFormatString.push_back('\"');
}

/// Append literal parts of the format text, reinstating escapes as required.
void FormatStringConverter::appendFormatText(const StringRef Text) {
  for (const char Ch : Text) {
    if (Ch == '\a')
      StandardFormatString += "\\a";
    else if (Ch == '\b')
      StandardFormatString += "\\b";
    else if (Ch == '\f')
      StandardFormatString += "\\f";
    else if (Ch == '\n')
      StandardFormatString += "\\n";
    else if (Ch == '\r')
      StandardFormatString += "\\r";
    else if (Ch == '\t')
      StandardFormatString += "\\t";
    else if (Ch == '\v')
      StandardFormatString += "\\v";
    else if (Ch == '\"')
      StandardFormatString += "\\\"";
    else if (Ch == '\\')
      StandardFormatString += "\\\\";
    else if (Ch == '{') {
      StandardFormatString += "{{";
      FormatStringNeededRewriting = true;
    } else if (Ch == '}') {
      StandardFormatString += "}}";
      FormatStringNeededRewriting = true;
    } else if (Ch < 32) {
      StandardFormatString += "\\x";
      StandardFormatString += llvm::hexdigit(Ch >> 4, true);
      StandardFormatString += llvm::hexdigit(Ch & 0xf, true);
    } else
      StandardFormatString += Ch;
  }
}

static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch,
                                          ASTContext &Context) {
  const auto *Arg = CStrRemovalMatch.getNodeAs<Expr>("arg");
  const auto *Member = CStrRemovalMatch.getNodeAs<MemberExpr>("member");
  const bool Arrow = Member->isArrow();
  return Arrow ? utils::fixit::formatDereference(*Arg, Context)
               : tooling::fixit::getText(*Arg, Context).str();
}

/// Called by the check when it is ready to apply the fixes.
void FormatStringConverter::applyFixes(DiagnosticBuilder &Diag,
                                       SourceManager &SM) {
  if (FormatStringNeededRewriting) {
    Diag << FixItHint::CreateReplacement(
        CharSourceRange::getTokenRange(FormatExpr->getBeginLoc(),
                                       FormatExpr->getEndLoc()),
        StandardFormatString);
  }

  // ArgCount is one less than the number of arguments to be rotated.
  for (auto [ValueArgIndex, ArgCount] : ArgRotates) {
    assert(ValueArgIndex < NumArgs);
    assert(ValueArgIndex > ArgCount);

    // First move the value argument to the right place. But if there's a
    // pending c_str() removal then we must do that at the same time.
    if (const auto CStrRemovalMatch =
            std::find_if(ArgCStrRemovals.cbegin(), ArgCStrRemovals.cend(),
                         [ArgStartPos = Args[ValueArgIndex]->getBeginLoc()](
                             const BoundNodes &Match) {
                           // This c_str() removal corresponds to the argument
                           // being moved if they start at the same location.
                           const Expr *CStrArg = Match.getNodeAs<Expr>("arg");
                           return ArgStartPos == CStrArg->getBeginLoc();
                         });
        CStrRemovalMatch != ArgCStrRemovals.end()) {
      const std::string ArgText =
          withoutCStrReplacement(*CStrRemovalMatch, *Context);
      assert(!ArgText.empty());

      Diag << FixItHint::CreateReplacement(
          Args[ValueArgIndex - ArgCount]->getSourceRange(), ArgText);

      // That c_str() removal is now dealt with, so we don't need to do it again
      ArgCStrRemovals.erase(CStrRemovalMatch);
    } else
      Diag << tooling::fixit::createReplacement(*Args[ValueArgIndex - ArgCount],
                                                *Args[ValueArgIndex], *Context);

    // Now shift down the field width and precision (if either are present) to
    // accommodate it.
    for (size_t Offset = 0; Offset < ArgCount; ++Offset)
      Diag << tooling::fixit::createReplacement(
          *Args[ValueArgIndex - Offset], *Args[ValueArgIndex - Offset - 1],
          *Context);

    // Now we need to modify the ArgFix index too so that we fix the right
    // argument. We don't need to care about the width and precision indices
    // since they never need fixing.
    for (auto &ArgFix : ArgFixes) {
      if (ArgFix.ArgIndex == ValueArgIndex)
        ArgFix.ArgIndex = ValueArgIndex - ArgCount;
    }
  }

  for (const auto &[ArgIndex, Replacement] : ArgFixes) {
    SourceLocation AfterOtherSide =
        Lexer::findNextToken(Args[ArgIndex]->getEndLoc(), SM, LangOpts)
            ->getLocation();

    Diag << FixItHint::CreateInsertion(Args[ArgIndex]->getBeginLoc(),
                                       Replacement, true)
         << FixItHint::CreateInsertion(AfterOtherSide, ")", true);
  }

  for (const auto &Match : ArgCStrRemovals) {
    const auto *Call = Match.getNodeAs<CallExpr>("call");
    const std::string ArgText = withoutCStrReplacement(Match, *Context);
    if (!ArgText.empty())
      Diag << FixItHint::CreateReplacement(Call->getSourceRange(), ArgText);
  }
}
} // namespace clang::tidy::utils