Alexander Kornienko a594ba8a76 Always break before the colon in constructor initializers, when
BreakConstructorInitializersBeforeComma is true.

This option is used in WebKit style, so this also ensures initializer lists are
not put on a single line, as per the WebKit coding guidelines.

Patch by Florian Sowade!

llvm-svn: 197386
2013-12-16 14:35:51 +00:00

1768 lines
67 KiB
C++

//===--- Format.cpp - Format C++ code -------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements functions declared in Format.h. This will be
/// split into separate files as we go.
///
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "format-formatter"
#include "ContinuationIndenter.h"
#include "TokenAnnotator.h"
#include "UnwrappedLineParser.h"
#include "WhitespaceManager.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/Path.h"
#include <queue>
#include <string>
using clang::format::FormatStyle;
namespace llvm {
namespace yaml {
template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
}
};
template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> {
static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) {
IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03);
IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03);
IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11);
IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11);
IO.enumCase(Value, "Auto", FormatStyle::LS_Auto);
}
};
template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) {
IO.enumCase(Value, "Never", FormatStyle::UT_Never);
IO.enumCase(Value, "false", FormatStyle::UT_Never);
IO.enumCase(Value, "Always", FormatStyle::UT_Always);
IO.enumCase(Value, "true", FormatStyle::UT_Always);
IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
}
};
template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) {
IO.enumCase(Value, "Attach", FormatStyle::BS_Attach);
IO.enumCase(Value, "Linux", FormatStyle::BS_Linux);
IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup);
IO.enumCase(Value, "Allman", FormatStyle::BS_Allman);
IO.enumCase(Value, "GNU", FormatStyle::BS_GNU);
}
};
template <>
struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
static void enumeration(IO &IO,
FormatStyle::NamespaceIndentationKind &Value) {
IO.enumCase(Value, "None", FormatStyle::NI_None);
IO.enumCase(Value, "Inner", FormatStyle::NI_Inner);
IO.enumCase(Value, "All", FormatStyle::NI_All);
}
};
template <>
struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
static void enumeration(IO &IO,
FormatStyle::SpaceBeforeParensOptions &Value) {
IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
IO.enumCase(Value, "ControlStatements",
FormatStyle::SBPO_ControlStatements);
IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);
// For backward compatibility.
IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements);
}
};
template <> struct MappingTraits<FormatStyle> {
static void mapping(IO &IO, FormatStyle &Style) {
// When reading, read the language first, we need it for getPredefinedStyle.
IO.mapOptional("Language", Style.Language);
if (IO.outputting()) {
StringRef StylesArray[] = { "LLVM", "Google", "Chromium",
"Mozilla", "WebKit", "GNU" };
ArrayRef<StringRef> Styles(StylesArray);
for (size_t i = 0, e = Styles.size(); i < e; ++i) {
StringRef StyleName(Styles[i]);
FormatStyle PredefinedStyle;
if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) &&
Style == PredefinedStyle) {
IO.mapOptional("# BasedOnStyle", StyleName);
break;
}
}
} else {
StringRef BasedOnStyle;
IO.mapOptional("BasedOnStyle", BasedOnStyle);
if (!BasedOnStyle.empty()) {
FormatStyle::LanguageKind OldLanguage = Style.Language;
FormatStyle::LanguageKind Language =
((FormatStyle *)IO.getContext())->Language;
if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) {
IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
return;
}
Style.Language = OldLanguage;
}
}
IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
IO.mapOptional("ConstructorInitializerIndentWidth",
Style.ConstructorInitializerIndentWidth);
IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
Style.AllowAllParametersOfDeclarationOnNextLine);
IO.mapOptional("AllowShortIfStatementsOnASingleLine",
Style.AllowShortIfStatementsOnASingleLine);
IO.mapOptional("AllowShortLoopsOnASingleLine",
Style.AllowShortLoopsOnASingleLine);
IO.mapOptional("AllowShortFunctionsOnASingleLine",
Style.AllowShortFunctionsOnASingleLine);
IO.mapOptional("AlwaysBreakTemplateDeclarations",
Style.AlwaysBreakTemplateDeclarations);
IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
Style.AlwaysBreakBeforeMultilineStrings);
IO.mapOptional("BreakBeforeBinaryOperators",
Style.BreakBeforeBinaryOperators);
IO.mapOptional("BreakBeforeTernaryOperators",
Style.BreakBeforeTernaryOperators);
IO.mapOptional("BreakConstructorInitializersBeforeComma",
Style.BreakConstructorInitializersBeforeComma);
IO.mapOptional("BinPackParameters", Style.BinPackParameters);
IO.mapOptional("ColumnLimit", Style.ColumnLimit);
IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding);
IO.mapOptional("ExperimentalAutoDetectBinPacking",
Style.ExperimentalAutoDetectBinPacking);
IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
IO.mapOptional("ObjCSpaceBeforeProtocolList",
Style.ObjCSpaceBeforeProtocolList);
IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
Style.PenaltyBreakBeforeFirstCallParameter);
IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
IO.mapOptional("PenaltyBreakFirstLessLess",
Style.PenaltyBreakFirstLessLess);
IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
Style.PenaltyReturnTypeOnItsOwnLine);
IO.mapOptional("PointerBindsToType", Style.PointerBindsToType);
IO.mapOptional("SpacesBeforeTrailingComments",
Style.SpacesBeforeTrailingComments);
IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
IO.mapOptional("Standard", Style.Standard);
IO.mapOptional("IndentWidth", Style.IndentWidth);
IO.mapOptional("TabWidth", Style.TabWidth);
IO.mapOptional("UseTab", Style.UseTab);
IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
IO.mapOptional("IndentFunctionDeclarationAfterType",
Style.IndentFunctionDeclarationAfterType);
IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
IO.mapOptional("SpacesInCStyleCastParentheses",
Style.SpacesInCStyleCastParentheses);
IO.mapOptional("SpaceBeforeAssignmentOperators",
Style.SpaceBeforeAssignmentOperators);
IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
// For backward compatibility.
if (!IO.outputting()) {
IO.mapOptional("SpaceAfterControlStatementKeyword",
Style.SpaceBeforeParens);
}
IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
}
};
// Reads a std::vector<FormatStyle> from a list of YAML documents while
// keeping default values for keys that a document does not mention.
//
// IO.getContext() must point to the FormatStyle that supplies those defaults.
// If the first element has no Language specified (LK_None), it serves as the
// template for all following elements instead.
template <> struct DocumentListTraits<std::vector<FormatStyle> > {
  static size_t size(IO &IO, std::vector<FormatStyle> &Seq) {
    return Seq.size();
  }

  // Returns the element at Index, appending a new element first when Index is
  // one past the current end.
  static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq,
                              size_t Index) {
    if (Index < Seq.size())
      return Seq[Index];

    // Elements may only be appended one at a time.
    assert(Index == Seq.size());

    const bool InheritFromFirst =
        Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None;
    FormatStyle Template;
    if (InheritFromFirst) {
      Template = Seq[0];
    } else {
      Template = *((const FormatStyle *)IO.getContext());
      // Start without a language so a missing "Language" key can be detected.
      Template.Language = FormatStyle::LK_None;
    }
    Seq.resize(Index + 1, Template);
    return Seq[Index];
  }
};
}
}
namespace clang {
namespace format {
/// \brief Builds the LLVM style. Every field assigned here gets an explicit
/// value; the other predefined styles in this file start from this one.
FormatStyle getLLVMStyle() {
  FormatStyle Style;

  // Language and standard.
  Style.Language = FormatStyle::LK_Cpp;
  Style.Standard = FormatStyle::LS_Cpp03;

  // Indentation and widths.
  Style.IndentWidth = 2;
  Style.TabWidth = 8;
  Style.UseTab = FormatStyle::UT_Never;
  Style.ContinuationIndentWidth = 4;
  Style.ConstructorInitializerIndentWidth = 4;
  Style.AccessModifierOffset = -2;
  Style.IndentCaseLabels = false;
  Style.IndentFunctionDeclarationAfterType = false;
  Style.NamespaceIndentation = FormatStyle::NI_None;

  // Line length and empty lines.
  Style.ColumnLimit = 80;
  Style.MaxEmptyLinesToKeep = 1;

  // Alignment.
  Style.AlignEscapedNewlinesLeft = false;
  Style.AlignTrailingComments = true;

  // Short constructs on a single line.
  Style.AllowShortFunctionsOnASingleLine = true;
  Style.AllowShortIfStatementsOnASingleLine = false;
  Style.AllowShortLoopsOnASingleLine = false;

  // Line breaking and bin packing.
  Style.AllowAllParametersOfDeclarationOnNextLine = true;
  Style.AlwaysBreakBeforeMultilineStrings = false;
  Style.AlwaysBreakTemplateDeclarations = false;
  Style.BinPackParameters = true;
  Style.ExperimentalAutoDetectBinPacking = false;
  Style.BreakBeforeBinaryOperators = false;
  Style.BreakBeforeTernaryOperators = true;
  Style.BreakBeforeBraces = FormatStyle::BS_Attach;
  Style.BreakConstructorInitializersBeforeComma = false;
  Style.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
  Style.Cpp11BracedListStyle = false;

  // Pointer and reference binding.
  Style.DerivePointerBinding = false;
  Style.PointerBindsToType = false;

  // Spacing.
  Style.ObjCSpaceBeforeProtocolList = true;
  Style.SpacesBeforeTrailingComments = 1;
  Style.SpacesInParentheses = false;
  Style.SpaceInEmptyParentheses = false;
  Style.SpacesInCStyleCastParentheses = false;
  Style.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
  Style.SpaceBeforeAssignmentOperators = true;
  Style.SpacesInAngles = false;

  // Penalties.
  Style.PenaltyBreakComment = 60;
  Style.PenaltyBreakFirstLessLess = 120;
  Style.PenaltyBreakString = 1000;
  Style.PenaltyExcessCharacter = 1000000;
  Style.PenaltyReturnTypeOnItsOwnLine = 60;
  Style.PenaltyBreakBeforeFirstCallParameter = 19;

  return Style;
}
/// \brief Builds the Google style: the LLVM style with the overrides below.
FormatStyle getGoogleStyle() {
  FormatStyle Style = getLLVMStyle();

  // Language standard.
  Style.Standard = FormatStyle::LS_Auto;

  // Indentation and alignment.
  Style.AccessModifierOffset = -1;
  Style.AlignEscapedNewlinesLeft = true;
  Style.IndentCaseLabels = true;
  Style.IndentFunctionDeclarationAfterType = true;

  // Short constructs on a single line.
  Style.AllowShortIfStatementsOnASingleLine = true;
  Style.AllowShortLoopsOnASingleLine = true;

  // Line breaking.
  Style.AlwaysBreakBeforeMultilineStrings = true;
  Style.AlwaysBreakTemplateDeclarations = true;
  Style.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
  Style.Cpp11BracedListStyle = true;

  // Pointer and reference binding.
  Style.DerivePointerBinding = true;
  Style.PointerBindsToType = true;

  // Spacing.
  Style.ObjCSpaceBeforeProtocolList = false;
  Style.SpacesBeforeTrailingComments = 2;

  // Penalties.
  Style.PenaltyReturnTypeOnItsOwnLine = 200;
  Style.PenaltyBreakBeforeFirstCallParameter = 1;

  return Style;
}
/// \brief Builds the Google JavaScript style: the Google style with the
/// language set to JavaScript and no break before ternary operators.
FormatStyle getGoogleJSStyle() {
  FormatStyle Style = getGoogleStyle();
  Style.BreakBeforeTernaryOperators = false;
  Style.Language = FormatStyle::LK_JavaScript;
  // FIXME: Currently unimplemented:
  // var arr = [1, 2, 3]; // No space after [ or before ].
  // var obj = {a: 1, b: 2, c: 3}; // No space after ':'.
  return Style;
}
/// \brief Builds the Chromium style: the Google style with the overrides
/// below.
FormatStyle getChromiumStyle() {
  FormatStyle Style = getGoogleStyle();

  // Language standard.
  Style.Standard = FormatStyle::LS_Cpp03;

  // No short ifs or loops on a single line.
  Style.AllowShortIfStatementsOnASingleLine = false;
  Style.AllowShortLoopsOnASingleLine = false;

  // Parameter layout.
  Style.AllowAllParametersOfDeclarationOnNextLine = false;
  Style.BinPackParameters = false;

  // Pointer binding is not derived from the input.
  Style.DerivePointerBinding = false;

  return Style;
}
/// \brief Builds the Mozilla style: the LLVM style with the overrides below.
FormatStyle getMozillaStyle() {
  FormatStyle Style = getLLVMStyle();

  // Line breaking and indentation.
  Style.AllowAllParametersOfDeclarationOnNextLine = false;
  Style.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
  Style.IndentCaseLabels = true;

  // Pointer and reference binding.
  Style.DerivePointerBinding = true;
  Style.PointerBindsToType = true;

  // Spacing.
  Style.ObjCSpaceBeforeProtocolList = false;

  // Penalties.
  Style.PenaltyReturnTypeOnItsOwnLine = 200;

  return Style;
}
/// \brief Builds the WebKit style: the LLVM style with the overrides below.
FormatStyle getWebKitStyle() {
  FormatStyle WebKitStyle = getLLVMStyle();

  // Indentation.
  WebKitStyle.IndentWidth = 4;
  WebKitStyle.AccessModifierOffset = -4;
  WebKitStyle.NamespaceIndentation = FormatStyle::NI_Inner;

  // A column limit of 0 selects the no-column-limit code paths in this file.
  WebKitStyle.ColumnLimit = 0;

  // Line breaking.
  WebKitStyle.BreakBeforeBinaryOperators = true;
  WebKitStyle.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
  WebKitStyle.BreakConstructorInitializersBeforeComma = true;

  // Alignment and pointer binding.
  WebKitStyle.AlignTrailingComments = false;
  WebKitStyle.PointerBindsToType = true;

  return WebKitStyle;
}
/// \brief Builds the GNU style: the LLVM style with the overrides below.
FormatStyle getGNUStyle() {
  FormatStyle GNUStyle = getLLVMStyle();

  // Line length.
  GNUStyle.ColumnLimit = 79;

  // Line breaking.
  GNUStyle.BreakBeforeBraces = FormatStyle::BS_GNU;
  GNUStyle.BreakBeforeBinaryOperators = true;
  GNUStyle.BreakBeforeTernaryOperators = true;

  // Spacing.
  GNUStyle.SpaceBeforeParens = FormatStyle::SBPO_Always;

  return GNUStyle;
}
/// \brief Looks up a predefined style by name, ignoring case.
///
/// \param Name one of "llvm", "chromium", "mozilla", "google", "webkit" or
/// "gnu" in any letter case.
/// \param Language selects the JavaScript variant for "google" and is stored
/// in the resulting style's \c Language field.
/// \param Style receives the predefined style; left untouched on failure.
/// \returns \c false if \p Name is not a known style name.
bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language,
                        FormatStyle *Style) {
  // Build the result locally and commit it only once the name is recognised.
  FormatStyle Predefined;
  if (Name.equals_lower("llvm"))
    Predefined = getLLVMStyle();
  else if (Name.equals_lower("chromium"))
    Predefined = getChromiumStyle();
  else if (Name.equals_lower("mozilla"))
    Predefined = getMozillaStyle();
  else if (Name.equals_lower("google"))
    Predefined = Language == FormatStyle::LK_JavaScript ? getGoogleJSStyle()
                                                        : getGoogleStyle();
  else if (Name.equals_lower("webkit"))
    Predefined = getWebKitStyle();
  else if (Name.equals_lower("gnu"))
    Predefined = getGNUStyle();
  else
    return false;

  Predefined.Language = Language;
  *Style = Predefined;
  return true;
}
/// \brief Parses a YAML configuration \p Text into \p Style.
///
/// On entry \p Style supplies the requested language (which must not be
/// \c LK_None) and the defaults for keys missing from the configuration.
/// On success it is replaced by the configuration for that language, or by
/// the language-less default configuration if no specific one exists.
///
/// \returns \c invalid_argument for empty text, for a configuration without a
/// language in a position other than the first, and for duplicate languages;
/// the parser's error if parsing fails; \c not_supported if no configuration
/// applies; \c success otherwise.
llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
  assert(Style);
  const FormatStyle::LanguageKind Language = Style->Language;
  assert(Language != FormatStyle::LK_None);

  if (Text.trim().empty())
    return llvm::make_error_code(llvm::errc::invalid_argument);

  std::vector<FormatStyle> Styles;
  llvm::yaml::Input Input(Text);
  // DocumentListTraits<vector<FormatStyle>> takes the defaults for missing
  // keys from the context, and the mapping takes the language used to select
  // the base style from it.
  Input.setContext(Style);
  Input >> Styles;
  if (Input.error())
    return Input.error();

  for (unsigned Cur = 0, NumStyles = Styles.size(); Cur != NumStyles; ++Cur) {
    // Only the first configuration may omit the Language option.
    if (Cur != 0 && Styles[Cur].Language == FormatStyle::LK_None)
      return llvm::make_error_code(llvm::errc::invalid_argument);

    // Each language may be configured at most once.
    for (unsigned Prev = 0; Prev != Cur; ++Prev) {
      if (Styles[Cur].Language != Styles[Prev].Language)
        continue;
      DEBUG(llvm::dbgs()
            << "Duplicate languages in the config file on positions " << Prev
            << " and " << Cur << "\n");
      return llvm::make_error_code(llvm::errc::invalid_argument);
    }
  }

  // Search from the back so that a configuration for the specific language
  // wins over the default configuration, which can only be at slot 0.
  for (unsigned Idx = Styles.size(); Idx-- > 0;) {
    const FormatStyle &Candidate = Styles[Idx];
    if (Candidate.Language != Language &&
        Candidate.Language != FormatStyle::LK_None)
      continue;
    *Style = Candidate;
    Style->Language = Language;
    return llvm::make_error_code(llvm::errc::success);
  }
  return llvm::make_error_code(llvm::errc::not_supported);
}
std::string configurationAsText(const FormatStyle &Style) {
std::string Text;
llvm::raw_string_ostream Stream(Text);
llvm::yaml::Output Output(Stream);
// We use the same mapping method for input and output, so we need a non-const
// reference here.
FormatStyle NonConstStyle = Style;
Output << NonConstStyle;
return Stream.str();
}
namespace {
/// \brief Formatter used when there is no column limit: it places the tokens
/// of a line one by one and takes over the line breaks found in the input.
class NoColumnLimitFormatter {
public:
  NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {}

  /// \brief Formats \p Line starting at indent \p FirstIndent, simply keeping
  /// all of the input's line breaking decisions.
  void format(unsigned FirstIndent, const AnnotatedLine *Line) {
    for (LineState State =
             Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false);
         State.NextToken != NULL;) {
      // Break where a break is mandatory, or where one is allowed and the
      // input had a newline before the token.
      bool BreakHere = Indenter->mustBreak(State);
      if (!BreakHere && Indenter->canBreak(State))
        BreakHere = State.NextToken->NewlinesBefore > 0;
      Indenter->addTokenToState(State, BreakHere, /*DryRun=*/false);
    }
  }

private:
  ContinuationIndenter *Indenter;
};
/// \brief Decides whether consecutive unwrapped lines (short blocks, simple
/// control statements, preprocessor continuations) can be put on one line.
///
/// The class only computes how many lines could be merged; the one state
/// change it makes is in tryMergeSimpleBlock, which adjusts the closing
/// brace token of an empty block.
class LineJoiner {
public:
LineJoiner(const FormatStyle &Style) : Style(Style) {}
/// \brief Calculates how many lines can be merged into 1 starting at \p I.
///
/// \param Indent the indent the line \c *I will be placed at.
/// \param I the first line of the candidate group; must be dereferenceable.
/// \param E the end of the line sequence.
/// \returns the number of lines following \c *I that can be merged into it,
/// or 0 if nothing should be merged.
unsigned
tryFitMultipleLinesInOne(unsigned Indent,
SmallVectorImpl<AnnotatedLine *>::const_iterator I,
SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
// We can never merge stuff if there are trailing line comments.
AnnotatedLine *TheLine = *I;
if (TheLine->Last->Type == TT_LineComment)
return 0;
// With a column limit, an indent beyond the limit leaves no room at all.
if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit)
return 0;
// 'Limit' is the number of columns still available; a column limit of 0
// means "no limit".
unsigned Limit =
Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent;
// If we already exceed the column limit, we set 'Limit' to 0. The different
// tryMerge..() functions can then decide whether to still do merging.
Limit = TheLine->Last->TotalLength > Limit
? 0
: Limit - TheLine->Last->TotalLength;
// There must be a valid next line to merge with.
if (I + 1 == E || I[1]->Type == LT_Invalid)
return 0;
// Function body whose opening brace ends this line.
if (TheLine->Last->Type == TT_FunctionLBrace) {
return Style.AllowShortFunctionsOnASingleLine
? tryMergeSimpleBlock(I, E, Limit)
: 0;
}
// Any other block opened at the end of this line is only merged when braces
// are attached.
if (TheLine->Last->is(tok::l_brace)) {
return Style.BreakBeforeBraces == FormatStyle::BS_Attach
? tryMergeSimpleBlock(I, E, Limit)
: 0;
}
// Function body whose opening brace starts the next line (non-attached
// brace styles).
if (I[1]->First->Type == TT_FunctionLBrace &&
Style.BreakBeforeBraces != FormatStyle::BS_Attach) {
// Reduce the column limit by the number of spaces we need to insert
// around braces.
Limit = Limit > 3 ? Limit - 3 : 0;
unsigned MergedLines = 0;
if (Style.AllowShortFunctionsOnASingleLine) {
MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
// If we managed to merge the block, count the function header, which is
// on a separate line.
if (MergedLines > 0)
++MergedLines;
}
return MergedLines;
}
if (TheLine->First->is(tok::kw_if)) {
return Style.AllowShortIfStatementsOnASingleLine
? tryMergeSimpleControlStatement(I, E, Limit)
: 0;
}
if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
return Style.AllowShortLoopsOnASingleLine
? tryMergeSimpleControlStatement(I, E, Limit)
: 0;
}
// First line of a preprocessor directive: try to pull in its continuation.
if (TheLine->InPPDirective &&
(TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) {
return tryMergeSimplePPDirective(I, E, Limit);
}
return 0;
}
private:
/// \brief Checks whether the line after \c *I is the single continuation
/// line of a preprocessor directive and fits into \p Limit.
///
/// \returns 1 if that line can be merged, 0 otherwise.
unsigned
tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
SmallVectorImpl<AnnotatedLine *>::const_iterator E,
unsigned Limit) {
if (Limit == 0)
return 0;
// The next line must continue the directive (no unescaped newline).
if (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline)
return 0;
// Do not merge if the directive continues beyond the next line.
if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)
return 0;
// One extra column for the separating space.
if (1 + I[1]->Last->TotalLength > Limit)
return 0;
return 1;
}
/// \brief Checks whether the statement on the line after \c *I can be put on
/// the same line as the control statement header on \c *I.
///
/// \returns 1 if the next line can be merged, 0 otherwise.
unsigned tryMergeSimpleControlStatement(
SmallVectorImpl<AnnotatedLine *>::const_iterator I,
SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
if (Limit == 0)
return 0;
// With Allman and GNU braces, a '{' starting the next line is not merged.
if ((Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
Style.BreakBeforeBraces == FormatStyle::BS_GNU) &&
I[1]->First->is(tok::l_brace))
return 0;
// Both lines must agree on being inside a preprocessor directive, and the
// next line must not start a new directive.
if (I[1]->InPPDirective != (*I)->InPPDirective ||
(I[1]->InPPDirective && I[1]->First->HasUnescapedNewline))
return 0;
AnnotatedLine &Line = **I;
// The header line has to end with the closing parenthesis.
if (Line.Last->isNot(tok::r_paren))
return 0;
// One extra column for the separating space.
if (1 + I[1]->Last->TotalLength > Limit)
return 0;
// Do not merge empty statements, nested control statements or comments.
if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for,
tok::kw_while) ||
I[1]->First->Type == TT_LineComment)
return 0;
// Only inline simple if's (no nested if or else).
if (I + 2 != E && Line.First->is(tok::kw_if) &&
I[2]->First->is(tok::kw_else))
return 0;
return 1;
}
/// \brief Checks whether the block opened on \c *I can be put on one line.
///
/// \returns 1 for an empty block (the next line is only the closing brace,
/// optionally followed by a semicolon), 2 for a block with exactly one line
/// that contains no braces, 0 otherwise.
unsigned
tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
SmallVectorImpl<AnnotatedLine *>::const_iterator E,
unsigned Limit) {
// First, check that the current line allows merging. This is the case if
// we're not in a control flow statement and the last token is an opening
// brace.
AnnotatedLine &Line = **I;
if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
tok::kw_else, tok::kw_try, tok::kw_catch,
tok::kw_for,
// This gets rid of all ObjC @ keywords and methods.
tok::at, tok::minus, tok::plus))
return 0;
FormatToken *Tok = I[1]->First;
if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
(Tok->getNextNonComment() == NULL ||
Tok->getNextNonComment()->is(tok::semi))) {
// We merge empty blocks even if the line exceeds the column limit.
// Note that this modifies the closing brace token.
Tok->SpacesRequiredBefore = 0;
Tok->CanBreakBefore = true;
return 1;
} else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) {
// Check that we still have three lines and they fit into the limit.
if (I + 2 == E || I[2]->Type == LT_Invalid)
return 0;
if (!nextTwoLinesFitInto(I, Limit))
return 0;
// Second, check that the next line does not contain any braces - if it
// does, readability declines when putting it into a single line.
if (I[1]->Last->Type == TT_LineComment || Tok->MustBreakBefore)
return 0;
do {
if (Tok->isOneOf(tok::l_brace, tok::r_brace))
return 0;
Tok = Tok->Next;
} while (Tok != NULL);
// Last, check that the third line contains a single closing brace.
Tok = I[2]->First;
if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) ||
Tok->MustBreakBefore)
return 0;
return 2;
}
return 0;
}
/// \brief Returns true if the two lines following \c *I fit into \p Limit,
/// counting one extra column in front of each of them.
bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
unsigned Limit) {
return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit;
}
// The style options consulted by the merge decisions. Held by reference, so
// the referenced FormatStyle must outlive this object.
const FormatStyle &Style;
};
class UnwrappedLineFormatter {
public:
/// \brief Stores the indenter, the whitespace manager and the style, and
/// sets up the line joiner with the same style.
UnwrappedLineFormatter(ContinuationIndenter *Indenter,
                       WhitespaceManager *Whitespaces,
                       const FormatStyle &Style)
    : Indenter(Indenter),
      Whitespaces(Whitespaces),
      Style(Style),
      Joiner(Style) {
}
/// \brief Formats all of \p Lines and returns the accumulated penalty.
///
/// \param Lines the unwrapped lines to format; must not be empty.
/// \param DryRun if \c true, no whitespace changes are reported, no lines are
/// joined and no tokens are marked as finalized.
/// \param AdditionalIndent added to the initial indent of every level below
/// the level of the first line.
/// \param FixBadIndentation if \c true, lines whose first token is not at the
/// expected indent are formatted even if they are not marked as affected.
/// \returns the sum of the penalties of the lines that went through the
/// solution-space search; the other formatting paths add no penalty.
unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun,
int AdditionalIndent = 0, bool FixBadIndentation = false) {
assert(!Lines.empty());
unsigned Penalty = 0;
// IndentForLevel[l] is the indent in use for nesting level l, or -1 if it
// is not known yet. Levels below the first line's level are pre-computed.
std::vector<int> IndentForLevel;
for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i)
IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
const AnnotatedLine *PreviousLine = NULL;
for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(),
E = Lines.end();
I != E; ++I) {
const AnnotatedLine &TheLine = **I;
const FormatToken *FirstTok = TheLine.First;
int Offset = getIndentOffset(*FirstTok);
// Determine indent and try to merge multiple unwrapped lines.
// Grow the table up to this line's level and drop entries of deeper levels.
while (IndentForLevel.size() <= TheLine.Level)
IndentForLevel.push_back(-1);
IndentForLevel.resize(TheLine.Level + 1);
unsigned Indent = getIndent(IndentForLevel, TheLine.Level);
// Apply the offset only if the resulting indent does not become negative.
if (static_cast<int>(Indent) + Offset >= 0)
Indent += Offset;
unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E);
if (MergedLines > 0 && Style.ColumnLimit == 0) {
// Disallow line merging if there is a break at the start of one of the
// input lines.
for (unsigned i = 0; i < MergedLines; ++i) {
if (I[i + 1]->First->NewlinesBefore > 0)
MergedLines = 0;
}
}
if (!DryRun) {
for (unsigned i = 0; i < MergedLines; ++i) {
join(*I[i], *I[i + 1]);
}
}
// Skip the merged lines. TheLine still refers to the first line of the
// group, which now holds the tokens of the merged lines if they were joined.
I += MergedLines;
unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level);
bool FixIndentation =
FixBadIndentation && (LevelIndent != FirstTok->OriginalColumn);
if (TheLine.First->is(tok::eof)) {
if (PreviousLine && PreviousLine->Affected && !DryRun) {
// Remove the file's trailing whitespace.
unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u);
Whitespaces->replaceWhitespace(*TheLine.First, Newlines,
/*IndentLevel=*/0, /*Spaces=*/0,
/*TargetColumn=*/0);
}
} else if (TheLine.Type != LT_Invalid &&
(TheLine.Affected || FixIndentation)) {
if (FirstTok->WhitespaceRange.isValid()) {
if (!DryRun)
formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level,
Indent, TheLine.InPPDirective);
} else {
// The whitespace before the first token cannot be changed; keep the
// column it is at.
Indent = LevelIndent = FirstTok->OriginalColumn;
}
// If everything fits on a single line, just put it there.
unsigned ColumnLimit = Style.ColumnLimit;
if (I + 1 != E) {
AnnotatedLine *NextLine = I[1];
// The next line continues a preprocessor directive, so use the limit
// computed by getColumnLimit.
if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline)
ColumnLimit = getColumnLimit(TheLine.InPPDirective);
}
if (TheLine.Last->TotalLength + Indent <= ColumnLimit) {
LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun);
while (State.NextToken != NULL)
Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
} else if (Style.ColumnLimit == 0) {
// FIXME: Implement nested blocks for ColumnLimit = 0.
NoColumnLimitFormatter Formatter(Indenter);
if (!DryRun)
Formatter.format(Indent, &TheLine);
} else {
// Search for the best line breaks; only this path adds to the penalty.
Penalty += format(TheLine, Indent, DryRun);
}
IndentForLevel[TheLine.Level] = LevelIndent;
} else if (TheLine.ChildrenAffected) {
// Only the children need formatting. The penalty returned by this call
// is not added to Penalty.
format(TheLine.Children, DryRun);
} else {
// Format the first token if necessary, and notify the WhitespaceManager
// about the unchanged whitespace.
for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
if (Tok == TheLine.First &&
(Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
// This declaration shadows the outer LevelIndent.
unsigned LevelIndent = Tok->OriginalColumn;
if (!DryRun) {
// Remove trailing whitespace of the previous line.
if ((PreviousLine && PreviousLine->Affected) ||
TheLine.LeadingEmptyLinesAffected) {
formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent,
TheLine.InPPDirective);
} else {
Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
}
}
// Undo the offset to get the indent of the level itself, and remember it
// for following lines (comments do not define the indent of a level).
if (static_cast<int>(LevelIndent) - Offset >= 0)
LevelIndent -= Offset;
if (Tok->isNot(tok::comment))
IndentForLevel[TheLine.Level] = LevelIndent;
} else if (!DryRun) {
Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
}
}
}
if (!DryRun) {
// Mark every token of the line as finalized.
for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
Tok->Finalized = true;
}
}
PreviousLine = *I;
}
return Penalty;
}
private:
/// \brief Formats an \c AnnotatedLine and returns the penalty.
///
/// If \p DryRun is \c false, directly applies the changes.
unsigned format(const AnnotatedLine &Line, unsigned FirstIndent,
bool DryRun) {
LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
// If the ObjC method declaration does not fit on a line, we should format
// it with one arg per line.
if (State.Line->Type == LT_ObjCMethodDecl)
State.Stack.back().BreakBeforeParameter = true;
// Find best solution in solution space.
return analyzeSolutionSpace(State, DryRun);
}
/// \brief An edge in the solution space leading from \c Previous->State to
/// \c State; \c NewLine records whether a newline was inserted on that edge.
struct StateNode {
  /// The state reached by following this edge.
  LineState State;
  /// Whether a line break was inserted to reach \c State.
  bool NewLine;
  /// The node this edge starts from; NULL for the initial node.
  StateNode *Previous;

  StateNode(const LineState &State, bool NewLine, StateNode *Previous)
      : State(State), NewLine(NewLine), Previous(Previous) {}
};
/// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
///
/// The first member is the penalty of a state, the second the running count
/// of created states at the time it was queued.
///
/// In case of equal penalties, we want to prefer states that were inserted
/// first. During state generation we make sure that we insert states first
/// that break the line as late as possible.
typedef std::pair<unsigned, unsigned> OrderedPenalty;
/// \brief An item in the prioritized BFS search queue. The \c StateNode's
/// \c State has the given \c OrderedPenalty.
typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
/// \brief The BFS queue type.
///
/// Ordered with \c std::greater, so \c top() is the item with the smallest
/// \c OrderedPenalty.
typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
std::greater<QueueItem> > QueueType;
/// \brief Returns the offset of a line relative to the indent of its level.
///
/// Lines that start with an access specifier (C++ or ObjC) are shifted by
/// \c Style.AccessModifierOffset; every other line gets an offset of 0.
int getIndentOffset(const FormatToken &RootToken) {
  const bool StartsWithAccessSpecifier =
      RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier();
  return StartsWithAccessSpecifier ? Style.AccessModifierOffset : 0;
}
/// \brief Emits the newlines and the indent in front of \p RootToken, the
/// first token of an unwrapped line.
///
/// \param PreviousLine the line before this one, or NULL if there is none.
/// \param IndentLevel and \p Indent are passed on to the whitespace manager;
/// \p Indent is used both as the number of spaces and as the target column.
/// \param InPPDirective whether the line is part of a preprocessor directive.
void formatFirstToken(FormatToken &RootToken,
                      const AnnotatedLine *PreviousLine, unsigned IndentLevel,
                      unsigned Indent, bool InPPDirective) {
  // Start from the newlines in the input, capped by MaxEmptyLinesToKeep.
  unsigned Newlines =
      std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);

  // Remove empty lines before "}" where applicable, i.e. when the line is
  // just "}" or "};".
  FormatToken *Next = RootToken.Next;
  const bool IsLoneClosingBrace =
      RootToken.is(tok::r_brace) &&
      (!Next || (Next->is(tok::semi) && !Next->Next));
  if (IsLoneClosingBrace && Newlines > 1u)
    Newlines = 1u;

  // Every line but the very first one starts on a new line.
  if (Newlines == 0 && !RootToken.IsFirst)
    Newlines = 1;

  if (PreviousLine) {
    // Insert extra new line before access specifiers.
    if (PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
        RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
      ++Newlines;

    // Remove empty lines after access specifiers.
    if (PreviousLine->First->isAccessSpecifier() && Newlines > 1u)
      Newlines = 1u;
  }

  const bool ContinuesPPDirective =
      InPPDirective && !RootToken.HasUnescapedNewline;
  Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent,
                                 Indent, ContinuesPPDirective);
}
/// \brief Get the indent of \p Level from \p IndentForLevel.
///
/// \p IndentForLevel must contain the indent for the level \c l
/// at \p IndentForLevel[l], or -1 if the indent for that level is unknown.
/// \p Level must be a valid index into \p IndentForLevel.
///
/// The indent of an unknown level is derived from the nearest enclosing level
/// with a known indent by adding \c Style.IndentWidth for every level in
/// between; if no enclosing level is known, level 0 is taken to be at
/// column 0.
///
/// The table is taken by const reference and walked iteratively, so looking
/// up a deeply nested level no longer copies the vector once per level.
unsigned getIndent(const std::vector<int> &IndentForLevel, unsigned Level) {
  // Indent contributed by the unknown levels passed on the way up.
  unsigned Inherited = 0;
  while (IndentForLevel[Level] == -1) {
    if (Level == 0)
      return Inherited;
    --Level;
    Inherited += Style.IndentWidth;
  }
  return IndentForLevel[Level] + Inherited;
}
/// \brief Appends the tokens of line \p B to line \p A.
///
/// Links the two token chains, allows a break before the first token of
/// \p B, shifts the \c TotalLength of every appended token by the length of
/// \p A plus the spaces required before \p B's first token, and makes
/// \c A.Last point at the last appended token. \p A becomes affected if \p B
/// is.
void join(AnnotatedLine &A, const AnnotatedLine &B) {
  assert(!A.Last->Next);
  assert(!B.First->Previous);

  if (B.Affected)
    A.Affected = true;

  FormatToken *OldTail = A.Last;
  FormatToken *Head = B.First;

  // Splice the two chains together.
  OldTail->Next = Head;
  Head->Previous = OldTail;
  Head->CanBreakBefore = true;

  // Every appended token now sits this many columns further to the right.
  const unsigned Shift = OldTail->TotalLength + Head->SpacesRequiredBefore;
  FormatToken *NewTail = Head;
  for (FormatToken *Tok = Head; Tok; Tok = Tok->Next) {
    Tok->TotalLength += Shift;
    NewTail = Tok;
  }
  A.Last = NewTail;
}
/// \brief Returns the column limit to use for a line, which is two columns
/// less than \c Style.ColumnLimit inside preprocessor directives.
///
/// NOTE(review): if \c Style.ColumnLimit is an unsigned value below 2 (e.g. 0
/// for "no limit"), the subtraction wraps around to a very large limit --
/// confirm that callers expect this.
unsigned getColumnLimit(bool InPPDirective) const {
  // In preprocessor directives reserve two chars for trailing " \"
  if (InPPDirective)
    return Style.ColumnLimit - 2;
  return Style.ColumnLimit - 0;
}
/// \brief Analyze the entire solution space starting from \p InitialState.
///
/// This implements a variant of Dijkstra's algorithm on the graph that spans
/// the solution space (\c LineStates are the nodes). The algorithm tries to
/// find the shortest path (the one with lowest penalty) from \p InitialState
/// to a state where all tokens are placed. Returns the penalty.
///
/// If no solution is found, nothing is applied and 0 is returned.
///
/// If \p DryRun is \c false, directly applies the changes.
unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) {
// States that have already been expanded.
std::set<LineState> Seen;
// Increasing count of \c StateNode items we have created. This is used to
// create a deterministic order independent of the container.
unsigned Count = 0;
QueueType Queue;
// Insert start element into queue.
StateNode *Node =
new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
++Count;
unsigned Penalty = 0;
// While not empty, take first element and follow edges.
while (!Queue.empty()) {
Penalty = Queue.top().first.first;
// This declaration shadows the start node declared above.
StateNode *Node = Queue.top().second;
if (Node->State.NextToken == NULL) {
// All tokens are placed: this is the solution. It is deliberately left on
// top of the queue so that it can be read back after the loop.
DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
break;
}
Queue.pop();
// Cut off the analysis of certain solutions if the analysis gets too
// complex. See description of IgnoreStackForComparison.
if (Count > 10000)
Node->State.IgnoreStackForComparison = true;
if (!Seen.insert(Node->State).second)
// State already examined with lower penalty.
continue;
// A decision already recorded on the token restricts which of the two
// edges (continue on the same line / break) is explored.
FormatDecision LastFormat = Node->State.NextToken->Decision;
if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
}
if (Queue.empty()) {
// We were unable to find a solution, do nothing.
// FIXME: Add diagnostic?
DEBUG(llvm::dbgs() << "Could not find a solution.\n");
return 0;
}
// Reconstruct the solution.
if (!DryRun)
reconstructPath(InitialState, Queue.top().second);
DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
DEBUG(llvm::dbgs() << "---\n");
return Penalty;
}
/// \brief Replay the decisions along the path ending in \p Current onto
/// \p State.
///
/// The path is followed from its first step to \p Current; for every step
/// the children are formatted for real (not as a dry run) and the token is
/// added to \p State with the recorded break decision.
void reconstructPath(LineState &State, StateNode *Current) {
  // Walk back through the predecessors and collect the steps front to back.
  // The root node is left out: we do not need a break before the initial
  // token.
  std::deque<StateNode *> Path;
  for (StateNode *Node = Current; Node->Previous; Node = Node->Previous)
    Path.push_front(Node);

  while (!Path.empty()) {
    StateNode *Step = Path.front();
    Path.pop_front();

    unsigned Penalty = 0;
    formatChildren(State, Step->NewLine, /*DryRun=*/false, Penalty);
    Penalty += Indenter->addTokenToState(State, Step->NewLine, false);

    DEBUG({
      if (Step->NewLine) {
        llvm::dbgs() << "Penalty for placing "
                     << Step->Previous->State.NextToken->Tok.getName() << ": "
                     << Penalty << "\n";
      }
    });
  }
}
/// \brief Add the following state to the analysis queue \c Queue.
///
/// Assume the current state is \p PreviousNode and has been reached with a
/// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
bool NewLine, unsigned *Count, QueueType *Queue) {
if (NewLine && !Indenter->canBreak(PreviousNode->State))
return;
if (!NewLine && Indenter->mustBreak(PreviousNode->State))
return;
StateNode *Node = new (Allocator.Allocate())
StateNode(PreviousNode->State, NewLine, PreviousNode);
if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
return;
Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
++(*Count);
}
/// \brief If the \p State's next token is an r_brace closing a nested block,
/// format the nested block before it.
///
/// Returns \c true if all children could be placed successfully and adapts
/// \p Penalty as well as \p State. If \p DryRun is false, also directly
/// creates changes using \c Whitespaces.
///
/// The crucial idea here is that children always get formatted upon
/// encountering the closing brace right after the nested block. Now, if we
/// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
/// \c false), the entire block has to be kept on the same line (which is only
/// possible if it fits on the line, only contains a single statement, etc.).
///
/// If \p NewLine is true, we format the nested block on separate lines, i.e.
/// break after the "{", format all lines with correct indentation and then
/// put the closing "}" on yet another new line.
///
/// This enables us to keep the simple structure of the
/// \c UnwrappedLineFormatter, where we only have two options for each token:
/// break or don't break.
bool formatChildren(LineState &State, bool NewLine, bool DryRun,
unsigned &Penalty) {
// The child lines hang off the token directly before the next token, while
// the brace check below looks at the previous non-comment token.
FormatToken &Previous = *State.NextToken->Previous;
const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
if (!LBrace || LBrace->isNot(tok::l_brace) ||
LBrace->BlockKind != BK_Block || Previous.Children.size() == 0)
// The previous token does not open a block. Nothing to do. We don't
// assert so that we can simply call this function for all tokens.
return true;
if (NewLine) {
// Difference between the indent on top of the state's stack and the indent
// that the first child's nesting level alone would give.
int AdditionalIndent = State.Stack.back().Indent -
Previous.Children[0]->Level * Style.IndentWidth;
Penalty += format(Previous.Children, DryRun, AdditionalIndent,
/*FixBadIndentation=*/true);
return true;
}
// From here on the block has to stay on the current line.
// Cannot merge multiple statements into a single line.
if (Previous.Children.size() > 1)
return false;
// We can't put the closing "}" on a line with a trailing comment.
if (Previous.Children[0]->Last->isTrailingComment())
return false;
if (!DryRun) {
// Put exactly one space and no newline before the child's first token.
Whitespaces->replaceWhitespace(
*Previous.Children[0]->First,
/*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
/*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
}
// Format the single child starting one column further right, then move the
// column past that space and the child's tokens.
Penalty += format(*Previous.Children[0], State.Column + 1, DryRun);
State.Column += 1 + Previous.Children[0]->Last->TotalLength;
return true;
}
// Decides whether a break is allowed or required at the next token and
// applies a token to a LineState (canBreak / mustBreak / addTokenToState).
ContinuationIndenter *Indenter;
// Receives the whitespace changes when children are formatted outside of a
// dry run.
WhitespaceManager *Whitespaces;
// Formatting options consulted by this class (e.g. IndentWidth, ColumnLimit).
FormatStyle Style;
// NOTE(review): not referenced in the part of the class visible here;
// presumably used to merge short lines - confirm against the rest of the
// class.
LineJoiner Joiner;
// Backing storage for the StateNode objects created during the search.
llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
};
/// \brief Turns the raw token stream of a \c Lexer into \c FormatToken
/// objects.
///
/// The lexer is switched to keep-whitespace mode. Whitespace is not returned
/// as tokens of its own; it is recorded in the \c WhitespaceRange,
/// \c NewlinesBefore and \c OriginalColumn of the token that follows it. On
/// top of that, ">>" is split into two ">" tokens and a few token sequences
/// (the _T("...") macro, some JavaScript operators) are merged into a single
/// token.
class FormatTokenLexer {
public:
  /// \brief Sets up lexing on top of \p Lex.
  ///
  /// \p Lex, \p SourceMgr and \p Style are kept by reference.
  FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
                   encoding::Encoding Encoding)
      : FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0),
        TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
        IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
    Lex.SetKeepWhitespaceMode(true);
  }

  /// \brief Lexes the whole input and returns all tokens up to and including
  /// the eof token.
  ///
  /// Must only be called while no tokens have been produced yet.
  ArrayRef<FormatToken *> lex() {
    assert(Tokens.empty());
    do {
      Tokens.push_back(getNextToken());
      tryMergePreviousTokens();
    } while (Tokens.back()->Tok.isNot(tok::eof));
    return Tokens;
  }

  IdentifierTable &getIdentTable() { return IdentTable; }

private:
  /// \brief Tries to merge the most recently lexed tokens into one token.
  ///
  /// At most one merge is performed per call.
  void tryMergePreviousTokens() {
    if (tryMerge_TMacro())
      return;

    if (Style.Language == FormatStyle::LK_JavaScript) {
      static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal };
      static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal };
      static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater,
                                               tok::greaterequal };
      // FIXME: We probably need to change token type to mimic operator with the
      // correct priority.
      if (tryMergeTokens(JSIdentity))
        return;
      if (tryMergeTokens(JSNotIdentity))
        return;
      if (tryMergeTokens(JSShiftEqual))
        return;
    }
  }

  /// \brief Merges the last tokens into the first of them if their kinds
  /// match \p Kinds and no whitespace separates them.
  ///
  /// Returns \c true if the tokens were merged. The merged token keeps the
  /// kind of the first token; its text and column width are extended by the
  /// text of the tokens that were dropped.
  bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) {
    if (Tokens.size() < Kinds.size())
      return false;

    SmallVectorImpl<FormatToken *>::const_iterator First =
        Tokens.end() - Kinds.size();
    if (!First[0]->is(Kinds[0]))
      return false;
    unsigned AddLength = 0;
    for (unsigned i = 1; i < Kinds.size(); ++i) {
      // An empty whitespace range means that the token directly follows its
      // predecessor.
      if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() !=
                                         First[i]->WhitespaceRange.getEnd())
        return false;
      AddLength += First[i]->TokenText.size();
    }
    Tokens.resize(Tokens.size() - Kinds.size() + 1);
    // The dropped tokens were adjacent in the buffer, so extending the text
    // of the first token covers them.
    First[0]->TokenText = StringRef(First[0]->TokenText.data(),
                                    First[0]->TokenText.size() + AddLength);
    First[0]->ColumnWidth += AddLength;
    return true;
  }

  /// \brief Merges the four tokens of a _T("...") invocation into a single
  /// string literal token.
  ///
  /// Returns \c true if the last four tokens had that shape and were merged.
  /// Multi-line string literals are not merged.
  bool tryMerge_TMacro() {
    if (Tokens.size() < 4)
      return false;
    FormatToken *Last = Tokens.back();
    if (!Last->is(tok::r_paren))
      return false;

    FormatToken *String = Tokens[Tokens.size() - 2];
    if (!String->is(tok::string_literal) || String->IsMultiline)
      return false;

    if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
      return false;

    FormatToken *Macro = Tokens[Tokens.size() - 4];
    if (Macro->TokenText != "_T")
      return false;

    // The string token takes over the text from "_T" up to and including the
    // closing parenthesis, plus the position information of the macro token.
    const char *Start = Macro->TokenText.data();
    const char *End = Last->TokenText.data() + Last->TokenText.size();
    String->TokenText = StringRef(Start, End - Start);
    String->IsFirst = Macro->IsFirst;
    String->LastNewlineOffset = Macro->LastNewlineOffset;
    String->WhitespaceRange = Macro->WhitespaceRange;
    String->OriginalColumn = Macro->OriginalColumn;
    String->ColumnWidth = encoding::columnWidthWithTabs(
        String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);

    // Replace the four tokens by the string token.
    Tokens.pop_back();
    Tokens.pop_back();
    Tokens.pop_back();
    Tokens.back() = String;
    return true;
  }

  /// \brief Produces the next non-whitespace token.
  ///
  /// Whitespace preceding the token is consumed and recorded on the token.
  FormatToken *getNextToken() {
    if (GreaterStashed) {
      // Create a synthesized second '>' token.
      // FIXME: Increment Column and set OriginalColumn.
      Token Greater = FormatTok->Tok;
      FormatTok = new (Allocator.Allocate()) FormatToken;
      FormatTok->Tok = Greater;
      SourceLocation GreaterLocation =
          FormatTok->Tok.getLocation().getLocWithOffset(1);
      FormatTok->WhitespaceRange =
          SourceRange(GreaterLocation, GreaterLocation);
      FormatTok->TokenText = ">";
      FormatTok->ColumnWidth = 1;
      GreaterStashed = false;
      return FormatTok;
    }

    FormatTok = new (Allocator.Allocate()) FormatToken;
    readRawToken(*FormatTok);
    // Whitespace trimmed off the end of the previous comment counts as
    // whitespace before this token.
    SourceLocation WhitespaceStart =
        FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
    FormatTok->IsFirst = IsFirstToken;
    IsFirstToken = false;

    // Consume and record whitespace until we find a significant token.
    unsigned WhitespaceLength = TrailingWhitespace;
    while (FormatTok->Tok.is(tok::unknown)) {
      for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
        switch (FormatTok->TokenText[i]) {
        case '\n':
          ++FormatTok->NewlinesBefore;
          // FIXME: This is technically incorrect, as it could also
          // be a literal backslash at the end of the line.
          if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' &&
                         (FormatTok->TokenText[i - 1] != '\r' || i == 1 ||
                          FormatTok->TokenText[i - 2] != '\\')))
            FormatTok->HasUnescapedNewline = true;
          FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
          Column = 0;
          break;
        case '\r':
        case '\f':
        case '\v':
          Column = 0;
          break;
        case ' ':
          ++Column;
          break;
        case '\t':
          // Advance to the next tab stop. A TabWidth of 0 must not be used
          // as a divisor; in that case the tab does not move the column.
          Column +=
              Style.TabWidth - (Style.TabWidth ? Column % Style.TabWidth : 0);
          break;
        case '\\':
          ++Column;
          // A backslash that does not escape a line ending is not whitespace.
          if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' &&
                             FormatTok->TokenText[i + 1] != '\n'))
            FormatTok->Type = TT_ImplicitStringLiteral;
          break;
        default:
          FormatTok->Type = TT_ImplicitStringLiteral;
          ++Column;
          break;
        }
      }

      // The unknown token contained something other than whitespace; return
      // it as a token instead of folding it into the whitespace.
      if (FormatTok->Type == TT_ImplicitStringLiteral)
        break;
      WhitespaceLength += FormatTok->Tok.getLength();

      readRawToken(*FormatTok);
    }

    // In case the token starts with escaped newlines, we want to
    // take them into account as whitespace - this pattern is quite frequent
    // in macro definitions.
    // FIXME: Add a more explicit test.
    while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
           FormatTok->TokenText[1] == '\n') {
      // FIXME: ++FormatTok->NewlinesBefore is missing...
      WhitespaceLength += 2;
      Column = 0;
      FormatTok->TokenText = FormatTok->TokenText.substr(2);
    }

    FormatTok->WhitespaceRange = SourceRange(
        WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));

    FormatTok->OriginalColumn = Column;

    TrailingWhitespace = 0;
    if (FormatTok->Tok.is(tok::comment)) {
      // FIXME: Add the trimmed whitespace to Column.
      StringRef UntrimmedText = FormatTok->TokenText;
      FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
      TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
    } else if (FormatTok->Tok.is(tok::raw_identifier)) {
      // Resolve the raw identifier so that keywords get their token kind.
      IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
      FormatTok->Tok.setIdentifierInfo(&Info);
      FormatTok->Tok.setKind(Info.getTokenID());
    } else if (FormatTok->Tok.is(tok::greatergreater)) {
      // Return the first '>' now; the second one is synthesized by the next
      // call.
      FormatTok->Tok.setKind(tok::greater);
      FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
      GreaterStashed = true;
    }

    // Now FormatTok is the next non-whitespace token.

    StringRef Text = FormatTok->TokenText;
    size_t FirstNewlinePos = Text.find('\n');
    if (FirstNewlinePos == StringRef::npos) {
      // FIXME: ColumnWidth actually depends on the start column, we need to
      // take this into account when the token is moved.
      FormatTok->ColumnWidth =
          encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
      Column += FormatTok->ColumnWidth;
    } else {
      FormatTok->IsMultiline = true;
      // FIXME: ColumnWidth actually depends on the start column, we need to
      // take this into account when the token is moved.
      FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
          Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);

      // The last line of the token always starts in column 0.
      // Thus, the length can be precomputed even in the presence of tabs.
      FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
          Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
          Encoding);
      Column = FormatTok->LastLineColumnWidth;
    }

    return FormatTok;
  }

  // The token currently being built; also the token returned last.
  FormatToken *FormatTok;
  // True until the first token has been produced.
  bool IsFirstToken;
  // True if the second '>' of a split ">>" still has to be returned.
  bool GreaterStashed;
  // Column in the original source at the current lexing position.
  unsigned Column;
  // Number of whitespace characters trimmed off the end of the last comment.
  unsigned TrailingWhitespace;
  Lexer &Lex;
  SourceManager &SourceMgr;
  FormatStyle &Style;
  IdentifierTable IdentTable;
  encoding::Encoding Encoding;
  // Backing storage for all tokens handed out by this lexer.
  llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
  SmallVector<FormatToken *, 16> Tokens;

  /// \brief Reads the next raw token from the lexer into \p Tok and sets its
  /// text.
  void readRawToken(FormatToken &Tok) {
    Lex.LexFromRawLexer(Tok.Tok);
    Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
                              Tok.Tok.getLength());
    // For formatting, treat unterminated string literals like normal string
    // literals.
    if (Tok.is(tok::unknown) && !Tok.TokenText.empty() &&
        Tok.TokenText[0] == '"') {
      Tok.Tok.setKind(tok::string_literal);
      Tok.IsUnterminatedLiteral = true;
    }
  }
};
/// \brief Returns a human-readable name for \p Language, or "Unknown" for a
/// value without a dedicated name.
static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
  if (Language == FormatStyle::LK_Cpp)
    return "C++";
  if (Language == FormatStyle::LK_JavaScript)
    return "JavaScript";
  return "Unknown";
}
/// \brief Formats the buffer behind a \c Lexer and returns the resulting
/// replacements.
///
/// The class is the \c UnwrappedLineConsumer of the parser it creates: the
/// parser hands over unwrapped lines grouped into runs, and every run is
/// annotated, checked against the requested source ranges and formatted.
class Formatter : public UnwrappedLineConsumer {
public:
/// \brief Copies \p Style and \p Ranges and keeps references to \p Lex and
/// \p SourceMgr.
///
/// The line-ending convention and the encoding are detected from the lexer's
/// buffer. \c UnwrappedLines starts out with one empty run.
Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
const std::vector<CharSourceRange> &Ranges)
: Style(Style), Lex(Lex), SourceMgr(SourceMgr),
Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())),
Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
Encoding(encoding::detectEncoding(Lex.getBuffer())) {
DEBUG(llvm::dbgs() << "File encoding: "
<< (Encoding == encoding::Encoding_UTF8 ? "UTF8"
: "unknown")
<< "\n");
DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
<< "\n");
}
/// \brief Lexes and parses the input, formats every run of unwrapped lines
/// and returns the union of the replacements of all runs.
tooling::Replacements format() {
tooling::Replacements Result;
FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding);
UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
bool StructuralError = Parser.parse();
// finishRun() opens a new, empty run after every completed one, so the last
// entry holds no lines and is skipped by the loop below.
assert(UnwrappedLines.rbegin()->empty());
for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
++Run) {
DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
// The annotated lines are heap-allocated here and deleted again at the end
// of this iteration.
SmallVector<AnnotatedLine *, 16> AnnotatedLines;
for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
}
tooling::Replacements RunResult =
format(AnnotatedLines, StructuralError, Tokens);
DEBUG({
llvm::dbgs() << "Replacements for run " << Run << ":\n";
for (tooling::Replacements::iterator I = RunResult.begin(),
E = RunResult.end();
I != E; ++I) {
llvm::dbgs() << I->toString() << "\n";
}
});
for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
delete AnnotatedLines[i];
}
Result.insert(RunResult.begin(), RunResult.end());
// Start the next run without the changes recorded for this one.
Whitespaces.reset();
}
return Result;
}
/// \brief Formats one run of annotated lines and returns its replacements.
///
/// The steps are: annotate all lines, derive style settings from the code,
/// calculate the formatting information, determine the affected lines, and
/// run the \c UnwrappedLineFormatter. \p StructuralError is not used in this
/// function.
tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
bool StructuralError, FormatTokenLexer &Tokens) {
TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in"));
for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
Annotator.annotate(*AnnotatedLines[i]);
}
// Runs between the two annotation passes: it reads the token types set by
// annotate() and may change Style before the formatting information is
// calculated.
deriveLocalStyle(AnnotatedLines);
for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
}
computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
Annotator.setCommentLineLevels(AnnotatedLines);
ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding,
BinPackInconclusiveFunctions);
UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style);
Formatter.format(AnnotatedLines, /*DryRun=*/false);
return Whitespaces.generateReplacements();
}
private:
// Determines which lines are affected by the SourceRanges given as input.
// Returns \c true if at least one line between I and E or one of their
// children is affected.
bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
SmallVectorImpl<AnnotatedLine *>::iterator E) {
bool SomeLineAffected = false;
// The last line that went through nonPPLineAffected(); lines of
// preprocessor directives do not update it.
const AnnotatedLine *PreviousLine = NULL;
while (I != E) {
AnnotatedLine *Line = *I;
Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
// If a line is part of a preprocessor directive, it needs to be formatted
// if any token within the directive is affected.
if (Line->InPPDirective) {
FormatToken *Last = Line->Last;
// Extend the directive over all following lines whose first token is not
// preceded by an unescaped newline.
SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
Last = (*PPEnd)->Last;
++PPEnd;
}
if (affectsTokenRange(*Line->First, *Last,
/*IncludeLeadingNewlines=*/false)) {
SomeLineAffected = true;
markAllAsAffected(I, PPEnd);
}
I = PPEnd;
continue;
}
if (nonPPLineAffected(Line, PreviousLine))
SomeLineAffected = true;
PreviousLine = Line;
++I;
}
return SomeLineAffected;
}
// Determines whether 'Line' is affected by the SourceRanges given as input.
// Returns \c true if line or one of its children is affected.
bool nonPPLineAffected(AnnotatedLine *Line,
const AnnotatedLine *PreviousLine) {
bool SomeLineAffected = false;
// Children are processed first: the token loop below reads the Affected
// flag of each token's first child line.
Line->ChildrenAffected =
computeAffectedLines(Line->Children.begin(), Line->Children.end());
if (Line->ChildrenAffected)
SomeLineAffected = true;
// Stores whether one of the line's tokens is directly affected.
bool SomeTokenAffected = false;
// Stores whether we need to look at the leading newlines of the next token
// in order to determine whether it was affected.
bool IncludeLeadingNewlines = false;
// Stores whether the first child line of any of this line's tokens is
// affected.
bool SomeFirstChildAffected = false;
for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
// Determine whether 'Tok' was affected.
if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
SomeTokenAffected = true;
// Determine whether the first child of 'Tok' was affected.
if (!Tok->Children.empty() && Tok->Children.front()->Affected)
SomeFirstChildAffected = true;
// The leading newlines of the next token are only looked at if this token
// has no child lines.
IncludeLeadingNewlines = Tok->Children.empty();
}
// Was this line moved, i.e. has it previously been on the same line as an
// affected line?
bool LineMoved = PreviousLine && PreviousLine->Affected &&
Line->First->NewlinesBefore == 0;
// A line that consists of a single comment, follows an affected line ending
// in a comment, and is separated from it by fewer than two newlines.
bool IsContinuedComment = Line->First->is(tok::comment) &&
Line->First->Next == NULL &&
Line->First->NewlinesBefore < 2 && PreviousLine &&
PreviousLine->Affected &&
PreviousLine->Last->is(tok::comment);
if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
IsContinuedComment) {
Line->Affected = true;
SomeLineAffected = true;
}
return SomeLineAffected;
}
// Marks all lines between I and E as well as all their children as affected.
void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
SmallVectorImpl<AnnotatedLine *>::iterator E) {
while (I != E) {
(*I)->Affected = true;
markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
++I;
}
}
// Returns true if the range from 'First' to 'Last' intersects with one of the
// input ranges.
bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
bool IncludeLeadingNewlines) {
SourceLocation Start = First.WhitespaceRange.getBegin();
// Without the leading newlines the range starts after the last newline in
// the whitespace before 'First'.
if (!IncludeLeadingNewlines)
Start = Start.getLocWithOffset(First.LastNewlineOffset);
SourceLocation End = Last.getStartOfNonWhitespace();
// Move the end onto the last character of 'Last'.
if (Last.TokenText.size() > 0)
End = End.getLocWithOffset(Last.TokenText.size() - 1);
CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
return affectsCharSourceRange(Range);
}
// Returns true if one of the input ranges intersect the leading empty lines
// before 'Tok'.
bool affectsLeadingEmptyLines(const FormatToken &Tok) {
CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
Tok.WhitespaceRange.getBegin(),
Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
return affectsCharSourceRange(EmptyLineRange);
}
// Returns true if 'Range' intersects with one of the input ranges.
bool affectsCharSourceRange(const CharSourceRange &Range) {
for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
E = Ranges.end();
I != E; ++I) {
// The ranges intersect unless one of them ends before the other begins.
if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
!SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
return true;
}
return false;
}
// Returns true if \p Text contains more than half as many '\r' characters as
// '\n' characters.
static bool inputUsesCRLF(StringRef Text) {
return Text.count('\r') * 2 > Text.count('\n');
}
// Derives settings from the way the code is currently written: the pointer
// binding (if Style.DerivePointerBinding is set), the language standard (if
// Style.Standard is LS_Auto) and BinPackInconclusiveFunctions.
void
deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
unsigned CountBoundToVariable = 0;
unsigned CountBoundToType = 0;
bool HasCpp03IncompatibleFormat = false;
bool HasBinPackedFunction = false;
bool HasOnePerLineFunction = false;
for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
if (!AnnotatedLines[i]->First->Next)
continue;
// The walk covers the second token up to, but not including, the last token
// of the line, so Tok->Previous and Tok->Next are always valid inside the
// loop.
FormatToken *Tok = AnnotatedLines[i]->First->Next;
while (Tok->Next) {
if (Tok->Type == TT_PointerOrReference) {
// Whitespace on exactly one side of the '*' or '&' counts as a vote.
bool SpacesBefore =
Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() !=
Tok->Next->WhitespaceRange.getEnd();
if (SpacesBefore && !SpacesAfter)
++CountBoundToVariable;
else if (!SpacesBefore && SpacesAfter)
++CountBoundToType;
}
// "<::" and ">>" written without whitespace in between.
if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
if (Tok->is(tok::coloncolon) &&
Tok->Previous->Type == TT_TemplateOpener)
HasCpp03IncompatibleFormat = true;
if (Tok->Type == TT_TemplateCloser &&
Tok->Previous->Type == TT_TemplateCloser)
HasCpp03IncompatibleFormat = true;
}
if (Tok->PackingKind == PPK_BinPacked)
HasBinPackedFunction = true;
if (Tok->PackingKind == PPK_OnePerLine)
HasOnePerLineFunction = true;
Tok = Tok->Next;
}
}
if (Style.DerivePointerBinding) {
// On a tie the configured value is kept.
if (CountBoundToType > CountBoundToVariable)
Style.PointerBindsToType = true;
else if (CountBoundToType < CountBoundToVariable)
Style.PointerBindsToType = false;
}
if (Style.Standard == FormatStyle::LS_Auto) {
Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
: FormatStyle::LS_Cpp03;
}
BinPackInconclusiveFunctions =
HasBinPackedFunction || !HasOnePerLineFunction;
}
// UnwrappedLineConsumer: appends \p TheLine to the current run.
virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
assert(!UnwrappedLines.empty());
UnwrappedLines.back().push_back(TheLine);
}
// UnwrappedLineConsumer: closes the current run by opening a new, empty one.
virtual void finishRun() {
UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
}
// Own copy of the style; deriveLocalStyle() may modify it.
FormatStyle Style;
Lexer &Lex;
SourceManager &SourceMgr;
WhitespaceManager Whitespaces;
// The source ranges that were requested to be formatted.
SmallVector<CharSourceRange, 8> Ranges;
// The unwrapped lines received from the parser, one vector per run.
SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
encoding::Encoding Encoding;
// Not set by the constructor; assigned in deriveLocalStyle() before the
// ContinuationIndenter reads it.
bool BinPackInconclusiveFunctions;
};
} // end anonymous namespace
/// \brief Formats the code behind \p Lex within \p Ranges and returns the
/// resulting replacements.
tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
                               SourceManager &SourceMgr,
                               std::vector<CharSourceRange> Ranges) {
  return Formatter(Style, Lex, SourceMgr, Ranges).format();
}
/// \brief Formats \p Code within \p Ranges and returns the resulting
/// replacements.
///
/// \p Code is registered as the contents of a virtual file named
/// \p FileName in a source manager that lives only for this call. The
/// offset/length pairs in \p Ranges are relative to the start of \p Code.
tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
                               std::vector<tooling::Range> Ranges,
                               StringRef FileName) {
  // Set up a source manager that serves Code as a virtual file.
  FileManager Files((FileSystemOptions()));
  DiagnosticsEngine Diagnostics(
      IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
      new DiagnosticOptions);
  SourceManager SourceMgr(Diagnostics, Files);
  llvm::MemoryBuffer *Buffer = llvm::MemoryBuffer::getMemBuffer(Code, FileName);
  const clang::FileEntry *VirtualFile =
      Files.getVirtualFile(FileName, Buffer->getBufferSize(), 0);
  SourceMgr.overrideFileContents(VirtualFile, Buffer);
  FileID MainFile = SourceMgr.createFileID(VirtualFile, SourceLocation(),
                                           clang::SrcMgr::C_User);
  Lexer Lex(MainFile, SourceMgr.getBuffer(MainFile), SourceMgr,
            getFormattingLangOpts(Style.Standard));

  // Turn the offset/length ranges into character ranges in that file.
  SourceLocation FileStart = SourceMgr.getLocForStartOfFile(MainFile);
  std::vector<CharSourceRange> CharRanges;
  for (std::vector<tooling::Range>::iterator I = Ranges.begin(),
                                             E = Ranges.end();
       I != E; ++I) {
    SourceLocation Begin = FileStart.getLocWithOffset(I->getOffset());
    SourceLocation Finish = Begin.getLocWithOffset(I->getLength());
    CharRanges.push_back(CharSourceRange::getCharRange(Begin, Finish));
  }
  return reformat(Style, Lex, SourceMgr, CharRanges);
}
/// \brief Returns the language options used to lex code for formatting.
///
/// C++, line comments, bool and both Objective-C modes are always enabled;
/// C++11 is enabled unless \p Standard is \c LS_Cpp03.
LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) {
  LangOptions Opts;
  Opts.CPlusPlus = 1;
  if (Standard == FormatStyle::LS_Cpp03)
    Opts.CPlusPlus11 = 0;
  else
    Opts.CPlusPlus11 = 1;
  Opts.LineComment = 1;
  Opts.Bool = 1;
  Opts.ObjC1 = 1;
  Opts.ObjC2 = 1;
  return Opts;
}
/// \brief Help text describing the values accepted by the -style option:
/// a predefined style name, "file", or an inline "{key: value, ...}"
/// configuration.
const char *StyleOptionHelpDescription =
"Coding style, currently supports:\n"
" LLVM, Google, Chromium, Mozilla, WebKit.\n"
"Use -style=file to load style configuration from\n"
".clang-format file located in one of the parent\n"
"directories of the source file (or current\n"
"directory for stdin).\n"
"Use -style=\"{key: value, ...}\" to set specific\n"
"parameters, e.g.:\n"
" -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
/// \brief Picks the language from the extension of \p FileName: JavaScript
/// for names ending in ".js" (compared case-insensitively), C++ for
/// everything else.
static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
  return FileName.endswith_lower(".js") ? FormatStyle::LK_JavaScript
                                        : FormatStyle::LK_Cpp;
}
/// \brief Determines the style to use for \p FileName.
///
/// The language is chosen from the file name and the style is initialized
/// from \p FallbackStyle. \p StyleName then selects the actual style:
///  - a value starting with "{" is parsed as an inline configuration,
///  - "file" (compared case-insensitively) triggers a search for a
///    .clang-format or _clang-format file in the directories above
///    \p FileName,
///  - anything else is looked up as a predefined style name.
///
/// Problems are reported on stderr; the function always returns a style.
FormatStyle getStyle(StringRef StyleName, StringRef FileName,
StringRef FallbackStyle) {
FormatStyle Style = getLLVMStyle();
Style.Language = getLanguageByFileName(FileName);
if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) {
llvm::errs() << "Invalid fallback style \"" << FallbackStyle
<< "\" using LLVM style\n";
return Style;
}
if (StyleName.startswith("{")) {
// Parse YAML/JSON style from the command line.
if (llvm::error_code ec = parseConfiguration(StyleName, &Style)) {
llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
<< FallbackStyle << " style\n";
}
// Style is returned as it stands after the call, whether or not parsing
// succeeded.
return Style;
}
if (!StyleName.equals_lower("file")) {
if (!getPredefinedStyle(StyleName, Style.Language, &Style))
llvm::errs() << "Invalid value for -style, using " << FallbackStyle
<< " style\n";
return Style;
}
// Look for .clang-format/_clang-format file in the file's parent directories.
// Collects the configuration files that were found but rejected as not
// supported, for the message at the end.
SmallString<128> UnsuitableConfigFiles;
SmallString<128> Path(FileName);
llvm::sys::fs::make_absolute(Path);
// The walk starts at the path itself and moves up one parent at a time;
// entries that are not directories are skipped.
for (StringRef Directory = Path; !Directory.empty();
Directory = llvm::sys::path::parent_path(Directory)) {
if (!llvm::sys::fs::is_directory(Directory))
continue;
SmallString<128> ConfigFile(Directory);
llvm::sys::path::append(ConfigFile, ".clang-format");
DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
bool IsFile = false;
// Ignore errors from is_regular_file: we only need to know if we can read
// the file or not.
llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
if (!IsFile) {
// Try _clang-format too, since dotfiles are not commonly used on Windows.
ConfigFile = Directory;
llvm::sys::path::append(ConfigFile, "_clang-format");
DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
}
if (IsFile) {
OwningPtr<llvm::MemoryBuffer> Text;
if (llvm::error_code ec =
llvm::MemoryBuffer::getFile(ConfigFile.c_str(), Text)) {
// A configuration file that cannot be read ends the search.
llvm::errs() << ec.message() << "\n";
break;
}
if (llvm::error_code ec = parseConfiguration(Text->getBuffer(), &Style)) {
if (ec == llvm::errc::not_supported) {
// Remember the file and keep searching further up.
if (!UnsuitableConfigFiles.empty())
UnsuitableConfigFiles.append(", ");
UnsuitableConfigFiles.append(ConfigFile);
continue;
}
// Any other parse error ends the search.
llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
<< "\n";
break;
}
DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
return Style;
}
}
// Reached when the search ran out of directories or was ended by an error.
llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
<< " style\n";
if (!UnsuitableConfigFiles.empty()) {
llvm::errs() << "Configuration file(s) do(es) not support "
<< getLanguageName(Style.Language) << ": "
<< UnsuitableConfigFiles << "\n";
}
return Style;
}
} // namespace format
} // namespace clang