Sam McCall 19daa21f84 [clangd] Rethink how SelectionTree deals with macros and #includes.
Summary:
The exclusive-claim model is successful at resolving conflicts over tokens
between parent/child or siblings. However, claims at the spelled-token
level do the wrong thing for macro expansions, where siblings can be
equally associated with the macro invocation.
Moreover, any model that only uses the endpoints in a range can fail when
a macro invocation occurs inside the node.

To address this, we use the existing TokenBuffer in more depth.
Claims are expressed in terms of expanded tokens, so there is no need to worry
about macros, includes etc.

Once we know which expanded tokens were claimed, they are mapped onto
spelled tokens for hit-testing.
This mapping is fairly flexible; currently the handling of macros is
pretty simple (map macro args onto spellings, other macro expansions onto the
macro name token).
This mapping is in principle token-by-token for correctness (though
there's some batching for performance).

The aggregation of the selection enum is now more principled as we need to be
able to aggregate several hit-test results together.

For simplicity I removed the ability to determine selectedness of TUDecl.
(That was originally implemented in 90a5bf92ff97b1, but doesn't seem to be very
important or worth the complexity any longer).

The expandedTokens(SourceLocation) helper could be added locally, but seems to
make sense on TokenBuffer.

Fixes https://github.com/clangd/clangd/issues/202
Fixes https://github.com/clangd/clangd/issues/126

Reviewers: hokein

Subscribers: MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits, ilya-biryukov

Tags: #clang

Differential Revision: https://reviews.llvm.org/D70512
2019-11-29 15:21:13 +01:00

797 lines
27 KiB
C++

//===- TokensTest.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/Syntax/Tokens.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/Expr.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemOptions.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.def"
#include "clang/Basic/TokenKinds.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendAction.h"
#include "clang/Frontend/Utils.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Lex/Token.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Testing/Support/Annotations.h"
#include "llvm/Testing/Support/SupportHelpers.h"
#include "gmock/gmock.h"
#include <cassert>
#include <cstdlib>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <memory>
#include <ostream>
#include <string>
using namespace clang;
using namespace clang::syntax;
using llvm::ValueIs;
using ::testing::AllOf;
using ::testing::Contains;
using ::testing::ElementsAre;
using ::testing::Field;
using ::testing::Matcher;
using ::testing::Not;
using ::testing::StartsWith;
namespace {
// Matches an ArrayRef<T> whose begin() and end() iterators are equal to those
// of the matcher argument. The comparison is on the iterators themselves, not
// on the elements they refer to.
MATCHER_P(SameRange, A, "") {
  const bool SameBegin = arg.begin() == A.begin();
  const bool SameEnd = arg.end() == A.end();
  return SameBegin && SameEnd;
}
/// Builds a matcher for TokenBuffer::Expansion that applies \p Spelled to the
/// Spelled field and \p Expanded to the Expanded field. Both must match.
Matcher<TokenBuffer::Expansion>
IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled,
            Matcher<llvm::ArrayRef<syntax::Token>> Expanded) {
  auto SpelledMatches = Field(&TokenBuffer::Expansion::Spelled, Spelled);
  auto ExpandedMatches = Field(&TokenBuffer::Expansion::Expanded, Expanded);
  return AllOf(SpelledMatches, ExpandedMatches);
}
// Matchers for syntax::Token.
// Matches a token whose kind() equals K.
MATCHER_P(Kind, K, "") {
  return K == arg.kind();
}
// Matches a token whose text, as obtained through the SourceManager pointed to
// by SourceMgr, equals Text.
MATCHER_P2(HasText, Text, SourceMgr, "") {
  const auto Spelling = arg.text(*SourceMgr);
  return Spelling == Text;
}
/// Matches a token whose location() equals SourceRng.first and whose
/// endLocation() equals SourceRng.second.
MATCHER_P(RangeIs, SourceRng, "") {
  if (arg.location() != SourceRng.first)
    return false;
  return arg.endLocation() == SourceRng.second;
}
/// Test fixture that runs the clang frontend over in-memory source text and
/// keeps the resulting syntax::TokenBuffer in this->Buffer, together with
/// helpers for locating token ranges by their text.
class TokenCollectorTest : public ::testing::Test {
public:
  /// Run the clang frontend, collect the preprocessed tokens from the frontend
  /// invocation and store them in this->Buffer.
  /// This also clears SourceManager before running the compiler.
  void recordTokens(llvm::StringRef Code) {
    // Frontend action that attaches a TokenCollector to the preprocessor when
    // the source file begins and hands the collected buffer to Result when it
    // ends.
    class RecordTokens : public ASTFrontendAction {
    public:
      explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}

      bool BeginSourceFileAction(CompilerInstance &CI) override {
        assert(!Collector && "expected only a single call to BeginSourceFile");
        Collector.emplace(CI.getPreprocessor());
        return true;
      }
      void EndSourceFileAction() override {
        assert(Collector && "BeginSourceFileAction was never called");
        Result = std::move(*Collector).consume();
      }

      // The action only needs the token stream, so a default ASTConsumer is
      // enough.
      std::unique_ptr<ASTConsumer>
      CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
        return std::make_unique<ASTConsumer>();
      }

    private:
      TokenBuffer &Result;
      llvm::Optional<TokenCollector> Collector;
    };

    constexpr const char *FileName = "./input.cpp";
    // The VFS entry is an empty placeholder; the actual contents are supplied
    // through the remapped file below.
    FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
    // Prepare to run a compiler.
    if (!Diags->getClient())
      Diags->setClient(new IgnoringDiagConsumer);
    std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
                                      FileName};
    auto CI = createInvocationFromCommandLine(Args, Diags, FS);
    assert(CI);
    // NOTE(review): presumably set so the compiler instance frees its
    // resources when it goes out of scope - confirm.
    CI->getFrontendOpts().DisableFree = false;
    CI->getPreprocessorOpts().addRemappedFile(
        FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
    CompilerInstance Compiler;
    Compiler.setInvocation(std::move(CI));
    Compiler.setDiagnostics(Diags.get());
    Compiler.setFileManager(FileMgr.get());
    Compiler.setSourceManager(SourceMgr.get());

    // Start from a fresh buffer so results of an earlier call do not leak in.
    this->Buffer = TokenBuffer(*SourceMgr);
    RecordTokens Recorder(this->Buffer);
    ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
        << "failed to run the frontend";
  }

  /// Record the tokens and return a test dump of the resulting buffer.
  std::string collectAndDump(llvm::StringRef Code) {
    recordTokens(Code);
    return Buffer.dumpForTests();
  }

  // Adds a file to the test VFS. Reports a test failure if the VFS rejects it.
  void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
    if (!FS->addFile(Path, time_t(),
                     llvm::MemoryBuffer::getMemBufferCopy(Contents))) {
      ADD_FAILURE() << "could not add a file to VFS: " << Path;
    }
  }

  /// Add a new file, run syntax::tokenize() on it and return the results.
  std::vector<syntax::Token> tokenize(llvm::StringRef Text) {
    // FIXME: pass proper LangOptions.
    return syntax::tokenize(
        SourceMgr->createFileID(llvm::MemoryBuffer::getMemBufferCopy(Text)),
        *SourceMgr, LangOptions());
  }

  // Specialized versions of matchers that hide the SourceManager from clients.
  Matcher<syntax::Token> HasText(std::string Text) const {
    return ::HasText(Text, SourceMgr.get());
  }
  // Translates the annotation offsets in R into source locations of the main
  // file and matches tokens spanning exactly that range.
  Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const {
    std::pair<SourceLocation, SourceLocation> Ls;
    Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
                   .getLocWithOffset(R.Begin);
    Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
                    .getLocWithOffset(R.End);
    return ::RangeIs(Ls);
  }

  /// Finds the first occurrence of \p Subrange inside \p Range in O(n * m),
  /// comparing elements with \p F(SubrangeElement, RangeElement).
  /// Returns the matching slice of \p Range, or an empty range positioned at
  /// Range.end() when no complete match exists.
  template <class T, class U, class Eq>
  llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
                                 llvm::ArrayRef<T> Range, Eq F) {
    // Only windows large enough to hold the whole subrange are candidates.
    // Without this bound, a prefix of Subrange that matches the tail of Range
    // would be reported as a (truncated) hit.
    if (Range.size() < Subrange.size())
      return llvm::makeArrayRef(Range.end(), Range.end());
    for (auto Begin = Range.begin(), Last = Range.end() - Subrange.size();
         Begin <= Last; ++Begin) {
      auto It = Begin;
      for (auto ItSub = Subrange.begin(); ItSub != Subrange.end();
           ++ItSub, ++It) {
        if (!F(*ItSub, *It))
          goto continue_outer;
      }
      return llvm::makeArrayRef(Begin, It);
    continue_outer:;
    }
    return llvm::makeArrayRef(Range.end(), Range.end());
  }

  /// Finds a subrange in \p Tokens that match the tokens specified in \p Query.
  /// The match should be unique. \p Query is a whitespace-separated list of
  /// tokens to search for.
  /// An empty query, a missing match or an ambiguous match is reported as a
  /// test failure and then aborts the process.
  llvm::ArrayRef<syntax::Token>
  findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) {
    llvm::SmallVector<llvm::StringRef, 8> QueryTokens;
    Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
    if (QueryTokens.empty()) {
      ADD_FAILURE() << "will not look for an empty list of tokens";
      std::abort();
    }
    // An equality test for search.
    auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) {
      return Q == T.text(*SourceMgr);
    };
    // Find a match.
    auto Found =
        findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches);
    if (Found.begin() == Tokens.end()) {
      ADD_FAILURE() << "could not find the subrange for " << Query;
      std::abort();
    }
    // Check that the match is unique: search again in what follows the match.
    if (findSubrange(llvm::makeArrayRef(QueryTokens),
                     llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches)
            .begin() != Tokens.end()) {
      ADD_FAILURE() << "match is not unique for " << Query;
      std::abort();
    }
    return Found;
  }

  // Specialized versions of findTokenRange for expanded and spelled tokens.
  llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) {
    return findTokenRange(Query, Buffer.expandedTokens());
  }
  // An invalid (default) File selects the main file.
  llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query,
                                            FileID File = FileID()) {
    if (!File.isValid())
      File = SourceMgr->getMainFileID();
    return findTokenRange(Query, Buffer.spelledTokens(File));
  }

  // Data fields.
  llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
      new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
      new llvm::vfs::InMemoryFileSystem;
  llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
      new FileManager(FileSystemOptions(), FS);
  llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
      new SourceManager(*Diags, *FileMgr);
  /// Contains last result of calling recordTokens().
  TokenBuffer Buffer = TokenBuffer(*SourceMgr);
};
// Checks the tokens the tokenize() helper produces for plain source text.
TEST_F(TokenCollectorTest, RawMode) {
  const std::vector<syntax::Token> MainTokens = tokenize("int main() {}");
  EXPECT_THAT(MainTokens,
              ElementsAre(Kind(tok::kw_int),
                          AllOf(HasText("main"), Kind(tok::identifier)),
                          Kind(tok::l_paren), Kind(tok::r_paren),
                          Kind(tok::l_brace), Kind(tok::r_brace)));

  // Comments are ignored for now.
  const std::vector<syntax::Token> CommentedTokens =
      tokenize("/* foo */int a; // more comments");
  EXPECT_THAT(CommentedTokens,
              ElementsAre(Kind(tok::kw_int),
                          AllOf(HasText("a"), Kind(tok::identifier)),
                          Kind(tok::semi)));
}
// Golden-output test: for every (input, expected) pair the dump of the token
// buffer recorded for the input must equal the expected text exactly.
TEST_F(TokenCollectorTest, Basic) {
std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
{"int main() {}",
R"(expanded tokens:
int main ( ) { }
file './input.cpp'
spelled tokens:
int main ( ) { }
no mappings.
)"},
// All kinds of whitespace are ignored.
{"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n",
R"(expanded tokens:
int main ( ) { }
file './input.cpp'
spelled tokens:
int main ( ) { }
no mappings.
)"},
// Annotation tokens are ignored.
{R"cpp(
#pragma GCC visibility push (public)
#pragma GCC visibility pop
)cpp",
R"(expanded tokens:
<empty>
file './input.cpp'
spelled tokens:
# pragma GCC visibility push ( public ) # pragma GCC visibility pop
mappings:
['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
)"},
// Empty files should not crash.
{R"cpp()cpp", R"(expanded tokens:
<empty>
file './input.cpp'
spelled tokens:
<empty>
no mappings.
)"},
// Should not crash on errors inside '#define' directives. Error is that
// stringification (#B) does not refer to a macro parameter.
{
R"cpp(
a
#define MACRO() A #B
)cpp",
R"(expanded tokens:
a
file './input.cpp'
spelled tokens:
a # define MACRO ( ) A # B
mappings:
['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1)
)"}};
// The streamed operand produces a fresh dump of the same input for the
// failure message.
for (auto &Test : TestCases)
EXPECT_EQ(collectAndDump(Test.first), Test.second)
<< collectAndDump(Test.first);
}
// Verifies that both the expanded and the spelled tokens carry the exact source
// ranges marked by the annotations in the input.
TEST_F(TokenCollectorTest, Locations) {
  llvm::Annotations Code(R"cpp(
$r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]]
)cpp");
  recordTokens(Code.code());

  // One matcher per annotated token: its kind plus its exact range. They are
  // built after recordTokens() because RangeIs() resolves offsets against the
  // main file of the SourceManager.
  const Matcher<syntax::Token> IntKeyword =
      AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1")));
  const Matcher<syntax::Token> Name =
      AllOf(Kind(tok::identifier), RangeIs(Code.range("r2")));
  const Matcher<syntax::Token> Equals =
      AllOf(Kind(tok::equal), RangeIs(Code.range("r3")));
  const Matcher<syntax::Token> Literal =
      AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4")));
  const Matcher<syntax::Token> Semicolon =
      AllOf(Kind(tok::semi), RangeIs(Code.range("r5")));

  // Expanded tokens: the five annotated tokens followed by eof.
  EXPECT_THAT(Buffer.expandedTokens(),
              ElementsAre(IntKeyword, Name, Equals, Literal, Semicolon,
                          Kind(tok::eof)));
  // Spelled tokens of the main file: the same five tokens, without eof.
  EXPECT_THAT(Buffer.spelledTokens(SourceMgr->getMainFileID()),
              ElementsAre(IntKeyword, Name, Equals, Literal, Semicolon));
}
// Preprocessor directives appear among the spelled tokens but contribute
// nothing to the expanded tokens.
TEST_F(TokenCollectorTest, MacroDirectives) {
  // Macro directives are not stored anywhere at the moment.
  const std::string Source = R"cpp(
#define FOO a
#include "unresolved_file.h"
#undef FOO
#ifdef X
#else
#endif
#ifndef Y
#endif
#if 1
#elif 2
#else
#endif
#pragma once
#pragma something lalala
int a;
)cpp";
  const std::string Golden =
      "expanded tokens:\n"
      " int a ;\n"
      "file './input.cpp'\n"
      " spelled tokens:\n"
      " # define FOO a # include \"unresolved_file.h\" # undef FOO "
      "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
      "# endif # pragma once # pragma something lalala int a ;\n"
      " mappings:\n"
      " ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";

  const std::string Dump = collectAndDump(Source);
  EXPECT_EQ(Dump, Golden);
}
// Golden-output test for macro expansion: for every (input, expected) pair the
// dump of the recorded token buffer, including the spelled-to-expanded
// mappings, must equal the expected text exactly.
TEST_F(TokenCollectorTest, MacroReplacements) {
std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
// A simple object-like macro.
{R"cpp(
#define INT int const
INT a;
)cpp",
R"(expanded tokens:
int const a ;
file './input.cpp'
spelled tokens:
# define INT int const INT a ;
mappings:
['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
)"},
// A simple function-like macro.
{R"cpp(
#define INT(a) const int
INT(10+10) a;
)cpp",
R"(expanded tokens:
const int a ;
file './input.cpp'
spelled tokens:
# define INT ( a ) const int INT ( 10 + 10 ) a ;
mappings:
['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
)"},
// Recursive macro replacements.
{R"cpp(
#define ID(X) X
#define INT int const
ID(ID(INT)) a;
)cpp",
R"(expanded tokens:
int const a ;
file './input.cpp'
spelled tokens:
# define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
mappings:
['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
)"},
// A little more complicated recursive macro replacements.
{R"cpp(
#define ADD(X, Y) X+Y
#define MULT(X, Y) X*Y
int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
)cpp",
"expanded tokens:\n"
" int a = 1 * 2 + 3 * 4 + 5 ;\n"
"file './input.cpp'\n"
" spelled tokens:\n"
" # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
"a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
" mappings:\n"
" ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
" ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
// Empty macro replacement.
// FIXME: the #define directives should not be glued together.
{R"cpp(
#define EMPTY
#define EMPTY_FUNC(X)
EMPTY
EMPTY_FUNC(1+2+3)
)cpp",
R"(expanded tokens:
<empty>
file './input.cpp'
spelled tokens:
# define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
mappings:
['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0)
['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0)
['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
)"},
// File ends with a macro replacement.
{R"cpp(
#define FOO 10+10;
int a = FOO
)cpp",
R"(expanded tokens:
int a = 10 + 10 ;
file './input.cpp'
spelled tokens:
# define FOO 10 + 10 ; int a = FOO
mappings:
['#'_0, 'int'_7) => ['int'_0, 'int'_0)
['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
)"}};
// The streamed operand produces a fresh dump of the same input for the
// failure message.
for (auto &Test : TestCases)
EXPECT_EQ(Test.second, collectAndDump(Test.first))
<< collectAndDump(Test.first);
}
// Covers tokens that do not map onto one contiguous piece of source text:
// results of token pasting and tokens split across lines by backslashes.
TEST_F(TokenCollectorTest, SpecialTokens) {
  // Tokens coming from concatenations.
  recordTokens(R"cpp(
#define CONCAT(a, b) a ## b
int a = CONCAT(1, 2);
)cpp");
  const auto PastedRun = std::vector<syntax::Token>(Buffer.expandedTokens());
  EXPECT_THAT(PastedRun, Contains(HasText("12")));

  // Multi-line tokens with slashes at the end.
  recordTokens("i\\\nn\\\nt");
  const auto ContinuedInt = AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt"));
  EXPECT_THAT(Buffer.expandedTokens(),
              ElementsAre(ContinuedInt, Kind(tok::eof)));
  // FIXME: test tokens with digraphs and UCN identifiers.
}
// Both the '>>' closing two template argument lists and the '>>' shift
// operator are expected to be recorded as a single greatergreater token.
TEST_F(TokenCollectorTest, LateBoundTokens) {
  // The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
  // but we choose to record them as a single token (for now).
  llvm::Annotations Code(R"cpp(
template <class T>
struct foo { int a; };
int bar = foo<foo<int$br[[>>]]().a;
int baz = 10 $op[[>>]] 2;
)cpp");
  recordTokens(Code.code());

  const auto Recorded = std::vector<syntax::Token>(Buffer.expandedTokens());
  const auto TemplateClose =
      AllOf(Kind(tok::greatergreater), RangeIs(Code.range("br")));
  const auto ShiftOperator =
      AllOf(Kind(tok::greatergreater), RangeIs(Code.range("op")));
  EXPECT_THAT(Recorded,
              AllOf(Contains(TemplateClose), Contains(ShiftOperator)));
}
// The dump must start with each token of the class listed exactly once, even
// though member bodies and initializers are parsed late.
TEST_F(TokenCollectorTest, DelayedParsing) {
  llvm::StringLiteral Code = R"cpp(
struct Foo {
int method() {
// Parser will visit method bodies and initializers multiple times, but
// TokenBuffer should only record the first walk over the tokens;
return 100;
}
int a = 10;
struct Subclass {
void foo() {
Foo().method();
}
};
};
)cpp";
  const std::string ExpectedPrefix =
      "expanded tokens:\n"
      " struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct "
      "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";

  const std::string Dump = collectAndDump(Code);
  EXPECT_THAT(Dump, StartsWith(ExpectedPrefix));
}
// Golden-output test with nested includes: the main file includes foo.h, which
// includes bar.h. The dump is expected to list the spelled tokens and mappings
// of each of the three files separately.
TEST_F(TokenCollectorTest, MultiFile) {
addFile("./foo.h", R"cpp(
#define ADD(X, Y) X+Y
int a = 100;
#include "bar.h"
)cpp");
addFile("./bar.h", R"cpp(
int b = ADD(1, 2);
#define MULT(X, Y) X*Y
)cpp");
llvm::StringLiteral Code = R"cpp(
#include "foo.h"
int c = ADD(1, MULT(2,3));
)cpp";
std::string Expected = R"(expanded tokens:
int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
file './input.cpp'
spelled tokens:
# include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
mappings:
['#'_0, 'int'_3) => ['int'_12, 'int'_12)
['ADD'_6, ';'_17) => ['1'_15, ';'_20)
file './foo.h'
spelled tokens:
# define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
mappings:
['#'_0, 'int'_11) => ['int'_0, 'int'_0)
['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
file './bar.h'
spelled tokens:
int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
mappings:
['ADD'_3, ';'_9) => ['1'_8, ';'_11)
['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
)";
// The streamed operands add the input and a fresh dump to the failure message.
EXPECT_EQ(Expected, collectAndDump(Code))
<< "input: " << Code << "\nresults: " << collectAndDump(Code);
}
// Fixture for the TokenBuffer query tests. It adds nothing to
// TokenCollectorTest and only gives those tests their own suite name.
class TokenBufferTest : public TokenCollectorTest {};
// Exercises TokenBuffer::spelledForExpanded(): mapping a range of expanded
// tokens back to the spelled tokens it came from. Each recordTokens() call
// replaces the buffer that the expectations following it run against, so the
// order of the statements below matters.
TEST_F(TokenBufferTest, SpelledByExpanded) {
recordTokens(R"cpp(
a1 a2 a3 b1 b2
)cpp");
// Sanity check: expanded and spelled tokens are stored separately.
EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
// Searching for subranges of expanded tokens should give the corresponding
// spelled ones.
EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")),
ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
ValueIs(SameRange(findSpelled("a1 a2 a3"))));
EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
ValueIs(SameRange(findSpelled("b1 b2"))));
// Test search on simple macro expansions.
recordTokens(R"cpp(
#define A a1 a2 a3
#define B b1 b2
A split B
)cpp");
EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
ValueIs(SameRange(findSpelled("A split B"))));
// 'A' and 'B' also occur in the #define lines, so the macro uses are located
// through the unique neighbouring 'split' token and then trimmed.
EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
ValueIs(SameRange(findSpelled("A split").drop_back())));
EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
ValueIs(SameRange(findSpelled("split B").drop_front())));
// Ranges not fully covering macro invocations should fail.
EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None);
EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")),
llvm::None);
// Recursive macro invocations.
recordTokens(R"cpp(
#define ID(x) x
#define B b1 b2
ID(ID(ID(a1) a2 a3)) split ID(B)
)cpp");
EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )"))));
EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
ValueIs(SameRange(findSpelled("ID ( B )"))));
EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
ValueIs(SameRange(findSpelled(
"ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
// Ranges crossing macro call boundaries.
EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1")),
llvm::None);
EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1")),
llvm::None);
// FIXME: next two examples should map to macro arguments, but currently they
// fail.
EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2")), llvm::None);
EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
// Empty macro expansions.
recordTokens(R"cpp(
#define EMPTY
#define ID(X) X
EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
EMPTY EMPTY ID(4 5 6) split2
ID(7 8 9) EMPTY EMPTY
)cpp");
EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")),
ValueIs(SameRange(findSpelled("ID ( 1 2 3 )"))));
EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")),
ValueIs(SameRange(findSpelled("ID ( 4 5 6 )"))));
EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")),
ValueIs(SameRange(findSpelled("ID ( 7 8 9 )"))));
// Empty mappings coming from various directives.
recordTokens(R"cpp(
#define ID(X) X
ID(1)
#pragma lalala
not_mapped
)cpp");
EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")),
ValueIs(SameRange(findSpelled("not_mapped"))));
}
// Exercises TokenBuffer::expandedTokens(SourceRange): a range given by the
// locations of two expanded tokens must yield all expanded tokens from the
// first to the second, inclusive.
TEST_F(TokenBufferTest, ExpandedTokensForRange) {
  recordTokens(R"cpp(
#define SIGN(X) X##_washere
A SIGN(B) C SIGN(D) E SIGN(F) G
)cpp");

  const SourceLocation First = findExpanded("C").front().location();
  const SourceLocation Last = findExpanded("F_washere").front().location();
  SourceRange R(First, Last);
  // The range starts at a plain token and ends at a token produced by a macro
  // expansion.
  EXPECT_THAT(Buffer.expandedTokens(R),
              SameRange(findExpanded("C D_washere E F_washere")));
  // A default-constructed range selects nothing.
  EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty());
}
// Exercises TokenBuffer::expansionStartingAt(): an expansion is expected only
// when the query is the first spelled token of a macro invocation or of a
// preprocessor directive. Each recordTokens() call replaces the buffer that the
// expectations following it run against.
TEST_F(TokenBufferTest, ExpansionStartingAt) {
// Object-like macro expansions.
recordTokens(R"cpp(
#define FOO 3+4
int a = FOO 1;
int b = FOO 2;
)cpp");
// 'FOO' is used twice, so each use is located together with the unique token
// that follows it, which is then dropped again.
llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1").drop_back();
EXPECT_THAT(
Buffer.expansionStartingAt(Foo1.data()),
ValueIs(IsExpansion(SameRange(Foo1),
SameRange(findExpanded("3 + 4 1").drop_back()))));
llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2").drop_back();
EXPECT_THAT(
Buffer.expansionStartingAt(Foo2.data()),
ValueIs(IsExpansion(SameRange(Foo2),
SameRange(findExpanded("3 + 4 2").drop_back()))));
// Function-like macro expansions.
recordTokens(R"cpp(
#define ID(X) X
int a = ID(1+2+3);
int b = ID(ID(2+3+4));
)cpp");
llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )");
EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()),
ValueIs(IsExpansion(SameRange(ID1),
SameRange(findExpanded("1 + 2 + 3")))));
// Only the first spelled token should be found.
for (const auto &T : ID1.drop_front())
EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )");
EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()),
ValueIs(IsExpansion(SameRange(ID2),
SameRange(findExpanded("2 + 3 + 4")))));
// Only the first spelled token should be found.
for (const auto &T : ID2.drop_front())
EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
// PP directives.
recordTokens(R"cpp(
#define FOO 1
int a = FOO;
#pragma once
int b = 1;
)cpp");
llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1");
// The expected expanded range of a directive is empty: take_front(0) yields an
// empty range positioned at the start of the tokens found after the directive.
EXPECT_THAT(
Buffer.expansionStartingAt(&DefineFoo.front()),
ValueIs(IsExpansion(SameRange(DefineFoo),
SameRange(findExpanded("int a").take_front(0)))));
// Only the first spelled token should be found.
for (const auto &T : DefineFoo.drop_front())
EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once");
EXPECT_THAT(
Buffer.expansionStartingAt(&PragmaOnce.front()),
ValueIs(IsExpansion(SameRange(PragmaOnce),
SameRange(findExpanded("int b").take_front(0)))));
// Only the first spelled token should be found.
for (const auto &T : PragmaOnce.drop_front())
EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
}
// Checks the conversion of a single token, and of a pair of tokens, into a
// FileRange expressed as offsets in the main file.
TEST_F(TokenBufferTest, TokensToFileRange) {
  addFile("./foo.h", "token_from_header");
  llvm::Annotations Code(R"cpp(
#define FOO token_from_expansion
#include "./foo.h"
$all[[$i[[int]] a = FOO;]]
)cpp");
  recordTokens(Code.code());

  const SourceManager &SM = *SourceMgr;
  const FileID MainFile = SM.getMainFileID();
  const llvm::Annotations::Range IntRange = Code.range("i");
  const llvm::Annotations::Range StatementRange = Code.range("all");

  // Two simple examples.
  const syntax::Token &Int = findExpanded("int").front();
  const syntax::Token &Semi = findExpanded(";").front();
  EXPECT_EQ(Int.range(SM),
            FileRange(MainFile, IntRange.Begin, IntRange.End));
  EXPECT_EQ(syntax::Token::range(SM, Int, Semi),
            FileRange(MainFile, StatementRange.Begin, StatementRange.End));
  // We don't test assertion failures because death tests are slow.
}
// Buffer.macroExpansions() for the main file must return tokens whose file
// ranges equal the ranges annotated with $macro, in the same order.
TEST_F(TokenBufferTest, macroExpansions) {
  llvm::Annotations Code(R"cpp(
#define FOO B
#define FOO2 BA
#define CALL(X) int X
#define G CALL(FOO2)
int B;
$macro[[FOO]];
$macro[[CALL]](A);
$macro[[G]];
)cpp");
  recordTokens(Code.code());

  const SourceManager &SM = *SourceMgr;
  const FileID MainFile = SM.getMainFileID();

  // Ranges the annotations say should be reported.
  std::vector<FileRange> Wanted;
  for (const auto &Annotated : Code.ranges("macro"))
    Wanted.push_back(FileRange(MainFile, Annotated.Begin, Annotated.End));

  // Ranges of the tokens the buffer actually reports.
  std::vector<FileRange> Reported;
  for (const syntax::Token *MacroToken : Buffer.macroExpansions(MainFile))
    Reported.push_back(MacroToken->range(SM));

  EXPECT_EQ(Wanted, Reported);
}
} // namespace