Kate Stone b9c1b51e45 *** This commit represents a complete reformatting of the LLDB source code
*** to conform to clang-format’s LLVM style.  This kind of mass change has
*** two obvious implications:

Firstly, merging this particular commit into a downstream fork may be a huge
effort.  Alternatively, it may be worth merging all changes up to this commit,
performing the same reformatting operation locally, and then discarding the
merge for this particular commit.  The commands used to accomplish this
reformatting were as follows (with current working directory as the root of
the repository):

    find . \( -iname "*.c" -or -iname "*.cpp" -or -iname "*.h" -or -iname "*.mm" \) -exec clang-format -i {} +
    find . -iname "*.py" -exec autopep8 --in-place --aggressive --aggressive {} + ;

The version of clang-format used was 3.9.0, and autopep8 was 1.2.4.

Secondly, “blame” style tools will generally point to this commit instead of
a meaningful prior commit.  There are alternatives available that will attempt
to look through this change and find the appropriate prior commit.  YMMV.

llvm-svn: 280751
2016-09-06 20:57:50 +00:00

351 lines
7.8 KiB
C++

//===-- GoLexer.cpp ---------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include <string.h>
#include "GoLexer.h"
using namespace lldb_private;
// Table mapping keyword and operator spellings to their token types. It is a
// static member, so it starts out null and is shared by every GoLexer; the
// first call to LookupKeyword() or LookupToken() fills it via InitKeywords().
llvm::StringMap<GoLexer::TokenType> *GoLexer::m_keywords;
/// Creates a lexer over the NUL-terminated string \p src.
///
/// Only the pointer is stored (the text is not copied), so the buffer has to
/// stay valid for as long as the lexer is used. m_end is set to the
/// terminating NUL found by strlen(), and the "previous token" starts out as
/// TOK_INVALID with an empty value.
///
/// \param src Source text to tokenize; must be non-null because it is passed
///            straight to strlen().
GoLexer::GoLexer(const char *src)
: m_src(src), m_end(src + strlen(src)), m_last_token(TOK_INVALID, "") {}
/// Advances m_src past any run of whitespace and comments.
///
/// On return m_src is either at m_end or at the first character of the next
/// token. A '/' that does not open a complete comment (a division operator or
/// an unterminated block comment) is left in place to be lexed as a token.
///
/// \return true if a newline was crossed while skipping, including a newline
///         inside a block comment (the Go spec treats a multi-line general
///         comment like a newline). Lex() feeds this to InternalLex(), which
///         uses it to decide whether to insert a semicolon.
bool GoLexer::SkipWhitespace() {
  bool saw_newline = false;
  for (; m_src < m_end; ++m_src) {
    if (*m_src == '\n')
      saw_newline = true;
    if (*m_src == '/') {
      const char *comment_start = m_src;
      if (!SkipComment())
        return saw_newline;
      // SkipComment() left m_src on the last character of the comment. Any
      // newline between the start and that point belongs to a block comment.
      if (memchr(comment_start, '\n',
                 static_cast<size_t>(m_src - comment_start)) != nullptr)
        saw_newline = true;
      // Let the loop increment step over the comment's last character. It
      // must not be tested with IsWhitespace(): it is part of the comment,
      // and testing it used to end the scan in the middle of the comment.
      continue;
    }
    if (!IsWhitespace(*m_src))
      return saw_newline;
  }
  return saw_newline;
}

/// Tries to consume a comment that begins at m_src.
///
/// Expects m_src < m_end (m_src[1] is then at worst the terminating NUL).
///
/// On success m_src is left on the LAST character that belongs to the comment,
/// so that the caller's own increment moves past it:
///  - "// ..." : the character just before the terminating '\n' (the newline
///               itself is left for the caller to see), or the final character
///               of the input when the comment runs to the end of the text.
///  - "/* */"  : the closing '/'.
///
/// \return true if a comment was consumed; false if m_src does not start a
///         comment or the block comment is never closed (m_src is unchanged).
bool GoLexer::SkipComment() {
  if (m_src[0] != '/')
    return false;

  if (m_src[1] == '/') {
    for (const char *c = m_src + 2; c < m_end; ++c) {
      if (*c == '\n') {
        m_src = c - 1;
        return true;
      }
    }
    // No newline: the comment extends to the end of the input. Consume all of
    // it; leaving m_src untouched would let the comment text be lexed.
    m_src = m_end - 1;
    return true;
  }

  if (m_src[1] == '*') {
    for (const char *c = m_src + 2; c < m_end; ++c) {
      // c[1] is at worst the terminating NUL, so this never reads past it.
      if (c[0] == '*' && c[1] == '/') {
        m_src = c + 1;
        return true;
      }
    }
  }
  return false;
}
/// Produces the next token from the input.
///
/// Leading whitespace and comments are skipped first. The token's value is the
/// slice of source text consumed by InternalLex(); when InternalLex() returns
/// without advancing m_src (an inserted semicolon, or end of input) that slice
/// is empty.
///
/// \return a reference to the lexer's internal token, which is overwritten by
///         the next call.
const GoLexer::Token &GoLexer::Lex() {
  const bool crossed_newline = SkipWhitespace();
  const char *const token_begin = m_src;
  // InternalLex() inspects the previous token's type, so the type is only
  // overwritten once it has returned.
  const TokenType type = InternalLex(crossed_newline);
  m_last_token.m_type = type;
  m_last_token.m_value = llvm::StringRef(token_begin, m_src - token_begin);
  return m_last_token;
}
/// Classifies the token starting at m_src and advances m_src past it.
///
/// \param newline true if a newline was skipped since the previous token.
///
/// \return TOK_EOF at end of input; OP_SEMICOLON (without consuming anything)
///         when a newline follows a token that can end a statement; otherwise
///         the result of the matching Do*() scanner, or TOK_INVALID after
///         skipping one unrecognized character.
GoLexer::TokenType GoLexer::InternalLex(bool newline) {
  if (m_src >= m_end)
    return TOK_EOF;

  // Semicolon insertion: a newline after any of these token kinds yields a
  // synthetic ';'. m_src is deliberately left untouched.
  if (newline) {
    switch (m_last_token.m_type) {
    case TOK_IDENTIFIER:
    case LIT_FLOAT: case LIT_IMAGINARY: case LIT_INTEGER:
    case LIT_RUNE: case LIT_STRING:
    case KEYWORD_BREAK: case KEYWORD_CONTINUE:
    case KEYWORD_FALLTHROUGH: case KEYWORD_RETURN:
    case OP_PLUS_PLUS: case OP_MINUS_MINUS:
    case OP_RPAREN: case OP_RBRACK: case OP_RBRACE:
      return OP_SEMICOLON;
    default:
      break;
    }
  }

  const char first = *m_src;

  if (first >= '0' && first <= '9')
    return DoNumber();

  // ".5" is a number, a bare '.' (or "...") is an operator.
  if (first == '.')
    return IsDecimal(m_src[1]) ? DoNumber() : DoOperator();

  if (first == '"' || first == '`')
    return DoString();

  if (first == '\'')
    return DoRune();

  // For lldb persistent vars.
  if (first == '$')
    return DoIdent();

  // Every character that can begin an operator other than '.'.
  static const char operator_starts[] = "+-*/%&|^<>!:;()[]{},=";
  if (first != '\0' && strchr(operator_starts, first) != nullptr)
    return DoOperator();

  if (IsLetterOrDigit(first))
    return DoIdent();

  // Unknown character: consume it so the lexer keeps making progress.
  ++m_src;
  return TOK_INVALID;
}
/// Lexes an operator or punctuation token using longest-match: a three
/// character spelling is tried first, then two characters, then one.
///
/// \return the matched operator's token type. If not even the single
///         character is in the table, that character is still consumed and
///         TOK_INVALID is returned.
GoLexer::TokenType GoLexer::DoOperator() {
  const auto remaining = m_end - m_src;

  // Multi-character candidates, only when that many characters are left.
  for (int width = 3; width >= 2; --width) {
    if (remaining < width)
      continue;
    const TokenType match = LookupKeyword(llvm::StringRef(m_src, width));
    if (match != TOK_INVALID) {
      m_src += width;
      return match;
    }
  }

  // Fall back to a single character, which is consumed whether or not it is a
  // known operator.
  const TokenType single = LookupKeyword(llvm::StringRef(m_src, 1));
  ++m_src;
  return single;
}
/// Lexes an identifier or keyword.
///
/// The first character is accepted unconditionally (the caller has already
/// vetted it; it may be '$'), followed by every subsequent character for which
/// IsLetterOrDigit() holds.
///
/// \return the keyword's token type if the text is in the keyword table,
///         TOK_IDENTIFIER otherwise.
GoLexer::TokenType GoLexer::DoIdent() {
  const char *const first = m_src;
  for (++m_src; m_src < m_end && IsLetterOrDigit(*m_src); ++m_src) {
  }

  const llvm::StringRef word(first, m_src - first);
  const TokenType keyword = LookupKeyword(word);
  return keyword == TOK_INVALID ? TOK_IDENTIFIER : keyword;
}
/// Lexes a numeric literal starting at m_src.
///
/// Recognizes hexadecimal integers ("0x"/"0X" prefix), decimal integers,
/// floats with at most one '.' and one exponent, and imaginary literals (a
/// trailing 'i'). The scan stops at the first character that cannot extend
/// the literal; it relies on the terminating NUL of the input rather than on
/// m_end to stop at the end of the text.
///
/// \return LIT_INTEGER, LIT_FLOAT or LIT_IMAGINARY.
GoLexer::TokenType GoLexer::DoNumber() {
  const bool is_hex = m_src[0] == '0' && (m_src[1] == 'x' || m_src[1] == 'X');
  if (is_hex) {
    for (m_src += 2; IsHexChar(*m_src); ++m_src) {
    }
    return LIT_INTEGER;
  }

  bool may_take_dot = true;      // No '.' or exponent seen yet.
  bool may_take_exponent = true; // No exponent seen yet.
  for (;;) {
    while (IsDecimal(*m_src))
      ++m_src;

    const char next = *m_src;
    if (next == 'i') {
      ++m_src;
      return LIT_IMAGINARY;
    }

    if (next == '.') {
      // A second '.' (or one after the exponent) ends the literal.
      if (!may_take_dot)
        return LIT_FLOAT;
      may_take_dot = false;
      ++m_src;
    } else if (next == 'e' || next == 'E') {
      if (!may_take_exponent)
        return LIT_FLOAT;
      may_take_dot = may_take_exponent = false;
      ++m_src;
      // Optional sign on the exponent.
      if (*m_src == '+' || *m_src == '-')
        ++m_src;
    } else {
      // Still allowed a '.' means none was seen, and no exponent either.
      return may_take_dot ? LIT_INTEGER : LIT_FLOAT;
    }
  }
}
/// Lexes a rune literal; m_src must be on the opening single quote.
///
/// Scans to the matching closing quote, treating a backslash as escaping the
/// character after it. The contents are not validated (any number of
/// characters between the quotes is accepted).
///
/// \return LIT_RUNE with m_src just past the closing quote, or TOK_INVALID if
///         a newline or the end of input is reached first. On end of input
///         m_src is left exactly at m_end.
GoLexer::TokenType GoLexer::DoRune() {
  while (++m_src < m_end) {
    switch (*m_src) {
    case '\'':
      ++m_src;
      return LIT_RUNE;
    case '\n':
      return TOK_INVALID;
    case '\\':
      if (m_src[1] == '\n')
        return TOK_INVALID;
      // Skip the escaped character, but only if there is one: a backslash as
      // the very last byte must not push m_src beyond m_end (the loop's own
      // increment would then land on m_end + 1 and the token would swallow
      // the terminating NUL).
      if (m_src + 1 < m_end)
        ++m_src;
      break;
    default:
      break;
    }
  }
  return TOK_INVALID;
}
/// Lexes a string literal; m_src must be on the opening '"' or '`'.
///
/// A back-quoted (raw) string runs to the next back quote with no escape
/// processing and may contain newlines. A double-quoted string runs to the
/// next unescaped '"'; a backslash escapes the character after it, and a
/// newline before the closing quote makes the literal invalid.
///
/// \return LIT_STRING with m_src just past the closing quote, or TOK_INVALID
///         if the literal is not terminated. On end of input m_src is left
///         exactly at m_end.
GoLexer::TokenType GoLexer::DoString() {
  if (*m_src == '`') {
    while (++m_src < m_end) {
      if (*m_src == '`') {
        ++m_src;
        return LIT_STRING;
      }
    }
    return TOK_INVALID;
  }

  while (++m_src < m_end) {
    switch (*m_src) {
    case '"':
      ++m_src;
      return LIT_STRING;
    case '\n':
      return TOK_INVALID;
    case '\\':
      if (m_src[1] == '\n')
        return TOK_INVALID;
      // Skip the escaped character, but only if there is one: a backslash as
      // the very last byte must not push m_src beyond m_end (the loop's own
      // increment would then land on m_end + 1 and the token would swallow
      // the terminating NUL).
      if (m_src + 1 < m_end)
        ++m_src;
      break;
    default:
      break;
    }
  }
  return TOK_INVALID;
}
/// Maps a keyword or operator spelling to its token type.
///
/// Builds the shared spelling table on first use.
///
/// \param id Exact text to look up.
/// \return the token type registered for \p id, or TOK_INVALID if there is
///         none.
GoLexer::TokenType GoLexer::LookupKeyword(llvm::StringRef id) {
  if (!m_keywords)
    m_keywords = InitKeywords();

  auto entry = m_keywords->find(id);
  return entry != m_keywords->end() ? entry->getValue() : TOK_INVALID;
}
/// Reverse lookup: returns the spelling registered for token type \p t.
///
/// Builds the shared spelling table on first use, then scans it linearly.
///
/// \param t Token type to look up.
/// \return the keyword/operator text for \p t, or an empty string when the
///         table has no entry with that type.
llvm::StringRef GoLexer::LookupToken(TokenType t) {
  if (!m_keywords)
    m_keywords = InitKeywords();

  for (auto it = m_keywords->begin(), last = m_keywords->end(); it != last;
       ++it) {
    if (it->getValue() == t)
      return it->getKey();
  }
  return "";
}
/// Builds the table that maps every keyword and operator spelling to its
/// token type.
///
/// \return a newly heap-allocated map. Ownership passes to the caller; within
///         this file the result is stored in m_keywords, and no code here
///         frees it.
llvm::StringMap<GoLexer::TokenType> *GoLexer::InitKeywords() {
  struct Spelling {
    const char *text;
    TokenType type;
  };

  static const Spelling spellings[] = {
      // Keywords.
      {"break", KEYWORD_BREAK},
      {"default", KEYWORD_DEFAULT},
      {"func", KEYWORD_FUNC},
      {"interface", KEYWORD_INTERFACE},
      {"select", KEYWORD_SELECT},
      {"case", KEYWORD_CASE},
      {"defer", KEYWORD_DEFER},
      {"go", KEYWORD_GO},
      {"map", KEYWORD_MAP},
      {"struct", KEYWORD_STRUCT},
      {"chan", KEYWORD_CHAN},
      {"else", KEYWORD_ELSE},
      {"goto", KEYWORD_GOTO},
      {"package", KEYWORD_PACKAGE},
      {"switch", KEYWORD_SWITCH},
      {"const", KEYWORD_CONST},
      {"fallthrough", KEYWORD_FALLTHROUGH},
      {"if", KEYWORD_IF},
      {"range", KEYWORD_RANGE},
      {"type", KEYWORD_TYPE},
      {"continue", KEYWORD_CONTINUE},
      {"for", KEYWORD_FOR},
      {"import", KEYWORD_IMPORT},
      {"return", KEYWORD_RETURN},
      {"var", KEYWORD_VAR},
      // Arithmetic and bitwise operators.
      {"+", OP_PLUS},
      {"-", OP_MINUS},
      {"*", OP_STAR},
      {"/", OP_SLASH},
      {"%", OP_PERCENT},
      {"&", OP_AMP},
      {"|", OP_PIPE},
      {"^", OP_CARET},
      {"<<", OP_LSHIFT},
      {">>", OP_RSHIFT},
      {"&^", OP_AMP_CARET},
      // Compound assignment.
      {"+=", OP_PLUS_EQ},
      {"-=", OP_MINUS_EQ},
      {"*=", OP_STAR_EQ},
      {"/=", OP_SLASH_EQ},
      {"%=", OP_PERCENT_EQ},
      {"&=", OP_AMP_EQ},
      {"|=", OP_PIPE_EQ},
      {"^=", OP_CARET_EQ},
      {"<<=", OP_LSHIFT_EQ},
      {">>=", OP_RSHIFT_EQ},
      {"&^=", OP_AMP_CARET_EQ},
      // Logical, channel, increment/decrement, comparison and assignment.
      {"&&", OP_AMP_AMP},
      {"||", OP_PIPE_PIPE},
      {"<-", OP_LT_MINUS},
      {"++", OP_PLUS_PLUS},
      {"--", OP_MINUS_MINUS},
      {"==", OP_EQ_EQ},
      {"<", OP_LT},
      {">", OP_GT},
      {"=", OP_EQ},
      {"!", OP_BANG},
      {"!=", OP_BANG_EQ},
      {"<=", OP_LT_EQ},
      {">=", OP_GT_EQ},
      {":=", OP_COLON_EQ},
      // Punctuation.
      {"...", OP_DOTS},
      {"(", OP_LPAREN},
      {"[", OP_LBRACK},
      {"{", OP_LBRACE},
      {",", OP_COMMA},
      {".", OP_DOT},
      {")", OP_RPAREN},
      {"]", OP_RBRACK},
      {"}", OP_RBRACE},
      {";", OP_SEMICOLON},
      {":", OP_COLON},
  };

  auto *table = new llvm::StringMap<TokenType>(128);
  for (const Spelling &entry : spellings)
    (*table)[entry.text] = entry.type;
  return table;
}