[LLDB] Add array subscription and integer parsing to DIL (#141102)

Reapply #138551 with an xfailed test on Windows
This commit is contained in:
Ilia Kuklin 2025-05-25 21:09:33 +05:00 committed by GitHub
parent 014f4e95e0
commit fe51d8ae57
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 353 additions and 24 deletions

View File

@ -10,16 +10,17 @@ unary_expression = postfix_expression
unary_operator = "*" | "&" ;
postfix_expresson = primary_expression
| postfix_expression "." id_expression
| postfix_expression "->" id_expression ;
postfix_expression = primary_expression
| postfix_expression "[" integer_literal "]"
| postfix_expression "." id_expression
| postfix_expression "->" id_expression ;
primary_expression = id_expression
| "(" expression ")" ;
| "(" expression ")" ;
id_expression = unqualified_id
| qualified_id
| register ;
| register ;
unqualified_id = identifier ;
@ -28,6 +29,8 @@ qualified_id = ["::"] [nested_name_specifier] unqualified_id
identifier = ? C99 Identifier ? ;
integer_literal = ? Integer constant: hexadecimal, decimal, octal, binary ? ;
register = "$" ? Register name ? ;
nested_name_specifier = type_name "::"

View File

@ -18,6 +18,7 @@ namespace lldb_private::dil {
/// The various types DIL AST nodes (used by the DIL parser).
enum class NodeKind {
eArraySubscriptNode,
eErrorNode,
eIdentifierNode,
eMemberOfNode,
@ -120,8 +121,8 @@ public:
llvm::Expected<lldb::ValueObjectSP> Accept(Visitor *v) const override;
UnaryOpKind kind() const { return m_kind; }
ASTNode *operand() const { return m_operand.get(); }
UnaryOpKind GetKind() const { return m_kind; }
ASTNode *GetOperand() const { return m_operand.get(); }
static bool classof(const ASTNode *node) {
return node->GetKind() == NodeKind::eUnaryOpNode;
@ -132,6 +133,26 @@ private:
ASTNodeUP m_operand;
};
class ArraySubscriptNode : public ASTNode {
public:
ArraySubscriptNode(uint32_t location, ASTNodeUP base, int64_t index)
: ASTNode(location, NodeKind::eArraySubscriptNode),
m_base(std::move(base)), m_index(index) {}
llvm::Expected<lldb::ValueObjectSP> Accept(Visitor *v) const override;
ASTNode *GetBase() const { return m_base.get(); }
int64_t GetIndex() const { return m_index; }
static bool classof(const ASTNode *node) {
return node->GetKind() == NodeKind::eArraySubscriptNode;
}
private:
ASTNodeUP m_base;
int64_t m_index;
};
/// This class contains one Visit method for each specialized type of
/// DIL AST node. The Visit methods are used to dispatch a DIL AST node to
/// the correct function in the DIL expression evaluator for evaluating that
@ -145,6 +166,8 @@ public:
Visit(const MemberOfNode *node) = 0;
virtual llvm::Expected<lldb::ValueObjectSP>
Visit(const UnaryOpNode *node) = 0;
virtual llvm::Expected<lldb::ValueObjectSP>
Visit(const ArraySubscriptNode *node) = 0;
};
} // namespace lldb_private::dil

View File

@ -52,6 +52,8 @@ private:
Visit(const IdentifierNode *node) override;
llvm::Expected<lldb::ValueObjectSP> Visit(const MemberOfNode *node) override;
llvm::Expected<lldb::ValueObjectSP> Visit(const UnaryOpNode *node) override;
llvm::Expected<lldb::ValueObjectSP>
Visit(const ArraySubscriptNode *node) override;
// Used by the interpreter to create objects, perform casts, etc.
lldb::TargetSP m_target;

View File

@ -30,8 +30,11 @@ public:
eof,
identifier,
l_paren,
l_square,
numeric_constant,
period,
r_paren,
r_square,
star,
};

View File

@ -95,6 +95,7 @@ private:
std::string ParseIdExpression();
std::string ParseUnqualifiedId();
std::optional<int64_t> ParseIntegerConstant();
void BailOut(const std::string &error, uint32_t loc, uint16_t err_len);

View File

@ -27,4 +27,9 @@ llvm::Expected<lldb::ValueObjectSP> UnaryOpNode::Accept(Visitor *v) const {
return v->Visit(this);
}
// Dispatches this node to the visitor's Visit overload that takes an
// ArraySubscriptNode and returns that overload's result unchanged.
llvm::Expected<lldb::ValueObjectSP>
ArraySubscriptNode::Accept(Visitor *v) const {
return v->Visit(this);
}
} // namespace lldb_private::dil

View File

@ -240,13 +240,13 @@ Interpreter::Visit(const IdentifierNode *node) {
llvm::Expected<lldb::ValueObjectSP>
Interpreter::Visit(const UnaryOpNode *node) {
Status error;
auto rhs_or_err = Evaluate(node->operand());
auto rhs_or_err = Evaluate(node->GetOperand());
if (!rhs_or_err)
return rhs_or_err;
lldb::ValueObjectSP rhs = *rhs_or_err;
switch (node->kind()) {
switch (node->GetKind()) {
case UnaryOpKind::Deref: {
lldb::ValueObjectSP dynamic_rhs = rhs->GetDynamicValue(m_use_dynamic);
if (dynamic_rhs)
@ -383,4 +383,51 @@ Interpreter::Visit(const MemberOfNode *node) {
m_expr, errMsg, node->GetLocation(), node->GetFieldName().size());
}
// Evaluates a subscript expression `base[index]`.
//
// The base expression is evaluated first. If the resulting value has a
// synthetic value, the index is bounds-checked against it and the synthetic
// child is returned when available. Otherwise the base must be an array or a
// non-void pointer (after stripping references): arrays are indexed through
// their children, and everything else falls back to a synthetic array member.
llvm::Expected<lldb::ValueObjectSP>
Interpreter::Visit(const ArraySubscriptNode *node) {
  auto base_or_err = Evaluate(node->GetBase());
  if (!base_or_err)
    return base_or_err;
  lldb::ValueObjectSP base = *base_or_err;

  // Small helper so every diagnostic below is built the same way.
  auto diagnose = [&](std::string text) {
    return llvm::make_error<DILDiagnosticError>(m_expr, text,
                                                node->GetLocation());
  };

  // The child-lookup APIs take an unsigned index; the signed value is kept
  // for the synthetic-array-member fallback at the end.
  const int64_t signed_idx = node->GetIndex();
  const uint64_t idx = signed_idx;

  // Prefer the synthetic value when the base provides one.
  if (lldb::ValueObjectSP synthetic = base->GetSyntheticValue()) {
    llvm::Expected<uint32_t> child_count = synthetic->GetNumChildren(idx + 1);
    if (!child_count)
      return diagnose(toString(child_count.takeError()));

    if (idx >= *child_count) {
      std::string message = llvm::formatv(
          "array index {0} is not valid for \"({1}) {2}\"", idx,
          base->GetTypeName().AsCString("<invalid type>"),
          base->GetName().AsCString());
      return diagnose(message);
    }

    lldb::ValueObjectSP synthetic_child = synthetic->GetChildAtIndex(idx);
    if (synthetic_child)
      return synthetic_child;
    // No synthetic child produced: continue with the plain-type handling.
  }

  auto base_type = base->GetCompilerType().GetNonReferenceType();
  if (!(base_type.IsPointerType() || base_type.IsArrayType()))
    return diagnose("subscripted value is not an array or pointer");

  if (base_type.IsPointerToVoid())
    return diagnose("subscript of pointer to incomplete type 'void'");

  if (base_type.IsArrayType()) {
    lldb::ValueObjectSP element = base->GetChildAtIndex(idx);
    if (element)
      return element;
  }

  // Pointers, and array indexes for which no child was returned above.
  return base->GetSyntheticArrayMember(signed_idx, true);
}
} // namespace lldb_private::dil

View File

@ -13,6 +13,7 @@
#include "lldb/ValueObject/DILLexer.h"
#include "lldb/Utility/Status.h"
#include "lldb/ValueObject/DILParser.h"
#include "llvm/ADT/StringSwitch.h"
namespace lldb_private::dil {
@ -31,10 +32,16 @@ llvm::StringRef Token::GetTokenName(Kind kind) {
return "identifier";
case Kind::l_paren:
return "l_paren";
case Kind::l_square:
return "l_square";
case Kind::numeric_constant:
return "numeric_constant";
case Kind::period:
return "period";
case Kind::r_paren:
return "r_paren";
case Kind::r_square:
return "r_square";
case Token::star:
return "star";
}
@ -61,6 +68,18 @@ static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr,
return candidate;
}
// Returns true when `ch` is accepted by IsDigit or IsLetter. IsNumber uses it
// as the take_while predicate for the characters of a numeric token.
static bool IsNumberBodyChar(char ch) { return IsDigit(ch) || IsLetter(ch); }
// Tries to lex a numeric constant from the front of `remainder`.
//
// A number must start with a digit and extends over every following
// character accepted by IsNumberBodyChar (digits and letters). Spellings
// such as "0x123" or "0b101" therefore come out as a single token; no
// validation of the spelling is performed here.
//
// On success the consumed characters are dropped from `remainder` and the
// number's spelling is returned. Otherwise `remainder` is left untouched and
// std::nullopt is returned.
//
// `expr` is not used by this function (NOTE(review): presumably kept so the
// signature mirrors IsWord - confirm).
static std::optional<llvm::StringRef> IsNumber(llvm::StringRef expr,
                                               llvm::StringRef &remainder) {
  // StringRef::operator[] asserts on an empty string, so reject empty input
  // explicitly instead of relying on every caller to have checked for it.
  if (remainder.empty() || !IsDigit(remainder.front()))
    return std::nullopt;

  llvm::StringRef number = remainder.take_while(IsNumberBodyChar);
  remainder = remainder.drop_front(number.size());
  return number;
}
llvm::Expected<DILLexer> DILLexer::Create(llvm::StringRef expr) {
std::vector<Token> tokens;
llvm::StringRef remainder = expr;
@ -85,14 +104,17 @@ llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr,
return Token(Token::eof, "", (uint32_t)expr.size());
uint32_t position = cur_pos - expr.begin();
std::optional<llvm::StringRef> maybe_number = IsNumber(expr, remainder);
if (maybe_number)
return Token(Token::numeric_constant, maybe_number->str(), position);
std::optional<llvm::StringRef> maybe_word = IsWord(expr, remainder);
if (maybe_word)
return Token(Token::identifier, maybe_word->str(), position);
constexpr std::pair<Token::Kind, const char *> operators[] = {
{Token::amp, "&"}, {Token::arrow, "->"}, {Token::coloncolon, "::"},
{Token::l_paren, "("}, {Token::period, "."}, {Token::r_paren, ")"},
{Token::star, "*"},
{Token::amp, "&"}, {Token::arrow, "->"}, {Token::coloncolon, "::"},
{Token::l_paren, "("}, {Token::l_square, "["}, {Token::period, "."},
{Token::r_paren, ")"}, {Token::r_square, "]"}, {Token::star, "*"},
};
for (auto [kind, str] : operators) {
if (remainder.consume_front(str))
@ -100,7 +122,8 @@ llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr,
}
// Unrecognized character(s) in string; unable to lex it.
return llvm::createStringError("Unable to lex input string");
return llvm::make_error<DILDiagnosticError>(expr, "unrecognized token",
position);
}
} // namespace lldb_private::dil

View File

@ -119,20 +119,46 @@ ASTNodeUP DILParser::ParseUnaryExpression() {
//
// postfix_expression:
// primary_expression
// postfix_expression "[" integer_literal "]"
// postfix_expression "." id_expression
// postfix_expression "->" id_expression
//
// Parses a primary expression followed by any number of postfix operators,
// wrapping the node built so far in a new node for each operator consumed.
ASTNodeUP DILParser::ParsePostfixExpression() {
ASTNodeUP lhs = ParsePrimaryExpression();
while (CurToken().IsOneOf({Token::period, Token::arrow})) {
while (CurToken().IsOneOf({Token::l_square, Token::period, Token::arrow})) {
// Location of the operator token itself ("[", "." or "->").
uint32_t loc = CurToken().GetLocation();
Token token = CurToken();
m_dil_lexer.Advance();
Token member_token = CurToken();
std::string member_id = ParseIdExpression();
lhs = std::make_unique<MemberOfNode>(
member_token.GetLocation(), std::move(lhs),
token.GetKind() == Token::arrow, member_id);
switch (token.GetKind()) {
case Token::l_square: {
// Step over "[" and read the integer constant used as the index.
m_dil_lexer.Advance();
std::optional<int64_t> rhs = ParseIntegerConstant();
if (!rhs) {
// The current token is not a parsable integer: report it and give up.
BailOut(
llvm::formatv("failed to parse integer constant: {0}", CurToken()),
CurToken().GetLocation(), CurToken().GetSpelling().length());
return std::make_unique<ErrorNode>();
}
// The index must be followed by "]", which is then stepped over.
Expect(Token::r_square);
m_dil_lexer.Advance();
// The subscript node is located at the "[" token.
lhs = std::make_unique<ArraySubscriptNode>(loc, std::move(lhs),
std::move(*rhs));
break;
}
case Token::period:
case Token::arrow: {
// Step over "." / "->"; the member node is located at the member name.
m_dil_lexer.Advance();
Token member_token = CurToken();
std::string member_id = ParseIdExpression();
lhs = std::make_unique<MemberOfNode>(
member_token.GetLocation(), std::move(lhs),
token.GetKind() == Token::arrow, member_id);
break;
}
default:
// The loop condition only admits the three token kinds handled above.
llvm_unreachable("invalid token");
}
}
return lhs;
}
@ -302,6 +328,23 @@ void DILParser::BailOut(const std::string &error, uint32_t loc,
m_dil_lexer.ResetTokenIdx(m_dil_lexer.NumLexedTokens() - 1);
}
// Parse a integer_literal.
//
// integer_literal:
// ? Integer constant ?
//
std::optional<int64_t> DILParser::ParseIntegerConstant() {
auto spelling = CurToken().GetSpelling();
llvm::StringRef spelling_ref = spelling;
int64_t raw_value;
if (!spelling_ref.getAsInteger<int64_t>(0, raw_value)) {
m_dil_lexer.Advance();
return raw_value;
}
return std::nullopt;
}
void DILParser::Expect(Token::Kind kind) {
if (CurToken().IsNot(kind)) {
BailOut(llvm::formatv("expected {0}, got: {1}", kind, CurToken()),

View File

@ -0,0 +1,3 @@
CXX_SOURCES := main.cpp
include Makefile.rules

View File

@ -0,0 +1,119 @@
"""
Test DIL array subscript.
"""
import lldb
from lldbsuite.test.lldbtest import *
from lldbsuite.test.decorators import *
from lldbsuite.test import lldbutil
class TestFrameVarDILArraySubscript(TestBase):
    # Array subscript tests for `frame variable` with the DIL implementation
    # enabled. No debug-info variants are needed for these checks.
    NO_DEBUG_INFO_TESTCASE = True

    def expect_var_path(self, expr, compare_to_framevar=False, value=None, type=None):
        # Evaluate `expr` with DIL. When `compare_to_framevar` is set, evaluate
        # it again with DIL switched off and require both values to match.
        value_dil = super().expect_var_path(expr, value=value, type=type)
        if not compare_to_framevar:
            return
        self.runCmd("settings set target.experimental.use-DIL false")
        value_frv = super().expect_var_path(expr, value=value, type=type)
        self.runCmd("settings set target.experimental.use-DIL true")
        self.assertEqual(value_dil.GetValue(), value_frv.GetValue())

    def _stop_with_dil_enabled(self):
        # Build the inferior, run to the marker line in main.cpp and turn DIL on.
        self.build()
        lldbutil.run_to_source_breakpoint(
            self, "Set a breakpoint here", lldb.SBFileSpec("main.cpp")
        )
        self.runCmd("settings set target.experimental.use-DIL true")

    def _expect_frame_var_error(self, expr, substrs=None):
        # `frame var '<expr>'` must fail; optionally check the error text.
        self.expect("frame var '" + expr + "'", error=True, substrs=substrs)

    def test_subscript(self):
        self._stop_with_dil_enabled()

        # Test int[] and int*
        self.expect_var_path("int_arr[0]", compare_to_framevar=True, value="1")
        self.expect_var_path("int_ptr[1]", compare_to_framevar=True, value="2")
        self._expect_frame_var_error("int_arr[enum_one]")

        # Test when base and index are references.
        self.expect_var_path("int_arr[0]", compare_to_framevar=True, value="1")
        for index_name in ("idx_1_ref", "enum_ref"):
            self._expect_frame_var_error("int_arr[" + index_name + "]")
        self.expect_var_path("int_arr_ref[0]", value="1")
        for index_name in ("idx_1_ref", "enum_ref"):
            self._expect_frame_var_error("int_arr_ref[" + index_name + "]")

        # Test when base and index are typedefs.
        for base_name in ("td_int_arr", "td_int_ptr"):
            self.expect_var_path(
                base_name + "[0]", compare_to_framevar=True, value="1"
            )
            for index_name in ("td_int_idx_1", "td_td_int_idx_2"):
                self._expect_frame_var_error(base_name + "[" + index_name + "]")

        # Both typedefs and refs
        self._expect_frame_var_error("td_int_arr_ref[td_int_idx_1_ref]")

        # Test for index out of bounds.
        for out_of_bounds in ("int_arr[42]", "int_arr[100]"):
            self.expect_var_path(out_of_bounds, compare_to_framevar=True, type="int")

        # Test address-of of the subscripted value.
        self.expect_var_path("*(&int_arr[1])", value="2")

        # Expressions that must be rejected, paired with the expected message.
        rejected = [
            # Negative index.
            ("int_arr[-1]", "unrecognized token"),
            # Floating point index.
            ("int_arr[1.0]", "expected 'r_square', got: <'.'"),
            # Base should be a "pointer to T" and index should be of an
            # integral type.
            ("idx_1[0]", "subscripted value is not an array or pointer"),
            ("idx_1_ref[0]", "subscripted value is not an array or pointer"),
            ("int_arr[int_ptr]", "failed to parse integer constant"),
            ("1[2]", "Unexpected token"),
            # Base should not be a pointer to void.
            ("p_void[0]", "subscript of pointer to incomplete type 'void'"),
        ]
        for expr, message in rejected:
            self._expect_frame_var_error(expr, substrs=[message])

    @expectedFailureAll(oslist=["windows"])
    def test_subscript_synthetic(self):
        self._stop_with_dil_enabled()

        # Subscripting a value through its synthetic children.
        self.expect_var_path("vector[1]", value="2")
        self._expect_frame_var_error(
            "vector[100]", substrs=["array index 100 is not valid"]
        )

View File

@ -0,0 +1,32 @@
#include <vector>
// Test inferior for the DIL array subscript test. The locals below are looked
// up by name from the debugger, so their names and types must stay as they
// are.
int main(int argc, char **argv) {
// Plain array, a pointer to its first element, and a reference to the array.
int int_arr[] = {1, 2, 3};
int *int_ptr = int_arr;
int(&int_arr_ref)[3] = int_arr;
// void pointer to the same storage.
void *p_void = (void *)int_arr;
// Integer (not array or pointer) value and a reference to it.
int idx_1 = 1;
const int &idx_1_ref = idx_1;
// Typedef'd integer, pointer and reference types, including a typedef of a
// typedef.
typedef int td_int_t;
typedef td_int_t td_td_int_t;
typedef int *td_int_ptr_t;
typedef int &td_int_ref_t;
td_int_t td_int_idx_1 = 1;
td_td_int_t td_td_int_idx_2 = 2;
td_int_t td_int_arr[3] = {1, 2, 3};
td_int_ptr_t td_int_ptr = td_int_arr;
td_int_ref_t td_int_idx_1_ref = td_int_idx_1;
td_int_t(&td_int_arr_ref)[3] = td_int_arr;
// Enum value and a reference to it.
enum Enum { kZero, kOne } enum_one = kOne;
Enum &enum_ref = enum_one;
// Standard container holding the same three values.
std::vector<int> vector = {1, 2, 3};
return 0; // Set a breakpoint here
}

View File

@ -121,11 +121,11 @@ TEST(DILLexerTests, IdentifiersTest) {
"a_b", "this", "self", "a", "MyName", "namespace"};
// The lexer can lex these strings, but they should not be identifiers.
std::vector<std::string> invalid_identifiers = {"", "::", "(", ")"};
std::vector<std::string> invalid_identifiers = {"", "::", "(", ")", "0abc"};
// The lexer is expected to fail attempting to lex these strings (it cannot
// create valid tokens out of them).
std::vector<std::string> invalid_tok_strings = {"234", "2a", "2", "1MyName"};
std::vector<std::string> invalid_tok_strings = {"#include", "a@a"};
// Verify that all of the valid identifiers come out as identifier tokens.
for (auto &str : valid_identifiers) {
@ -150,7 +150,32 @@ TEST(DILLexerTests, IdentifiersTest) {
DILLexer lexer(*maybe_lexer);
Token token = lexer.GetCurrentToken();
EXPECT_TRUE(token.IsNot(Token::identifier));
EXPECT_TRUE(token.IsOneOf(
{Token::eof, Token::coloncolon, Token::l_paren, Token::r_paren}));
EXPECT_TRUE(token.IsOneOf({Token::eof, Token::coloncolon, Token::l_paren,
Token::r_paren, Token::numeric_constant}));
}
}
// Checks which inputs the lexer classifies as numeric constants.
TEST(DILLexerTests, NumbersTest) {
  // Each of these must lex into exactly one numeric_constant token whose
  // spelling is the whole input.
  const std::vector<std::string> valid_numbers = {"123", "0x123", "0123",
                                                  "0b101"};

  // These lex successfully, but their first token must not be a number.
  const std::vector<std::string> invalid_numbers = {"", "x123", "b123"};

  for (const std::string &str : valid_numbers) {
    SCOPED_TRACE(str);
    EXPECT_THAT_EXPECTED(ExtractTokenData(str),
                         llvm::HasValue(testing::ElementsAre(
                             testing::Pair(Token::numeric_constant, str))));
  }

  // None of the invalid inputs may start with a numeric token; the first
  // token is either end-of-input or an identifier.
  for (const std::string &str : invalid_numbers) {
    SCOPED_TRACE(str);

    llvm::Expected<DILLexer> maybe_lexer = DILLexer::Create(str);
    EXPECT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded());

    DILLexer lexer(*maybe_lexer);
    Token token = lexer.GetCurrentToken();
    EXPECT_TRUE(token.IsNot(Token::numeric_constant));
    EXPECT_TRUE(token.IsOneOf({Token::eof, Token::identifier}));
  }
}