YAML parser robustness improvements
Summary: This patch fixes a number of bugs found in the YAML parser
through fuzzing. In general, this makes the parser more robust against
malformed inputs.
The fixes are mostly improved null checking and returning errors in
more cases. In some cases, asserts were changed to regular errors,
this provides the same robustness but also protects release builds
from the triggering conditions. This also improves the fuzzability of
the YAML parser since asserts can act as a roadblock to further
fuzzing once they're hit.
Each fix has a corresponding test case:
- TestAnchorMapError - Added proper null pointer handling in
`Stream::printError` if N is null and `KeyValueNode::getValue` if
getKey returns null, `Input::createHNodes` `dyn_casts` changed to
`dyn_cast_or_null` so the null pointer checks are actually able to
fail
- TestFlowSequenceTokenErrors - Added case in
`Document::parseBlockNode` for FlowMappingEnd, FlowSequenceEnd, or
FlowEntry tokens outside of mappings or sequences
- TestDirectiveMappingNoValue - Changed assert to regular error
return in `Scanner::scanValue`
- TestUnescapeInfiniteLoop - Fixed infinite loop in
`ScalarNode::unescapeDoubleQuoted` by returning an error for
unrecognized escape codes
- TestScannerUnexpectedCharacter - Changed asserts to regular error
returns in `Scanner::consume`
- TestUnknownDirective - For both of the inputs the stream doesn't
fail and correctly returns TK_Error, but there is no valid root
node for the document. There's no reasonable way to make the
scanner fail for unknown directives without breaking the YAML spec
(see spec-07-01.test). I think the assert is unnecessary given
that an error is still generated for this case.
The `SimpleKeys.clear()` line fixes a bug found by AddressSanitizer
triggered by multiple test cases - when TokenQueue is cleared
SimpleKeys is still holding dangling pointers into it, so SimpleKeys
should be cleared as well.
Patch by Thomas Finch!
Reviewers: chandlerc, Bigcheese, hintonda
Reviewed By: Bigcheese, hintonda
Subscribers: hintonda, kristina, beanz, dexonsmith, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61608
This commit is contained in:
parent
a7716a3c3c
commit
092452d402
@ -789,6 +789,7 @@ Token &Scanner::peekNext() {
|
||||
if (TokenQueue.empty() || NeedMore) {
|
||||
if (!fetchMoreTokens()) {
|
||||
TokenQueue.clear();
|
||||
SimpleKeys.clear();
|
||||
TokenQueue.push_back(Token());
|
||||
return TokenQueue.front();
|
||||
}
|
||||
@ -932,12 +933,16 @@ void Scanner::scan_ns_uri_char() {
|
||||
}
|
||||
|
||||
bool Scanner::consume(uint32_t Expected) {
|
||||
if (Expected >= 0x80)
|
||||
report_fatal_error("Not dealing with this yet");
|
||||
if (Expected >= 0x80) {
|
||||
setError("Cannot consume non-ascii characters");
|
||||
return false;
|
||||
}
|
||||
if (Current == End)
|
||||
return false;
|
||||
if (uint8_t(*Current) >= 0x80)
|
||||
report_fatal_error("Not dealing with this yet");
|
||||
if (uint8_t(*Current) >= 0x80) {
|
||||
setError("Cannot consume non-ascii characters");
|
||||
return false;
|
||||
}
|
||||
if (uint8_t(*Current) == Expected) {
|
||||
++Current;
|
||||
++Column;
|
||||
@ -1227,7 +1232,10 @@ bool Scanner::scanValue() {
|
||||
if (i == SK.Tok)
|
||||
break;
|
||||
}
|
||||
assert(i != e && "SimpleKey not in token queue!");
|
||||
if (i == e) {
|
||||
Failed = true;
|
||||
return false;
|
||||
}
|
||||
i = TokenQueue.insert(i, T);
|
||||
|
||||
// We may also need to add a Block-Mapping-Start token.
|
||||
@ -1772,10 +1780,11 @@ Stream::~Stream() = default;
|
||||
bool Stream::failed() { return scanner->failed(); }
|
||||
|
||||
void Stream::printError(Node *N, const Twine &Msg) {
|
||||
scanner->printError( N->getSourceRange().Start
|
||||
SMRange Range = N ? N->getSourceRange() : SMRange();
|
||||
scanner->printError( Range.Start
|
||||
, SourceMgr::DK_Error
|
||||
, Msg
|
||||
, N->getSourceRange());
|
||||
, Range);
|
||||
}
|
||||
|
||||
document_iterator Stream::begin() {
|
||||
@ -1934,15 +1943,18 @@ StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
|
||||
UnquotedValue = UnquotedValue.substr(1);
|
||||
break;
|
||||
default:
|
||||
if (UnquotedValue.size() == 1)
|
||||
// TODO: Report error.
|
||||
break;
|
||||
if (UnquotedValue.size() == 1) {
|
||||
Token T;
|
||||
T.Range = StringRef(UnquotedValue.begin(), 1);
|
||||
setError("Unrecognized escape code", T);
|
||||
return "";
|
||||
}
|
||||
UnquotedValue = UnquotedValue.substr(1);
|
||||
switch (UnquotedValue[0]) {
|
||||
default: {
|
||||
Token T;
|
||||
T.Range = StringRef(UnquotedValue.begin(), 1);
|
||||
setError("Unrecognized escape code!", T);
|
||||
setError("Unrecognized escape code", T);
|
||||
return "";
|
||||
}
|
||||
case '\r':
|
||||
@ -2078,7 +2090,14 @@ Node *KeyValueNode::getKey() {
|
||||
Node *KeyValueNode::getValue() {
|
||||
if (Value)
|
||||
return Value;
|
||||
getKey()->skip();
|
||||
|
||||
if (Node* Key = getKey())
|
||||
Key->skip();
|
||||
else {
|
||||
setError("Null key in Key Value.", peekNext());
|
||||
return Value = new (getAllocator()) NullNode(Doc);
|
||||
}
|
||||
|
||||
if (failed())
|
||||
return Value = new (getAllocator()) NullNode(Doc);
|
||||
|
||||
@ -2394,6 +2413,15 @@ parse_property:
|
||||
// TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
|
||||
// !!null null.
|
||||
return new (NodeAllocator) NullNode(stream.CurrentDoc);
|
||||
case Token::TK_FlowMappingEnd:
|
||||
case Token::TK_FlowSequenceEnd:
|
||||
case Token::TK_FlowEntry: {
|
||||
if (Root && (isa<MappingNode>(Root) || isa<SequenceNode>(Root)))
|
||||
return new (NodeAllocator) NullNode(stream.CurrentDoc);
|
||||
|
||||
setError("Unexpected token", T);
|
||||
return nullptr;
|
||||
}
|
||||
case Token::TK_Error:
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -87,7 +87,6 @@ bool Input::setCurrentDocument() {
|
||||
if (DocIterator != Strm->end()) {
|
||||
Node *N = DocIterator->getRoot();
|
||||
if (!N) {
|
||||
assert(Strm->failed() && "Root is NULL iff parsing failed");
|
||||
EC = make_error_code(errc::invalid_argument);
|
||||
return false;
|
||||
}
|
||||
@ -394,7 +393,7 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
|
||||
auto mapHNode = std::make_unique<MapHNode>(N);
|
||||
for (KeyValueNode &KVN : *Map) {
|
||||
Node *KeyNode = KVN.getKey();
|
||||
ScalarNode *Key = dyn_cast<ScalarNode>(KeyNode);
|
||||
ScalarNode *Key = dyn_cast_or_null<ScalarNode>(KeyNode);
|
||||
Node *Value = KVN.getValue();
|
||||
if (!Key || !Value) {
|
||||
if (!Key)
|
||||
|
||||
@ -3050,3 +3050,55 @@ TEST(YAMLIO, TestReadWritePolymorphicMap) {
|
||||
EXPECT_EQ(bar->DoubleValue, 2.0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(YAMLIO, TestAnchorMapError) {
|
||||
Input yin("& & &: ");
|
||||
yin.setCurrentDocument();
|
||||
EXPECT_TRUE(yin.error());
|
||||
}
|
||||
|
||||
TEST(YAMLIO, TestFlowSequenceTokenErrors) {
|
||||
Input yin(",");
|
||||
EXPECT_FALSE(yin.setCurrentDocument());
|
||||
EXPECT_TRUE(yin.error());
|
||||
|
||||
Input yin2("]");
|
||||
EXPECT_FALSE(yin2.setCurrentDocument());
|
||||
EXPECT_TRUE(yin2.error());
|
||||
|
||||
Input yin3("}");
|
||||
EXPECT_FALSE(yin3.setCurrentDocument());
|
||||
EXPECT_TRUE(yin3.error());
|
||||
}
|
||||
|
||||
TEST(YAMLIO, TestDirectiveMappingNoValue) {
|
||||
Input yin("%YAML\n{5:");
|
||||
EXPECT_FALSE(yin.setCurrentDocument());
|
||||
EXPECT_TRUE(yin.error());
|
||||
|
||||
Input yin2("%TAG\n'\x98!< :\n");
|
||||
yin2.setCurrentDocument();
|
||||
EXPECT_TRUE(yin2.error());
|
||||
}
|
||||
|
||||
TEST(YAMLIO, TestUnescapeInfiniteLoop) {
|
||||
Input yin("\"\\u\\^#\\\\\"");
|
||||
yin.setCurrentDocument();
|
||||
EXPECT_TRUE(yin.error());
|
||||
}
|
||||
|
||||
TEST(YAMLIO, TestScannerUnexpectedCharacter) {
|
||||
Input yin("!<$\x9F.");
|
||||
EXPECT_FALSE(yin.setCurrentDocument());
|
||||
EXPECT_TRUE(yin.error());
|
||||
}
|
||||
|
||||
TEST(YAMLIO, TestUnknownDirective) {
|
||||
Input yin("%");
|
||||
EXPECT_FALSE(yin.setCurrentDocument());
|
||||
EXPECT_TRUE(yin.error());
|
||||
|
||||
Input yin2("%)");
|
||||
EXPECT_FALSE(yin2.setCurrentDocument());
|
||||
EXPECT_TRUE(yin2.error());
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user