From e94552c9ca883f8c4f2cead24355a60ecba0efb2 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 12 Feb 2015 08:12:12 -0800 Subject: DOCTYPE parsing is now stackless This prevents malformed input XML with very deeply recursive DOCTYPE sections from crashing the parser. Fixes #29. --- src/pugixml.cpp | 37 ++++++++++++++++++++++--------------- tests/test_parse_doctype.cpp | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 15 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 265337a..0f696ab 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -2357,23 +2357,28 @@ PUGI__NS_BEGIN char_t* parse_doctype_ignore(char_t* s) { + size_t depth = 0; + assert(s[0] == '<' && s[1] == '!' && s[2] == '['); - s++; + s += 3; while (*s) { if (s[0] == '<' && s[1] == '!' && s[2] == '[') { // nested ignore section - s = parse_doctype_ignore(s); - if (!s) return s; + s += 3; + depth++; } else if (s[0] == ']' && s[1] == ']' && s[2] == '>') { // ignore section end s += 3; - return s; + if (depth == 0) + return s; + + depth--; } else s++; } @@ -2381,10 +2386,12 @@ PUGI__NS_BEGIN PUGI__THROW_ERROR(status_bad_doctype, s); } - char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel) + char_t* parse_doctype_group(char_t* s, char_t endch) { + size_t depth = 0; + assert((s[0] == '<' || s[0] == 0) && s[1] == '!'); - s++; + s += 2; while (*s) { @@ -2399,12 +2406,8 @@ PUGI__NS_BEGIN else { // some control group - s = parse_doctype_group(s, endch, false); - if (!s) return s; - - // skip > - assert(*s == '>'); - s++; + s += 2; + depth++; } } else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') @@ -2415,12 +2418,16 @@ PUGI__NS_BEGIN } else if (*s == '>') { - return s; + if (depth == 0) + return s; + + depth--; + s++; } else s++; } - if (!toplevel || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s); + if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s); return s; } @@ -2512,7 +2519,7 @@ PUGI__NS_BEGIN char_t* mark = s + 9; - s = parse_doctype_group(s, endch, true); + s = parse_doctype_group(s, endch); if (!s) return s; assert((*s == 0 && endch == '>') || *s == '>'); diff --git a/tests/test_parse_doctype.cpp b/tests/test_parse_doctype.cpp index 14268f6..646ebbf 100644 --- a/tests/test_parse_doctype.cpp +++ b/tests/test_parse_doctype.cpp @@ -322,3 +322,43 @@ TEST(parse_doctype_error_ignore) CHECK(doc.load_string(STR(" str; + + int count = 100000; + + str += ""); + + str += ">"; + + xml_document doc; + CHECK(doc.load_string(str.c_str(), parse_fragment)); +} + +TEST(parse_doctype_stackless_ignore) +{ + std::basic_string str; + + int count = 100000; + + str += ""); + + str += ">"; + + xml_document doc; + CHECK(doc.load_string(str.c_str(), parse_fragment)); +} -- cgit v1.2.3