diff options
author | Arseny Kapoulkine <arseny.kapoulkine@gmail.com> | 2015-02-12 08:12:12 -0800 |
---|---|---|
committer | Arseny Kapoulkine <arseny.kapoulkine@gmail.com> | 2015-02-12 08:12:12 -0800 |
commit | e94552c9ca883f8c4f2cead24355a60ecba0efb2 (patch) | |
tree | 0aa1f9ed3d61c110d458f4c044920bd5998460fe | |
parent | 00b4b0192f88392e80f1c504526c7e73f4d16ec7 (diff) |
DOCTYPE parsing is now stackless
This prevents malformed input XML with very deeply nested DOCTYPE sections
from overflowing the stack and crashing the parser.
Fixes #29.
-rw-r--r-- | src/pugixml.cpp | 37 | ||||
-rw-r--r-- | tests/test_parse_doctype.cpp | 40 |
2 files changed, 62 insertions, 15 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 265337a..0f696ab 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -2357,23 +2357,28 @@ PUGI__NS_BEGIN char_t* parse_doctype_ignore(char_t* s) { + size_t depth = 0; + assert(s[0] == '<' && s[1] == '!' && s[2] == '['); - s++; + s += 3; while (*s) { if (s[0] == '<' && s[1] == '!' && s[2] == '[') { // nested ignore section - s = parse_doctype_ignore(s); - if (!s) return s; + s += 3; + depth++; } else if (s[0] == ']' && s[1] == ']' && s[2] == '>') { // ignore section end s += 3; - return s; + if (depth == 0) + return s; + + depth--; } else s++; } @@ -2381,10 +2386,12 @@ PUGI__NS_BEGIN PUGI__THROW_ERROR(status_bad_doctype, s); } - char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel) + char_t* parse_doctype_group(char_t* s, char_t endch) { + size_t depth = 0; + assert((s[0] == '<' || s[0] == 0) && s[1] == '!'); - s++; + s += 2; while (*s) { @@ -2399,12 +2406,8 @@ PUGI__NS_BEGIN else { // some control group - s = parse_doctype_group(s, endch, false); - if (!s) return s; - - // skip > - assert(*s == '>'); - s++; + s += 2; + depth++; } } else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') @@ -2415,12 +2418,16 @@ PUGI__NS_BEGIN } else if (*s == '>') { - return s; + if (depth == 0) + return s; + + depth--; + s++; } else s++; } - if (!toplevel || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s); + if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s); return s; } @@ -2512,7 +2519,7 @@ PUGI__NS_BEGIN char_t* mark = s + 9; - s = parse_doctype_group(s, endch, true); + s = parse_doctype_group(s, endch); if (!s) return s; assert((*s == 0 && endch == '>') || *s == '>'); diff --git a/tests/test_parse_doctype.cpp b/tests/test_parse_doctype.cpp index 14268f6..646ebbf 100644 --- a/tests/test_parse_doctype.cpp +++ b/tests/test_parse_doctype.cpp @@ -322,3 +322,43 @@ TEST(parse_doctype_error_ignore) CHECK(doc.load_string(STR("<!DOCTYPE root [ <![IGNORE[ <![INCLUDE[")).status == 
status_bad_doctype); CHECK(doc.load_string(STR("<!DOCTYPE root [ <![IGNORE[ <![INCLUDE["), parse_doctype).status == status_bad_doctype); } + +TEST(parse_doctype_stackless_group) +{ + std::basic_string<char_t> str; + + int count = 100000; + + str += "<!DOCTYPE "; + + for (int i = 0; i < count; ++i) + str += STR("<!G "); + + for (int j = 0; j < count; ++j) + str += STR(">"); + + str += ">"; + + xml_document doc; + CHECK(doc.load_string(str.c_str(), parse_fragment)); +} + +TEST(parse_doctype_stackless_ignore) +{ + std::basic_string<char_t> str; + + int count = 100000; + + str += "<!DOCTYPE "; + + for (int i = 0; i < count; ++i) + str += STR("<![IGNORE[ "); + + for (int j = 0; j < count; ++j) + str += STR("]]>"); + + str += ">"; + + xml_document doc; + CHECK(doc.load_string(str.c_str(), parse_fragment)); +} |