summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Arseny Kapoulkine <arseny.kapoulkine@gmail.com> 2015-02-12 08:12:12 -0800
committer Arseny Kapoulkine <arseny.kapoulkine@gmail.com> 2015-02-12 08:12:12 -0800
commit e94552c9ca883f8c4f2cead24355a60ecba0efb2 (patch)
tree 0aa1f9ed3d61c110d458f4c044920bd5998460fe
parent 00b4b0192f88392e80f1c504526c7e73f4d16ec7 (diff)
DOCTYPE parsing is now stackless
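Nested DOCTYPE groups and ignore sections are now tracked with a depth counter instead of recursive calls. This prevents malformed input XML with very deeply nested DOCTYPE sections from crashing the parser. Fixes #29.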
-rw-r--r-- src/pugixml.cpp 37
-rw-r--r-- tests/test_parse_doctype.cpp 40
2 files changed, 62 insertions, 15 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index 265337a..0f696ab 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -2357,23 +2357,28 @@ PUGI__NS_BEGIN
char_t* parse_doctype_ignore(char_t* s)
{
+ size_t depth = 0;
+
assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
- s++;
+ s += 3;
while (*s)
{
if (s[0] == '<' && s[1] == '!' && s[2] == '[')
{
// nested ignore section
- s = parse_doctype_ignore(s);
- if (!s) return s;
+ s += 3;
+ depth++;
}
else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
{
// ignore section end
s += 3;
- return s;
+ if (depth == 0)
+ return s;
+
+ depth--;
}
else s++;
}
@@ -2381,10 +2386,12 @@ PUGI__NS_BEGIN
PUGI__THROW_ERROR(status_bad_doctype, s);
}
- char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel)
+ char_t* parse_doctype_group(char_t* s, char_t endch)
{
+ size_t depth = 0;
+
assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
- s++;
+ s += 2;
while (*s)
{
@@ -2399,12 +2406,8 @@ PUGI__NS_BEGIN
else
{
// some control group
- s = parse_doctype_group(s, endch, false);
- if (!s) return s;
-
- // skip >
- assert(*s == '>');
- s++;
+ s += 2;
+ depth++;
}
}
else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
@@ -2415,12 +2418,16 @@ PUGI__NS_BEGIN
}
else if (*s == '>')
{
- return s;
+ if (depth == 0)
+ return s;
+
+ depth--;
+ s++;
}
else s++;
}
- if (!toplevel || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
+ if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
return s;
}
@@ -2512,7 +2519,7 @@ PUGI__NS_BEGIN
char_t* mark = s + 9;
- s = parse_doctype_group(s, endch, true);
+ s = parse_doctype_group(s, endch);
if (!s) return s;
assert((*s == 0 && endch == '>') || *s == '>');
diff --git a/tests/test_parse_doctype.cpp b/tests/test_parse_doctype.cpp
index 14268f6..646ebbf 100644
--- a/tests/test_parse_doctype.cpp
+++ b/tests/test_parse_doctype.cpp
@@ -322,3 +322,43 @@ TEST(parse_doctype_error_ignore)
CHECK(doc.load_string(STR("<!DOCTYPE root [ <![IGNORE[ <![INCLUDE[")).status == status_bad_doctype);
CHECK(doc.load_string(STR("<!DOCTYPE root [ <![IGNORE[ <![INCLUDE["), parse_doctype).status == status_bad_doctype);
}
+
+TEST(parse_doctype_stackless_group)
+{
+ std::basic_string<char_t> str;
+ // Build a DOCTYPE holding `count` nested "<!G " groups, each closed by its own ">".
+ int count = 100000;
+ // Every literal is wrapped in STR(), like the rest of this file, so it matches char_t.
+ str += STR("<!DOCTYPE ");
+
+ for (int i = 0; i < count; ++i)
+ str += STR("<!G ");
+
+ for (int j = 0; j < count; ++j)
+ str += STR(">");
+ // The final ">" closes the DOCTYPE itself.
+ str += STR(">");
+
+ xml_document doc;
+ CHECK(doc.load_string(str.c_str(), parse_fragment));
+}
+
+TEST(parse_doctype_stackless_ignore)
+{
+ std::basic_string<char_t> str;
+ // Build a DOCTYPE holding `count` nested "<![IGNORE[ " sections, each closed by "]]>".
+ int count = 100000;
+ // Every literal is wrapped in STR(), like the rest of this file, so it matches char_t.
+ str += STR("<!DOCTYPE ");
+
+ for (int i = 0; i < count; ++i)
+ str += STR("<![IGNORE[ ");
+
+ for (int j = 0; j < count; ++j)
+ str += STR("]]>");
+ // The final ">" closes the DOCTYPE itself.
+ str += STR(">");
+
+ xml_document doc;
+ CHECK(doc.load_string(str.c_str(), parse_fragment));
+}