summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640> 2010-06-12 07:30:13 +0000
committer: arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640> 2010-06-12 07:30:13 +0000
commit: 0ed895d79c77979e2721822020dbf386da7a0747 (patch)
tree: bf8cdfc0913268d0f6eeef1e26ab8067720aa025
parent: f2050e5170e5c49b19ec591a36fcb748bbc0b351 (diff)
Refactored PI/declaration parsing, now non top-level declarations result in parsing errors
git-svn-id: http://pugixml.googlecode.com/svn/trunk@515 99668b35-9821-0410-8761-19e4c4f06640
-rw-r--r-- src/pugixml.cpp 122
-rw-r--r-- tests/test_dom_modify.cpp 7
2 files changed, 53 insertions, 76 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index 68a68d0..64b1e34 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -2044,105 +2044,79 @@ namespace
// parse node contents, starting with question mark
++s;
- if (!IS_CHARTYPE(*s, ct_start_symbol)) // bad PI
- THROW_ERROR(status_bad_pi, s);
- else if (OPTSET(parse_pi) || OPTSET(parse_declaration))
- {
- char_t* mark = s;
- SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Read PI target
- CHECK_ERROR(status_bad_pi, s);
-
- if (!IS_CHARTYPE(*s, ct_space) && *s != '?') // Target has to end with space or ?
- THROW_ERROR(status_bad_pi, s);
+ // read PI target
+ char_t* target = s;
- ENDSEG();
- if (*s == 0 && endch != '>') THROW_ERROR(status_bad_pi, s);
-
- if (ch == '?') // nothing except target present
- {
- if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_pi, s);
- s += (*s == '>');
+ if (!IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_pi, s);
- // stricmp / strcasecmp is not portable
- if ((mark[0] == 'x' || mark[0] == 'X') && (mark[1] == 'm' || mark[1] == 'M')
- && (mark[2] == 'l' || mark[2] == 'L') && mark[3] == 0)
- {
- if (OPTSET(parse_declaration))
- {
- PUSHNODE(node_declaration);
+ SCANWHILE(IS_CHARTYPE(*s, ct_symbol));
+ CHECK_ERROR(status_bad_pi, s);
- cursor->name = mark;
+ // determine node type; stricmp / strcasecmp is not portable
+ bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
- POPNODE();
- }
- }
- else if (OPTSET(parse_pi))
- {
- PUSHNODE(node_pi); // Append a new node on the tree.
-
- cursor->name = mark;
+ if (declaration ? OPTSET(parse_declaration) : OPTSET(parse_pi))
+ {
+ if (declaration)
+ {
+ // disallow non top-level declarations
+ if ((cursor->header & xml_memory_page_type_mask) != node_document) THROW_ERROR(status_bad_pi, s);
- POPNODE();
- }
+ PUSHNODE(node_declaration);
}
- // stricmp / strcasecmp is not portable
- else if ((mark[0] == 'x' || mark[0] == 'X') && (mark[1] == 'm' || mark[1] == 'M')
- && (mark[2] == 'l' || mark[2] == 'L') && mark[3] == 0)
+ else
{
- if (OPTSET(parse_declaration))
- {
- PUSHNODE(node_declaration);
-
- cursor->name = mark;
-
- // scan for tag end
- mark = s;
+ PUSHNODE(node_pi);
+ }
- SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); // Look for '?>'.
- CHECK_ERROR(status_bad_pi, s);
+ cursor->name = target;
- // replace ending ? with / to terminate properly
- *s = '/';
+ ENDSEG();
- // parse attributes
- s = mark;
+ // parse value/attributes
+ if (ch == '?')
+ {
+ // empty node
+ if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_pi, s);
+ s += (*s == '>');
- // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
- }
+ POPNODE();
}
- else
+ else if (IS_CHARTYPE(ch, ct_space))
{
- if (OPTSET(parse_pi))
- {
- PUSHNODE(node_pi); // Append a new node on the tree.
-
- cursor->name = mark;
- }
-
- // ch is a whitespace character, skip whitespaces
SKIPWS();
- CHECK_ERROR(status_bad_pi, s);
- mark = s;
+ // scan for tag end
+ char_t* value = s;
- SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); // Look for '?>'.
+ SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
CHECK_ERROR(status_bad_pi, s);
- ENDSEG();
-
- s += (*s == '>'); // Step over >
-
- if (OPTSET(parse_pi))
+ if (declaration)
{
- cursor->value = mark;
+ // replace ending ? with / so that 'element' terminates properly
+ *s = '/';
+ // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
+ s = value;
+ }
+ else
+ {
+ // store value and step over >
+ cursor->value = value;
POPNODE();
+
+ ENDSEG();
+
+ s += (*s == '>');
}
}
+ else THROW_ERROR(status_bad_pi, s);
}
- else // not parsing PI
+ else
{
- SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); // Look for '?>'.
+ // scan for tag end
+ SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
CHECK_ERROR(status_bad_pi, s);
s += (s[1] == '>' ? 2 : 1);
diff --git a/tests/test_dom_modify.cpp b/tests/test_dom_modify.cpp
index b45dab1..31647d4 100644
--- a/tests/test_dom_modify.cpp
+++ b/tests/test_dom_modify.cpp
@@ -513,10 +513,13 @@ TEST_XML(dom_node_copy_crossdoc, "<node/>")
CHECK_NODE(newdoc, STR("<node />"));
}
-TEST_XML_FLAGS(dom_node_copy_types, "<root><?xml version='1.0'?><?pi value?><!--comment--><node id='1'>pcdata<![CDATA[cdata]]></node></root>", parse_default | parse_pi | parse_comments | parse_declaration)
+TEST_XML_FLAGS(dom_node_copy_types, "<?xml version='1.0'?><root><?pi value?><!--comment--><node id='1'>pcdata<![CDATA[cdata]]></node></root>", parse_default | parse_pi | parse_comments | parse_declaration)
{
doc.append_copy(doc.child(STR("root")));
- CHECK_NODE(doc, STR("<root><?xml version=\"1.0\"?><?pi value?><!--comment--><node id=\"1\">pcdata<![CDATA[cdata]]></node></root><root><?xml version=\"1.0\"?><?pi value?><!--comment--><node id=\"1\">pcdata<![CDATA[cdata]]></node></root>"));
+ CHECK_NODE(doc, STR("<?xml version=\"1.0\"?><root><?pi value?><!--comment--><node id=\"1\">pcdata<![CDATA[cdata]]></node></root><root><?pi value?><!--comment--><node id=\"1\">pcdata<![CDATA[cdata]]></node></root>"));
+
+ doc.insert_copy_before(doc.first_child(), doc.first_child());
+ CHECK_NODE(doc, STR("<?xml version=\"1.0\"?><?xml version=\"1.0\"?><root><?pi value?><!--comment--><node id=\"1\">pcdata<![CDATA[cdata]]></node></root><root><?pi value?><!--comment--><node id=\"1\">pcdata<![CDATA[cdata]]></node></root>"));
}
TEST_XML(dom_attr_assign_large_number, "<node attr1='' attr2='' />")