diff options
author | arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640> | 2010-06-12 07:30:13 +0000 |
---|---|---|
committer | arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640> | 2010-06-12 07:30:13 +0000 |
commit | 0ed895d79c77979e2721822020dbf386da7a0747 (patch) | |
tree | bf8cdfc0913268d0f6eeef1e26ab8067720aa025 /src | |
parent | f2050e5170e5c49b19ec591a36fcb748bbc0b351 (diff) |
Refactored PI/declaration parsing; non-top-level declarations now result in parsing errors
git-svn-id: http://pugixml.googlecode.com/svn/trunk@515 99668b35-9821-0410-8761-19e4c4f06640
Diffstat (limited to 'src')
-rw-r--r-- | src/pugixml.cpp | 122 |
1 files changed, 48 insertions, 74 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index 68a68d0..64b1e34 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -2044,105 +2044,79 @@ namespace
// parse node contents, starting with question mark
++s;
- if (!IS_CHARTYPE(*s, ct_start_symbol)) // bad PI
- THROW_ERROR(status_bad_pi, s);
- else if (OPTSET(parse_pi) || OPTSET(parse_declaration))
- {
- char_t* mark = s;
- SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Read PI target
- CHECK_ERROR(status_bad_pi, s);
-
- if (!IS_CHARTYPE(*s, ct_space) && *s != '?') // Target has to end with space or ?
- THROW_ERROR(status_bad_pi, s);
+ // read PI target
+ char_t* target = s;
- ENDSEG();
- if (*s == 0 && endch != '>') THROW_ERROR(status_bad_pi, s);
-
- if (ch == '?') // nothing except target present
- {
- if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_pi, s);
- s += (*s == '>');
+ if (!IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_pi, s);
- // stricmp / strcasecmp is not portable
- if ((mark[0] == 'x' || mark[0] == 'X') && (mark[1] == 'm' || mark[1] == 'M')
- && (mark[2] == 'l' || mark[2] == 'L') && mark[3] == 0)
- {
- if (OPTSET(parse_declaration))
- {
- PUSHNODE(node_declaration);
+ SCANWHILE(IS_CHARTYPE(*s, ct_symbol));
+ CHECK_ERROR(status_bad_pi, s);
- cursor->name = mark;
+ // determine node type; stricmp / strcasecmp is not portable
+ bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
- POPNODE();
- }
- }
- else if (OPTSET(parse_pi))
- {
- PUSHNODE(node_pi); // Append a new node on the tree.
-
- cursor->name = mark;
+ if (declaration ? OPTSET(parse_declaration) : OPTSET(parse_pi))
+ {
+ if (declaration)
+ {
+ // disallow non top-level declarations
+ if ((cursor->header & xml_memory_page_type_mask) != node_document) THROW_ERROR(status_bad_pi, s);
- POPNODE();
- }
+ PUSHNODE(node_declaration);
}
- // stricmp / strcasecmp is not portable
- else if ((mark[0] == 'x' || mark[0] == 'X') && (mark[1] == 'm' || mark[1] == 'M')
- && (mark[2] == 'l' || mark[2] == 'L') && mark[3] == 0)
+ else
{
- if (OPTSET(parse_declaration))
- {
- PUSHNODE(node_declaration);
-
- cursor->name = mark;
-
- // scan for tag end
- mark = s;
+ PUSHNODE(node_pi);
+ }
- SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); // Look for '?>'.
- CHECK_ERROR(status_bad_pi, s);
+ cursor->name = target;
- // replace ending ? with / to terminate properly
- *s = '/';
+ ENDSEG();
- // parse attributes
- s = mark;
+ // parse value/attributes
+ if (ch == '?')
+ {
+ // empty node
+ if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_pi, s);
+ s += (*s == '>');
- // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
- }
+ POPNODE();
}
- else
+ else if (IS_CHARTYPE(ch, ct_space))
{
- if (OPTSET(parse_pi))
- {
- PUSHNODE(node_pi); // Append a new node on the tree.
-
- cursor->name = mark;
- }
-
- // ch is a whitespace character, skip whitespaces
SKIPWS();
- CHECK_ERROR(status_bad_pi, s);
- mark = s;
+ // scan for tag end
+ char_t* value = s;
- SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); // Look for '?>'.
+ SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
CHECK_ERROR(status_bad_pi, s);
- ENDSEG();
-
- s += (*s == '>'); // Step over >
-
- if (OPTSET(parse_pi))
+ if (declaration)
{
- cursor->value = mark;
+ // replace ending ? with / so that 'element' terminates properly
+ *s = '/';
+ // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
+ s = value;
+ }
+ else
+ {
+ // store value and step over >
+ cursor->value = value;
POPNODE();
+
+ ENDSEG();
+
+ s += (*s == '>');
}
}
+ else THROW_ERROR(status_bad_pi, s);
}
- else // not parsing PI
+ else
{
- SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); // Look for '?>'.
+ // scan for tag end
+ SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
CHECK_ERROR(status_bad_pi, s);
s += (s[1] == '>' ? 2 : 1);
|