From 0ed895d79c77979e2721822020dbf386da7a0747 Mon Sep 17 00:00:00 2001 From: "arseny.kapoulkine" Date: Sat, 12 Jun 2010 07:30:13 +0000 Subject: Refactored PI/declaration parsing, now non top-level declarations result in parsing errors git-svn-id: http://pugixml.googlecode.com/svn/trunk@515 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 122 ++++++++++++++++++++++---------------------------------- 1 file changed, 48 insertions(+), 74 deletions(-) (limited to 'src/pugixml.cpp') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 68a68d0..64b1e34 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -2044,105 +2044,79 @@ namespace // parse node contents, starting with question mark ++s; - if (!IS_CHARTYPE(*s, ct_start_symbol)) // bad PI - THROW_ERROR(status_bad_pi, s); - else if (OPTSET(parse_pi) || OPTSET(parse_declaration)) - { - char_t* mark = s; - SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Read PI target - CHECK_ERROR(status_bad_pi, s); - - if (!IS_CHARTYPE(*s, ct_space) && *s != '?') // Target has to end with space or ? - THROW_ERROR(status_bad_pi, s); + // read PI target + char_t* target = s; - ENDSEG(); - if (*s == 0 && endch != '>') THROW_ERROR(status_bad_pi, s); - - if (ch == '?') // nothing except target present - { - if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_pi, s); - s += (*s == '>'); + if (!IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_pi, s); - // stricmp / strcasecmp is not portable - if ((mark[0] == 'x' || mark[0] == 'X') && (mark[1] == 'm' || mark[1] == 'M') - && (mark[2] == 'l' || mark[2] == 'L') && mark[3] == 0) - { - if (OPTSET(parse_declaration)) - { - PUSHNODE(node_declaration); + SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); + CHECK_ERROR(status_bad_pi, s); - cursor->name = mark; + // determine node type; stricmp / strcasecmp is not portable + bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s; - POPNODE(); - } - } - else if (OPTSET(parse_pi)) - { - PUSHNODE(node_pi); // Append a new node on the tree. - - cursor->name = mark; + if (declaration ? OPTSET(parse_declaration) : OPTSET(parse_pi)) + { + if (declaration) + { + // disallow non top-level declarations + if ((cursor->header & xml_memory_page_type_mask) != node_document) THROW_ERROR(status_bad_pi, s); - POPNODE(); - } + PUSHNODE(node_declaration); } - // stricmp / strcasecmp is not portable - else if ((mark[0] == 'x' || mark[0] == 'X') && (mark[1] == 'm' || mark[1] == 'M') - && (mark[2] == 'l' || mark[2] == 'L') && mark[3] == 0) + else { - if (OPTSET(parse_declaration)) - { - PUSHNODE(node_declaration); - - cursor->name = mark; - - // scan for tag end - mark = s; + PUSHNODE(node_pi); + } - SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); // Look for '?>'. - CHECK_ERROR(status_bad_pi, s); + cursor->name = target; - // replace ending ? with / to terminate properly - *s = '/'; + ENDSEG(); - // parse attributes - s = mark; + // parse value/attributes + if (ch == '?') + { + // empty node + if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_pi, s); + s += (*s == '>'); - // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES - } + POPNODE(); } - else + else if (IS_CHARTYPE(ch, ct_space)) { - if (OPTSET(parse_pi)) - { - PUSHNODE(node_pi); // Append a new node on the tree. - - cursor->name = mark; - } - - // ch is a whitespace character, skip whitespaces SKIPWS(); - CHECK_ERROR(status_bad_pi, s); - mark = s; + // scan for tag end + char_t* value = s; - SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); // Look for '?>'. + SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); CHECK_ERROR(status_bad_pi, s); - ENDSEG(); - - s += (*s == '>'); // Step over > - - if (OPTSET(parse_pi)) + if (declaration) { - cursor->value = mark; + // replace ending ? with / so that 'element' terminates properly + *s = '/'; + // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES + s = value; + } + else + { + // store value and step over > + cursor->value = value; POPNODE(); + + ENDSEG(); + + s += (*s == '>'); } } + else THROW_ERROR(status_bad_pi, s); } - else // not parsing PI + else { - SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); // Look for '?>'. + // scan for tag end + SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); CHECK_ERROR(status_bad_pi, s); s += (s[1] == '>' ? 2 : 1); -- cgit v1.2.3