From 1b87d3dcbf3ce5a6384826740dc65cf4917a1555 Mon Sep 17 00:00:00 2001
From: "arseny.kapoulkine"
Date: Fri, 9 Dec 2011 05:24:07 +0000
Subject: Introduced parse_ws_pcdata_single flag: only parses whitespace-only PCDATA if it's the only child of the parent node (middle ground between default flags and parse_ws_pcdata)

git-svn-id: http://pugixml.googlecode.com/svn/trunk@825 99668b35-9821-0410-8761-19e4c4f06640
---
 src/pugixml.cpp | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

(limited to 'src/pugixml.cpp')

diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index 3680fc5..a3c6abd 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -1898,7 +1898,7 @@ namespace
 
 	// Parser utilities.
 	#define SKIPWS() { while (IS_CHARTYPE(*s, ct_space)) ++s; }
-	#define OPTSET(OPT) ( optmsk & OPT )
+	#define OPTSET(OPT) ( optmsk & (OPT) )
 	#define PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); if (!cursor) THROW_ERROR(status_out_of_memory, s); }
 	#define POPNODE() { cursor = cursor->parent; }
 	#define SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
@@ -2402,10 +2402,20 @@ namespace
 	SKIPWS();
 
 	// Eat whitespace if no genuine PCDATA here.
-	if ((!OPTSET(parse_ws_pcdata) || mark == s) && (*s == '<' || !*s))
-	{
-		continue;
-	}
+	if (*s == '<')
+	{
+		// We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
+		assert(mark != s);
+
+		if (!OPTSET(parse_ws_pcdata | parse_ws_pcdata_single))
+		{
+			continue;
+		}
+		else if (OPTSET(parse_ws_pcdata_single))
+		{
+			if (s[1] != '/' || cursor->first_child) continue;
+		}
+	}
 
 	s = mark;
 
-- 
cgit v1.2.3