summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640> 2011-12-09 05:24:07 +0000
committer arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640> 2011-12-09 05:24:07 +0000
commit 1b87d3dcbf3ce5a6384826740dc65cf4917a1555 (patch)
tree c55392226c9a12d28eef2d14a67bc79c8ae3b893 /src
parent fbfd2ae25a5382e6c1fba3290c656ceb0db15d5b (diff)
Introduced parse_ws_pcdata_single flag: only parses whitespace-only PCDATA if it's the only child of the parent node (middle ground between default flags and parse_ws_pcdata)
git-svn-id: http://pugixml.googlecode.com/svn/trunk@825 99668b35-9821-0410-8761-19e4c4f06640
Diffstat (limited to 'src')
-rw-r--r-- src/pugixml.cpp 20
-rw-r--r-- src/pugixml.hpp 5
2 files changed, 20 insertions, 5 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index 3680fc5..a3c6abd 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -1898,7 +1898,7 @@ namespace
// Parser utilities.
#define SKIPWS() { while (IS_CHARTYPE(*s, ct_space)) ++s; }
- #define OPTSET(OPT) ( optmsk & OPT )
+ #define OPTSET(OPT) ( optmsk & (OPT) )
#define PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); if (!cursor) THROW_ERROR(status_out_of_memory, s); }
#define POPNODE() { cursor = cursor->parent; }
#define SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
@@ -2402,10 +2402,20 @@ namespace
SKIPWS(); // Eat whitespace if no genuine PCDATA here.
- if ((!OPTSET(parse_ws_pcdata) || mark == s) && (*s == '<' || !*s))
- {
- continue;
- }
+ if (*s == '<')
+ {
+ // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
+ assert(mark != s);
+
+ if (!OPTSET(parse_ws_pcdata | parse_ws_pcdata_single))
+ {
+ continue;
+ }
+ else if (OPTSET(parse_ws_pcdata_single))
+ {
+ if (s[1] != '/' || cursor->first_child) continue;
+ }
+ }
s = mark;
diff --git a/src/pugixml.hpp b/src/pugixml.hpp
index 1826b45..11bf279 100644
--- a/src/pugixml.hpp
+++ b/src/pugixml.hpp
@@ -164,6 +164,11 @@ namespace pugi
// This flag determines if document type declaration (node_doctype) is added to the DOM tree. This flag is off by default.
const unsigned int parse_doctype = 0x0200;
+ // This flag determines if plain character data (node_pcdata) that is the only child of the parent node and that consists only
+ // of whitespace is added to the DOM tree.
+ // This flag is off by default; turning it on may result in slower parsing and more memory consumption.
+ const unsigned int parse_ws_pcdata_single = 0x0400;
+
// The default parsing mode.
// Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.