summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Arseny Kapoulkine <arseny.kapoulkine@gmail.com> 2016-01-08 08:37:26 -0800
committer Arseny Kapoulkine <arseny.kapoulkine@gmail.com> 2016-01-08 08:37:26 -0800
commit 2874f6f21dc22efab1a2884fe463c5461955a225 (patch)
tree 7e45b251b6f7e327f9a03ec5edd5ad794ec2fc9f
parent ad3b492c1a4b3bf3a3163aa2af1641f422dba33f (diff)
Add initial support for parse_embed_pcdata
When this flag is true, PCDATA value is saved to the parent element instead of allocating a new node. This prevents some documents from round-tripping since it loses information, but can provide a significant memory reduction and parsing speedup for some documents.
-rw-r--r-- src/pugixml.cpp 17
-rw-r--r-- src/pugixml.hpp 5
2 files changed, 18 insertions, 4 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index 35c0d8e..de87dcf 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -3360,13 +3360,22 @@ PUGI__NS_BEGIN
if (cursor->parent || PUGI__OPTSET(parse_fragment))
{
- PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
- cursor->value = s; // Save the offset.
+ if (!PUGI__OPTSET(parse_embed_pcdata))
+ {
+ PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
+
+ cursor->value = s; // Save the offset.
+
+ PUGI__POPNODE(); // Pop since this is a standalone.
+ }
+ else
+ {
+ if (cursor->parent && !cursor->value)
+ cursor->value = s; // Save the offset.
+ }
s = strconv_pcdata(s);
- PUGI__POPNODE(); // Pop since this is a standalone.
-
if (!*s) break;
}
else
diff --git a/src/pugixml.hpp b/src/pugixml.hpp
index 540e6ba..4ed6f55 100644
--- a/src/pugixml.hpp
+++ b/src/pugixml.hpp
@@ -158,6 +158,11 @@ namespace pugi
// is a valid document. This flag is off by default.
const unsigned int parse_fragment = 0x1000;
+ // This flag determines if plain character data is stored in the parent element's value. This significantly changes the structure of
+ // the document and does not allow some documents to round-trip; this flag is only recommended for parsing documents with a lot of
+ // PCDATA nodes in a very memory-constrained environment. This flag is off by default.
+ const unsigned int parse_embed_pcdata = 0x2000;
+
// The default parsing mode.
// Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.