summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorArseny Kapoulkine <arseny.kapoulkine@gmail.com>2016-01-14 07:52:40 -0800
committerArseny Kapoulkine <arseny.kapoulkine@gmail.com>2016-01-14 07:52:40 -0800
commitc388dbeba4f5de655ca74eb21d0a6d29c5eaaee2 (patch)
tree2e4f67bf33ac0f4b982831b4cc31f61d50cec836 /src
parentad3b492c1a4b3bf3a3163aa2af1641f422dba33f (diff)
parent4f3be7616729cbf0c8768caf861331d710d457a8 (diff)
Merge pull request #79 from zeux/embed-pcdata
Add parse_embed_pcdata flag. This flag determines whether plain character data is stored in the parent element's value. This significantly changes the structure of the document; this flag is only recommended for parsing documents with a lot of PCDATA nodes in a very memory-constrained environment. Most high-level APIs continue to work; code that inspects DOM using first_child()/value() will have to be adapted.
Diffstat (limited to 'src')
-rw-r--r--src/pugixml.cpp63
-rw-r--r--src/pugixml.hpp5
2 files changed, 60 insertions, 8 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index 35c0d8e..158a24d 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -3360,13 +3360,21 @@ PUGI__NS_BEGIN
if (cursor->parent || PUGI__OPTSET(parse_fragment))
{
- PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
- cursor->value = s; // Save the offset.
+ if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
+ {
+ cursor->value = s; // Save the offset.
+ }
+ else
+ {
+ PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
+
+ cursor->value = s; // Save the offset.
+
+ PUGI__POPNODE(); // Pop since this is a standalone.
+ }
s = strconv_pcdata(s);
- PUGI__POPNODE(); // Pop since this is a standalone.
-
if (!*s) break;
}
else
@@ -4009,17 +4017,40 @@ PUGI__NS_BEGIN
if (node->first_attribute)
node_output_attributes(writer, node, indent, indent_length, flags, depth);
- if (!node->first_child)
+ // element nodes can have value if parse_embed_pcdata was used
+ if (!node->value)
{
- writer.write(' ', '/', '>');
+ if (!node->first_child)
+ {
+ writer.write(' ', '/', '>');
- return false;
+ return false;
+ }
+ else
+ {
+ writer.write('>');
+
+ return true;
+ }
}
else
{
writer.write('>');
- return true;
+ text_output(writer, node->value, ctx_special_pcdata, flags);
+
+ if (!node->first_child)
+ {
+ writer.write('<', '/');
+ writer.write_string(name);
+ writer.write('>');
+
+ return false;
+ }
+ else
+ {
+ return true;
+ }
}
}
@@ -4127,6 +4158,10 @@ PUGI__NS_BEGIN
if (node_output_start(writer, node, indent, indent_length, flags, depth))
{
+ // element nodes can have value if parse_embed_pcdata was used
+ if (node->value)
+ indent_flags = 0;
+
node = node->first_child;
depth++;
continue;
@@ -5451,6 +5486,10 @@ namespace pugi
{
if (!_root) return PUGIXML_TEXT("");
+ // element nodes can have value if parse_embed_pcdata was used
+ if (PUGI__NODETYPE(_root) == node_element && _root->value)
+ return _root->value;
+
for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
if (impl::is_text_node(i) && i->value)
return i->value;
@@ -6198,6 +6237,10 @@ namespace pugi
{
if (!_root || impl::is_text_node(_root)) return _root;
+ // element nodes can have value if parse_embed_pcdata was used
+ if (PUGI__NODETYPE(_root) == node_element && _root->value)
+ return _root;
+
for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
if (impl::is_text_node(node))
return node;
@@ -7636,6 +7679,10 @@ PUGI__NS_BEGIN
{
xpath_string result;
+ // element nodes can have value if parse_embed_pcdata was used
+ if (n.value()[0])
+ result.append(xpath_string::from_const(n.value()), alloc);
+
xml_node cur = n.first_child();
while (cur && cur != n)
diff --git a/src/pugixml.hpp b/src/pugixml.hpp
index 540e6ba..e561490 100644
--- a/src/pugixml.hpp
+++ b/src/pugixml.hpp
@@ -158,6 +158,11 @@ namespace pugi
// is a valid document. This flag is off by default.
const unsigned int parse_fragment = 0x1000;
+ // This flag determines if plain character data is stored in the parent element's value. This significantly changes the structure of
+ // the document; this flag is only recommended for parsing documents with many PCDATA nodes in memory-constrained environments.
+ // This flag is off by default.
+ const unsigned int parse_embed_pcdata = 0x2000;
+
// The default parsing mode.
// Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.