From e7d2540c1a1be4c4f69d64771cfddfaf0fc6556a Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Tue, 14 Oct 2014 04:11:17 +0000 Subject: Adjust comment output to avoid malformed documents. Comment value can not contain the string "--" or end with "-". Since comments do not support escaping, we're handling this by adding an extra space after the first "-". A string of "-" thus turns into "- - - ...". git-svn-id: https://pugixml.googlecode.com/svn/trunk@1058 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 93b648b..7db62d0 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -3424,6 +3424,31 @@ PUGI__NS_BEGIN } } + PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s) + { + writer.write('<', '!', '-', '-'); + + while (*s) + { + const char_t* prev = s; + + // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body + while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s; + + writer.write_buffer(prev, static_cast(s - prev)); + + if (*s) + { + assert(*s == '-'); + + writer.write('-', ' '); + ++s; + } + } + + writer.write('-', '-', '>'); + } + PUGI__FN void node_output_attributes(xml_buffered_writer& writer, const xml_node node, unsigned int flags) { const char_t* default_name = PUGIXML_TEXT(":anonymous"); @@ -3523,22 +3548,15 @@ PUGI__NS_BEGIN break; case node_comment: - writer.write('<', '!', '-', '-'); - writer.write_string(node.value()); - writer.write('-', '-', '>'); + node_output_comment(writer, node.value()); if ((flags & format_raw) == 0) writer.write('\n'); break; case node_pi: - case node_declaration: writer.write('<', '?'); writer.write_string(node.name()[0] ? node.name() : default_name); - if (node.type() == node_declaration) - { - node_output_attributes(writer, node, flags); - } - else if (node.value()[0]) + if (node.value()[0]) { writer.write(' '); writer.write_string(node.value()); @@ -3548,6 +3566,14 @@ PUGI__NS_BEGIN if ((flags & format_raw) == 0) writer.write('\n'); break; + case node_declaration: + writer.write('<', '?'); + writer.write_string(node.name()[0] ? node.name() : default_name); + node_output_attributes(writer, node, flags); + writer.write('?', '>'); + if ((flags & format_raw) == 0) writer.write('\n'); + break; + case node_doctype: writer.write('<', '!', 'D', 'O', 'C'); writer.write('T', 'Y', 'P', 'E'); -- cgit v1.2.3 From 883031fb45cf0f86cd36b20ad4762da58dd6126c Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 15 Oct 2014 06:05:49 +0000 Subject: XPath: Fix optimization bug with //name[last()] The actual condition for the optimization is invariance from context list -- this includes both position() and last(). Instead of splitting the posinv concept just include last() into non-posinv expressions - this requires sorting for boolean predicates that depend on last() and do not depend on position(). These cases should be very rare. git-svn-id: https://pugixml.googlecode.com/svn/trunk@1060 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 7db62d0..484f34b 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -9621,6 +9621,7 @@ PUGI__NS_BEGIN switch (_type) { case ast_func_position: + case ast_func_last: return false; case ast_string_constant: -- cgit v1.2.3 From 5da51dff270f430701b26428c9422f21e0ea4c9c Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 16 Oct 2014 03:46:42 +0000 Subject: XPath: Optimize attribute axis lookup When looking for an attribute by name, finding the first attribute means we can stop looking since attribute names are unique. This makes some queries faster by 40%. Another very common pattern in XPath queries is finding an attribute with a specified value using a predicate (@name = 'value'). While we perform an optimal amount of traversal in that case, there is a substantial overhead with evaluating the nodes, saving and restoring the stack state, pushing the attribute node into a set, etc. Detecting this pattern allows us to use optimized code, resulting in up to 2x speedup for some queries. git-svn-id: https://pugixml.googlecode.com/svn/trunk@1061 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 57 ++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 484f34b..71c3f5d 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -7473,6 +7473,11 @@ PUGI__NS_BEGIN *write = 0; } + inline bool is_xpath_attribute(const char_t* name) + { + return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')); + } + struct xpath_variable_boolean: xpath_variable { xpath_variable_boolean(): value(false) @@ -8217,7 +8222,6 @@ PUGI__NS_BEGIN ast_func_normalize_space_0, // normalize-space() ast_func_normalize_space_1, // normalize-space(left) ast_func_translate, // translate(left, right, third) - ast_func_translate_table, // translate(left, right, third) where right/third are constants ast_func_boolean, // boolean(left) ast_func_not, // not(left) ast_func_true, // true() @@ -8230,7 +8234,10 @@ PUGI__NS_BEGIN ast_func_ceiling, // ceiling(left) ast_func_round, // round(left) ast_step, // process set left with step - ast_step_root // select root node + ast_step_root, // select root node + + ast_opt_translate_table, // translate(left, right, third) where right/third are constants + ast_opt_compare_attribute // @name = 'string' }; enum axis_t @@ -8296,7 +8303,7 @@ PUGI__NS_BEGIN xpath_variable* variable; // node test for ast_step (node name/namespace/node type/pi target) const char_t* nodetest; - // table for ast_func_translate_table + // table for ast_opt_translate_table const unsigned char* table; } _data; @@ -8498,36 +8505,38 @@ PUGI__NS_BEGIN } } - void step_push(xpath_node_set_raw& ns, const xml_attribute a, const xml_node parent, xpath_allocator* alloc) + bool step_push(xpath_node_set_raw& ns, const xml_attribute a, const xml_node parent, xpath_allocator* alloc) { - if (!a) return; + if (!a) return false; const char_t* name = a.name(); - // There are no attribute nodes corresponding to attributes that declare namespaces - // That is, "xmlns:..." or "xmlns" - if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return; - switch (_test) { case nodetest_name: - if (strequal(name, _data.nodetest)) + if (strequal(name, _data.nodetest) && is_xpath_attribute(name)) + { ns.push_back(xpath_node(a, parent), alloc); + return true; + } break; case nodetest_type_node: case nodetest_all: - ns.push_back(xpath_node(a, parent), alloc); + if (is_xpath_attribute(name)) + ns.push_back(xpath_node(a, parent), alloc); break; case nodetest_all_in_namespace: - if (starts_with(name, _data.nodetest)) + if (starts_with(name, _data.nodetest) && is_xpath_attribute(name)) ns.push_back(xpath_node(a, parent), alloc); break; default: ; } + + return false; } void step_push(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc) @@ -8589,7 +8598,8 @@ PUGI__NS_BEGIN case axis_attribute: { for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute()) - step_push(ns, a, n, alloc); + if (step_push(ns, a, n, alloc)) + break; // step_push returns true if it found a unique attribute break; } @@ -9007,6 +9017,15 @@ PUGI__NS_BEGIN return false; } + case ast_opt_compare_attribute: + { + const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string(); + + xml_attribute attr = c.n.node().attribute(_left->_data.nodetest); + + return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name()); + } + case ast_variable: { assert(_rettype == _data.variable->type()); @@ -9412,7 +9431,7 @@ PUGI__NS_BEGIN return s; } - case ast_func_translate_table: + case ast_opt_translate_table: { xpath_string s = _left->eval_string(c, stack); @@ -9610,10 +9629,18 @@ PUGI__NS_BEGIN if (table) { - _type = ast_func_translate_table; + _type = ast_opt_translate_table; _data.table = table; } } + + // Use optimized path for @attr = 'value' or @attr = $value + if (_type == ast_op_equal && + _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right && + (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->rettype() == xpath_type_string))) + { + _type = ast_opt_compare_attribute; + } } bool is_posinv_expr() const -- cgit v1.2.3 From 72ec01c5f6d23405f30614d63fafa048279ca13d Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 18 Oct 2014 15:28:02 +0000 Subject: XPath: Extend the descendant-or-self optimization Use descendant-or-self::node() transformation for self, descendant and descendant-or-self axis. Self axis should be semi-frequent; descendant axes should not really be used with // but if they ever are the complexity of the step becomes quadratic so it's better to optimize this if possible. git-svn-id: https://pugixml.googlecode.com/svn/trunk@1063 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 71c3f5d..1d9dcfe 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -9613,12 +9613,17 @@ PUGI__NS_BEGIN // Replace descendant-or-self::node()/child::foo with descendant::foo // The former is a full form of //foo, the latter is much faster since it executes the node test immediately + // Do a similar kind of replacement for self/descendant/descendant-or-self axes // Note that we only replace positionally invariant steps (//foo[1] != /descendant::foo[1]) - if (_type == ast_step && _axis == axis_child && _left && + if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left && _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right && is_posinv_step()) { - _axis = axis_descendant; + if (_axis == axis_child || _axis == axis_descendant) + _axis = axis_descendant; + else + _axis = axis_descendant_or_self; + _left = _left->_left; } -- cgit v1.2.3 From f6635588758ed1b650be22903c2e2e81273e05c5 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 19 Oct 2014 07:33:42 +0000 Subject: XPath: Introduce xpath_query::evaluate_node This method is equivalent to xml_node::select_single_node. This makes select_single_node faster in certain cases by avoiding an allocation and - more importantly - paves the way for future step optimizations. git-svn-id: https://pugixml.googlecode.com/svn/trunk@1064 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 56 +++++++++++++++++++++++++++++++++++++++----------------- src/pugixml.hpp | 6 ++++++ 2 files changed, 45 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 1d9dcfe..6f230dd 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -10581,6 +10581,25 @@ PUGI__NS_BEGIN return impl->root->eval_string(c, sd.stack); } + + PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl) + { + if (!impl) return 0; + + if (impl->root->rettype() != xpath_type_node_set) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return 0; + #else + xpath_parse_result res; + res.error = "Expression does not evaluate to node set"; + + throw xpath_exception(res); + #endif + } + + return impl->root; + } PUGI__NS_END namespace pugi @@ -11082,22 +11101,9 @@ namespace pugi PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const { - if (!_impl) return xpath_node_set(); - - impl::xpath_ast_node* root = static_cast(_impl)->root; - - if (root->rettype() != xpath_type_node_set) - { - #ifdef PUGIXML_NO_EXCEPTIONS - return xpath_node_set(); - #else - xpath_parse_result res; - res.error = "Expression does not evaluate to node set"; + impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast(_impl)); + if (!root) return xpath_node_set(); - throw xpath_exception(res); - #endif - } - impl::xpath_context c(n, 1, 1); impl::xpath_stack_data sd; @@ -11110,6 +11116,23 @@ namespace pugi return xpath_node_set(r.begin(), r.end(), r.type()); } + PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const + { + impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast(_impl)); + if (!root) return xpath_node(); + + impl::xpath_context c(n, 1, 1); + impl::xpath_stack_data sd; + + #ifdef PUGIXML_NO_EXCEPTIONS + if (setjmp(sd.error_handler)) return xpath_node(); + #endif + + impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack); + + return r.first(); + } + PUGI__FN const xpath_parse_result& xpath_query::result() const { return _result; @@ -11137,8 +11160,7 @@ namespace pugi PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const { - xpath_node_set s = query.evaluate_node_set(*this); - return s.empty() ? xpath_node() : s.first(); + return query.evaluate_node(*this); } PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const diff --git a/src/pugixml.hpp b/src/pugixml.hpp index 69b2cb2..2947bf4 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -1134,6 +1134,12 @@ namespace pugi // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node set instead. xpath_node_set evaluate_node_set(const xpath_node& n) const; + // Evaluate expression as node set in the specified context. + // Return first node in document order, or empty node if node set is empty. + // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors. + // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node instead. + xpath_node evaluate_node(const xpath_node& n) const; + // Get parsing result (used to get compilation errors in PUGIXML_NO_EXCEPTIONS mode) const xpath_parse_result& result() const; -- cgit v1.2.3 From c3eb9c92a86b041b40e70afb32ea66d4369c892b Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 19 Oct 2014 07:33:51 +0000 Subject: XPath: Rename xml_node::select_single_node to ::select_node select_node is shorter and mistyping nodes as node or vice versa should not lead to any issues since return types are substantially different. select_single_node method still works and will be deprecated with an attribute and removed at some point. git-svn-id: https://pugixml.googlecode.com/svn/trunk@1065 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 17 ++++++++++++++--- src/pugixml.hpp | 9 +++++++-- 2 files changed, 21 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 6f230dd..99cdfc0 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -11152,13 +11152,13 @@ namespace pugi return !_impl; } - PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const + PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const { xpath_query q(query, variables); - return select_single_node(q); + return select_node(q); } - PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const + PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const { return query.evaluate_node(*this); } @@ -11173,6 +11173,17 @@ namespace pugi { return query.evaluate_node_set(*this); } + + PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const + { + xpath_query q(query, variables); + return select_single_node(q); + } + + PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const + { + return query.evaluate_node(*this); + } } #endif diff --git a/src/pugixml.hpp b/src/pugixml.hpp index 2947bf4..e252e16 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -585,12 +585,17 @@ namespace pugi #ifndef PUGIXML_NO_XPATH // Select single node by evaluating XPath query. Returns first node from the resulting node set. - xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const; - xpath_node select_single_node(const xpath_query& query) const; + xpath_node select_node(const char_t* query, xpath_variable_set* variables = 0) const; + xpath_node select_node(const xpath_query& query) const; // Select node set by evaluating XPath query xpath_node_set select_nodes(const char_t* query, xpath_variable_set* variables = 0) const; xpath_node_set select_nodes(const xpath_query& query) const; + + // (deprecated: use select_node instead) Select single node by evaluating XPath query. + xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const; + xpath_node select_single_node(const xpath_query& query) const; + #endif // Print subtree using a writer object -- cgit v1.2.3 From 1b8b87904b0618f853345619e7ee2656cab80113 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 20 Oct 2014 01:00:48 +0000 Subject: XPath: Introduce _first/_any set evaluation modes Sometimes when evaluating the node set we don't need the entire set and only need the first element in docorder or any element. In the absence of iterator support we can still use this information to short-circuit traversals. This does not have any effect on straightforward node collection queries, but frequently improves performance of complex queries with predicates etc. XMark benchmark gets 15x faster with some queries enjoying 100x speedup on 10 Mb dataset due to a significant complexity improvement. git-svn-id: https://pugixml.googlecode.com/svn/trunk@1067 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 190 ++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 123 insertions(+), 67 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 99cdfc0..e9ab09d 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -8270,6 +8270,13 @@ PUGI__NS_BEGIN nodetest_all_in_namespace }; + enum nodeset_eval_t + { + nodeset_eval_all, + nodeset_eval_any, + nodeset_eval_first + }; + template struct axis_to_type { static const axis_t axis; @@ -8334,8 +8341,8 @@ PUGI__NS_BEGIN { xpath_allocator_capture cr(stack.result); - xpath_node_set_raw ls = lhs->eval_node_set(c, stack); - xpath_node_set_raw rs = rhs->eval_node_set(c, stack); + xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); + xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) @@ -8363,7 +8370,7 @@ PUGI__NS_BEGIN xpath_allocator_capture cr(stack.result); double l = lhs->eval_number(c, stack); - xpath_node_set_raw rs = rhs->eval_node_set(c, stack); + xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) { @@ -8380,7 +8387,7 @@ PUGI__NS_BEGIN xpath_allocator_capture cr(stack.result); xpath_string l = lhs->eval_string(c, stack); - xpath_node_set_raw rs = rhs->eval_node_set(c, stack); + xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) { @@ -8408,8 +8415,8 @@ PUGI__NS_BEGIN { xpath_allocator_capture cr(stack.result); - xpath_node_set_raw ls = lhs->eval_node_set(c, stack); - xpath_node_set_raw rs = rhs->eval_node_set(c, stack); + xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); + xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) { @@ -8433,7 +8440,7 @@ PUGI__NS_BEGIN xpath_allocator_capture cr(stack.result); double l = lhs->eval_number(c, stack); - xpath_node_set_raw rs = rhs->eval_node_set(c, stack); + xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) { @@ -8449,7 +8456,7 @@ PUGI__NS_BEGIN { xpath_allocator_capture cr(stack.result); - xpath_node_set_raw ls = lhs->eval_node_set(c, stack); + xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); double r = rhs->eval_number(c, stack); for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) @@ -8469,7 +8476,7 @@ PUGI__NS_BEGIN } } - void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) + static void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) { assert(ns.size() >= first); @@ -8524,12 +8531,18 @@ PUGI__NS_BEGIN case nodetest_type_node: case nodetest_all: if (is_xpath_attribute(name)) + { ns.push_back(xpath_node(a, parent), alloc); + return true; + } break; case nodetest_all_in_namespace: if (starts_with(name, _data.nodetest) && is_xpath_attribute(name)) + { ns.push_back(xpath_node(a, parent), alloc); + return true; + } break; default: @@ -8539,57 +8552,80 @@ PUGI__NS_BEGIN return false; } - void step_push(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc) + bool step_push(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc) { - if (!n) return; + if (!n) return false; switch (_test) { case nodetest_name: if (n.type() == node_element && strequal(n.name(), _data.nodetest)) + { ns.push_back(n, alloc); + return true; + } break; case nodetest_type_node: ns.push_back(n, alloc); - break; + return true; case nodetest_type_comment: if (n.type() == node_comment) + { ns.push_back(n, alloc); + return true; + } break; case nodetest_type_text: if (n.type() == node_pcdata || n.type() == node_cdata) + { ns.push_back(n, alloc); + return true; + } break; case nodetest_type_pi: if (n.type() == node_pi) + { ns.push_back(n, alloc); + return true; + } break; case nodetest_pi: if (n.type() == node_pi && strequal(n.name(), _data.nodetest)) + { ns.push_back(n, alloc); + return true; + } break; case nodetest_all: if (n.type() == node_element) + { ns.push_back(n, alloc); + return true; + } break; case nodetest_all_in_namespace: if (n.type() == node_element && starts_with(n.name(), _data.nodetest)) + { ns.push_back(n, alloc); + return true; + } break; default: assert(!"Unknown axis"); - } + } + + return false; } - template void step_fill(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc, T) + template void step_fill(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc, bool once, T) { const axis_t axis = T::axis; @@ -8598,8 +8634,8 @@ PUGI__NS_BEGIN case axis_attribute: { for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute()) - if (step_push(ns, a, n, alloc)) - break; // step_push returns true if it found a unique attribute + if (step_push(ns, a, n, alloc) & once) + return; break; } @@ -8607,7 +8643,8 @@ PUGI__NS_BEGIN case axis_child: { for (xml_node c = n.first_child(); c; c = c.next_sibling()) - step_push(ns, c, alloc); + if (step_push(ns, c, alloc) & once) + return; break; } @@ -8616,13 +8653,15 @@ PUGI__NS_BEGIN case axis_descendant_or_self: { if (axis == axis_descendant_or_self) - step_push(ns, n, alloc); + if (step_push(ns, n, alloc) & once) + return; xml_node cur = n.first_child(); while (cur && cur != n) { - step_push(ns, cur, alloc); + if (step_push(ns, cur, alloc) & once) + return; if (cur.first_child()) cur = cur.first_child(); @@ -8643,7 +8682,8 @@ PUGI__NS_BEGIN case axis_following_sibling: { for (xml_node c = n.next_sibling(); c; c = c.next_sibling()) - step_push(ns, c, alloc); + if (step_push(ns, c, alloc) & once) + return; break; } @@ -8651,7 +8691,8 @@ PUGI__NS_BEGIN case axis_preceding_sibling: { for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling()) - step_push(ns, c, alloc); + if (step_push(ns, c, alloc) & once) + return; break; } @@ -8666,7 +8707,8 @@ PUGI__NS_BEGIN for (;;) { - step_push(ns, cur, alloc); + if (step_push(ns, cur, alloc) & once) + return; if (cur.first_child()) cur = cur.first_child(); @@ -8698,7 +8740,8 @@ PUGI__NS_BEGIN else { // leaf node, can't be ancestor - step_push(ns, cur, alloc); + if (step_push(ns, cur, alloc) & once) + return; if (cur.previous_sibling()) cur = cur.previous_sibling(); @@ -8709,7 +8752,9 @@ PUGI__NS_BEGIN cur = cur.parent(); if (!cur) break; - if (!node_is_ancestor(cur, n)) step_push(ns, cur, alloc); + if (!node_is_ancestor(cur, n)) + if (step_push(ns, cur, alloc) & once) + return; } while (!cur.previous_sibling()); @@ -8727,13 +8772,15 @@ PUGI__NS_BEGIN case axis_ancestor_or_self: { if (axis == axis_ancestor_or_self) - step_push(ns, n, alloc); + if (step_push(ns, n, alloc) & once) + return; xml_node cur = n.parent(); while (cur) { - step_push(ns, cur, alloc); + if (step_push(ns, cur, alloc) & once) + return; cur = cur.parent(); } @@ -8760,7 +8807,7 @@ PUGI__NS_BEGIN } } - template void step_fill(xpath_node_set_raw& ns, const xml_attribute a, const xml_node p, xpath_allocator* alloc, T v) + template void step_fill(xpath_node_set_raw& ns, const xml_attribute a, const xml_node p, xpath_allocator* alloc, bool once, T v) { const axis_t axis = T::axis; @@ -8770,13 +8817,15 @@ PUGI__NS_BEGIN case axis_ancestor_or_self: { if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test - step_push(ns, a, p, alloc); + if (step_push(ns, a, p, alloc) & once) + return; xml_node cur = p; while (cur) { - step_push(ns, cur, alloc); + if (step_push(ns, cur, alloc) & once) + return; cur = cur.parent(); } @@ -8811,7 +8860,8 @@ PUGI__NS_BEGIN if (!cur) break; } - step_push(ns, cur, alloc); + if (step_push(ns, cur, alloc) & once) + return; } break; @@ -8827,7 +8877,7 @@ PUGI__NS_BEGIN case axis_preceding: { // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding - step_fill(ns, p, alloc, v); + step_fill(ns, p, alloc, once, v); break; } @@ -8835,18 +8885,24 @@ PUGI__NS_BEGIN assert(!"Unimplemented axis"); } } - - template xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, T v) + + template xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v) { const axis_t axis = T::axis; - bool attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); + bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); + bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling); + + bool once = + (axis == axis_attribute && _test == nodetest_name) + ? true + : !_right && (axis_reverse ? eval == nodeset_eval_any : eval != nodeset_eval_all); xpath_node_set_raw ns; - ns.set_type((axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling) ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted); + ns.set_type(axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted); if (_left) { - xpath_node_set_raw s = _left->eval_node_set(c, stack); + xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all); // self axis preserves the original order if (axis == axis_self) ns.set_type(s.type()); @@ -8859,9 +8915,9 @@ PUGI__NS_BEGIN if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted); if (it->node()) - step_fill(ns, it->node(), stack.result, v); - else if (attributes) - step_fill(ns, it->attribute(), it->parent(), stack.result, v); + step_fill(ns, it->node(), stack.result, once, v); + else if (axis_has_attributes) + step_fill(ns, it->attribute(), it->parent(), stack.result, once, v); apply_predicates(ns, size, stack); } @@ -8869,9 +8925,9 @@ PUGI__NS_BEGIN else { if (c.n.node()) - step_fill(ns, c.n.node(), stack.result, v); - else if (attributes) - step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, v); + step_fill(ns, c.n.node(), stack.result, once, v); + else if (axis_has_attributes) + step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, once, v); apply_predicates(ns, 0, stack); } @@ -9054,7 +9110,7 @@ PUGI__NS_BEGIN { xpath_allocator_capture cr(stack.result); - return !eval_node_set(c, stack).empty(); + return !eval_node_set(c, stack, nodeset_eval_any).empty(); } default: @@ -9100,7 +9156,7 @@ PUGI__NS_BEGIN { xpath_allocator_capture cr(stack.result); - return static_cast(_left->eval_node_set(c, stack).size()); + return static_cast(_left->eval_node_set(c, stack, nodeset_eval_all).size()); } case ast_func_string_length_0: @@ -9133,7 +9189,7 @@ PUGI__NS_BEGIN double r = 0; - xpath_node_set_raw ns = _left->eval_node_set(c, stack); + xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all); for (const xpath_node* it = ns.begin(); it != ns.end(); ++it) { @@ -9269,7 +9325,7 @@ PUGI__NS_BEGIN { xpath_allocator_capture cr(stack.result); - xpath_node_set_raw ns = _left->eval_node_set(c, stack); + xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); xpath_node na = ns.first(); return xpath_string::from_const(local_name(na)); @@ -9286,7 +9342,7 @@ PUGI__NS_BEGIN { xpath_allocator_capture cr(stack.result); - xpath_node_set_raw ns = _left->eval_node_set(c, stack); + xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); xpath_node na = ns.first(); return xpath_string::from_const(qualified_name(na)); @@ -9303,7 +9359,7 @@ PUGI__NS_BEGIN { xpath_allocator_capture cr(stack.result); - xpath_node_set_raw ns = _left->eval_node_set(c, stack); + xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); xpath_node na = ns.first(); return xpath_string::from_const(namespace_uri(na)); @@ -9466,7 +9522,7 @@ PUGI__NS_BEGIN xpath_stack swapped_stack = {stack.temp, stack.result}; - xpath_node_set_raw ns = eval_node_set(c, swapped_stack); + xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first); return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result); } @@ -9478,7 +9534,7 @@ PUGI__NS_BEGIN } } - xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack) + xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval) { switch (_type) { @@ -9488,8 +9544,8 @@ PUGI__NS_BEGIN xpath_stack swapped_stack = {stack.temp, stack.result}; - xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack); - xpath_node_set_raw rs = _right->eval_node_set(c, stack); + xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval); + xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval); // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother rs.set_type(xpath_node_set::type_unsorted); @@ -9503,7 +9559,7 @@ PUGI__NS_BEGIN case ast_filter: case ast_filter_posinv: { - xpath_node_set_raw set = _left->eval_node_set(c, stack); + xpath_node_set_raw set = _left->eval_node_set(c, stack, nodeset_eval_all); // either expression is a number or it contains position() call; sort by document order if (_type == ast_filter) set.sort_do(); @@ -9521,44 +9577,44 @@ PUGI__NS_BEGIN switch (_axis) { case axis_ancestor: - return step_do(c, stack, axis_to_type()); + return step_do(c, stack, eval, axis_to_type()); case axis_ancestor_or_self: - return step_do(c, stack, axis_to_type()); + return step_do(c, stack, eval, axis_to_type()); case axis_attribute: - return step_do(c, stack, axis_to_type()); + return step_do(c, stack, eval, axis_to_type()); case axis_child: - return step_do(c, stack, axis_to_type()); + return step_do(c, stack, eval, axis_to_type()); case axis_descendant: - return step_do(c, stack, axis_to_type()); + return step_do(c, stack, eval, axis_to_type()); case axis_descendant_or_self: - return step_do(c, stack, axis_to_type()); + return step_do(c, stack, eval, axis_to_type()); case axis_following: - return step_do(c, stack, axis_to_type()); + return step_do(c, stack, eval, axis_to_type()); case axis_following_sibling: - return step_do(c, stack, axis_to_type()); + return step_do(c, stack, eval, axis_to_type()); case axis_namespace: // namespaced axis is not supported return xpath_node_set_raw(); case axis_parent: - return step_do(c, stack, axis_to_type()); + return step_do(c, stack, eval, axis_to_type()); case axis_preceding: - return step_do(c, stack, axis_to_type()); + return step_do(c, stack, eval, axis_to_type()); case axis_preceding_sibling: - return step_do(c, stack, axis_to_type()); + return step_do(c, stack, eval, axis_to_type()); case axis_self: - return step_do(c, stack, axis_to_type()); + return step_do(c, stack, eval, axis_to_type()); default: assert(!"Unknown axis"); @@ -11111,7 +11167,7 @@ namespace pugi if (setjmp(sd.error_handler)) return xpath_node_set(); #endif - impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack); + impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); return xpath_node_set(r.begin(), r.end(), r.type()); } @@ -11128,7 +11184,7 @@ namespace pugi if (setjmp(sd.error_handler)) return xpath_node(); #endif - impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack); + impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); return r.first(); } -- cgit v1.2.3 From 7774cdd96e01b2d89be16f7e240c1ffb2436b4c9 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Tue, 21 Oct 2014 03:33:37 +0000 Subject: XPath: Make sure step_push is called with valid nodes Some steps relied on step_push rejecting null inputs; this is no longer the case. Additionally stepping now more rigorously filters null inputs. git-svn-id: https://pugixml.googlecode.com/svn/trunk@1069 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index e9ab09d..5c7fb6d 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -8705,7 +8705,7 @@ PUGI__NS_BEGIN while (cur && !cur.next_sibling()) cur = cur.parent(); cur = cur.next_sibling(); - for (;;) + while (cur) { if (step_push(ns, cur, alloc) & once) return; @@ -8733,7 +8733,7 @@ PUGI__NS_BEGIN while (cur && !cur.previous_sibling()) cur = cur.parent(); cur = cur.previous_sibling(); - for (;;) + while (cur) { if (cur.last_child()) cur = cur.last_child(); @@ -8916,7 +8916,7 @@ PUGI__NS_BEGIN if (it->node()) step_fill(ns, it->node(), stack.result, once, v); - else if (axis_has_attributes) + else if (axis_has_attributes && it->attribute() && it->parent()) step_fill(ns, it->attribute(), it->parent(), stack.result, once, v); apply_predicates(ns, size, stack); @@ -8926,7 +8926,7 @@ PUGI__NS_BEGIN { if (c.n.node()) step_fill(ns, c.n.node(), stack.result, once, v); - else if (axis_has_attributes) + else if (axis_has_attributes && c.n.attribute() && c.n.parent()) step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, once, v); apply_predicates(ns, 0, stack); -- cgit v1.2.3