summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/pugixml.cpp190
-rw-r--r--tests/test_xpath.cpp22
-rw-r--r--tests/test_xpath_paths.cpp22
3 files changed, 167 insertions, 67 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index 99cdfc0..e9ab09d 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -8270,6 +8270,13 @@ PUGI__NS_BEGIN
nodetest_all_in_namespace
};
+ enum nodeset_eval_t
+ {
+ nodeset_eval_all,
+ nodeset_eval_any,
+ nodeset_eval_first
+ };
+
template <axis_t N> struct axis_to_type
{
static const axis_t axis;
@@ -8334,8 +8341,8 @@ PUGI__NS_BEGIN
{
xpath_allocator_capture cr(stack.result);
- xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
+ xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
@@ -8363,7 +8370,7 @@ PUGI__NS_BEGIN
xpath_allocator_capture cr(stack.result);
double l = lhs->eval_number(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
{
@@ -8380,7 +8387,7 @@ PUGI__NS_BEGIN
xpath_allocator_capture cr(stack.result);
xpath_string l = lhs->eval_string(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
{
@@ -8408,8 +8415,8 @@ PUGI__NS_BEGIN
{
xpath_allocator_capture cr(stack.result);
- xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
+ xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
{
@@ -8433,7 +8440,7 @@ PUGI__NS_BEGIN
xpath_allocator_capture cr(stack.result);
double l = lhs->eval_number(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
{
@@ -8449,7 +8456,7 @@ PUGI__NS_BEGIN
{
xpath_allocator_capture cr(stack.result);
- xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
+ xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
double r = rhs->eval_number(c, stack);
for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
@@ -8469,7 +8476,7 @@ PUGI__NS_BEGIN
}
}
- void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
+ static void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
{
assert(ns.size() >= first);
@@ -8524,12 +8531,18 @@ PUGI__NS_BEGIN
case nodetest_type_node:
case nodetest_all:
if (is_xpath_attribute(name))
+ {
ns.push_back(xpath_node(a, parent), alloc);
+ return true;
+ }
break;
case nodetest_all_in_namespace:
if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
+ {
ns.push_back(xpath_node(a, parent), alloc);
+ return true;
+ }
break;
default:
@@ -8539,57 +8552,80 @@ PUGI__NS_BEGIN
return false;
}
- void step_push(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc)
+ bool step_push(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc)
{
- if (!n) return;
+ if (!n) return false;
switch (_test)
{
case nodetest_name:
if (n.type() == node_element && strequal(n.name(), _data.nodetest))
+ {
ns.push_back(n, alloc);
+ return true;
+ }
break;
case nodetest_type_node:
ns.push_back(n, alloc);
- break;
+ return true;
case nodetest_type_comment:
if (n.type() == node_comment)
+ {
ns.push_back(n, alloc);
+ return true;
+ }
break;
case nodetest_type_text:
if (n.type() == node_pcdata || n.type() == node_cdata)
+ {
ns.push_back(n, alloc);
+ return true;
+ }
break;
case nodetest_type_pi:
if (n.type() == node_pi)
+ {
ns.push_back(n, alloc);
+ return true;
+ }
break;
case nodetest_pi:
if (n.type() == node_pi && strequal(n.name(), _data.nodetest))
+ {
ns.push_back(n, alloc);
+ return true;
+ }
break;
case nodetest_all:
if (n.type() == node_element)
+ {
ns.push_back(n, alloc);
+ return true;
+ }
break;
case nodetest_all_in_namespace:
if (n.type() == node_element && starts_with(n.name(), _data.nodetest))
+ {
ns.push_back(n, alloc);
+ return true;
+ }
break;
default:
assert(!"Unknown axis");
- }
+ }
+
+ return false;
}
- template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc, T)
+ template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc, bool once, T)
{
const axis_t axis = T::axis;
@@ -8598,8 +8634,8 @@ PUGI__NS_BEGIN
case axis_attribute:
{
for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute())
- if (step_push(ns, a, n, alloc))
- break; // step_push returns true if it found a unique attribute
+ if (step_push(ns, a, n, alloc) & once)
+ return;
break;
}
@@ -8607,7 +8643,8 @@ PUGI__NS_BEGIN
case axis_child:
{
for (xml_node c = n.first_child(); c; c = c.next_sibling())
- step_push(ns, c, alloc);
+ if (step_push(ns, c, alloc) & once)
+ return;
break;
}
@@ -8616,13 +8653,15 @@ PUGI__NS_BEGIN
case axis_descendant_or_self:
{
if (axis == axis_descendant_or_self)
- step_push(ns, n, alloc);
+ if (step_push(ns, n, alloc) & once)
+ return;
xml_node cur = n.first_child();
while (cur && cur != n)
{
- step_push(ns, cur, alloc);
+ if (step_push(ns, cur, alloc) & once)
+ return;
if (cur.first_child())
cur = cur.first_child();
@@ -8643,7 +8682,8 @@ PUGI__NS_BEGIN
case axis_following_sibling:
{
for (xml_node c = n.next_sibling(); c; c = c.next_sibling())
- step_push(ns, c, alloc);
+ if (step_push(ns, c, alloc) & once)
+ return;
break;
}
@@ -8651,7 +8691,8 @@ PUGI__NS_BEGIN
case axis_preceding_sibling:
{
for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling())
- step_push(ns, c, alloc);
+ if (step_push(ns, c, alloc) & once)
+ return;
break;
}
@@ -8666,7 +8707,8 @@ PUGI__NS_BEGIN
for (;;)
{
- step_push(ns, cur, alloc);
+ if (step_push(ns, cur, alloc) & once)
+ return;
if (cur.first_child())
cur = cur.first_child();
@@ -8698,7 +8740,8 @@ PUGI__NS_BEGIN
else
{
// leaf node, can't be ancestor
- step_push(ns, cur, alloc);
+ if (step_push(ns, cur, alloc) & once)
+ return;
if (cur.previous_sibling())
cur = cur.previous_sibling();
@@ -8709,7 +8752,9 @@ PUGI__NS_BEGIN
cur = cur.parent();
if (!cur) break;
- if (!node_is_ancestor(cur, n)) step_push(ns, cur, alloc);
+ if (!node_is_ancestor(cur, n))
+ if (step_push(ns, cur, alloc) & once)
+ return;
}
while (!cur.previous_sibling());
@@ -8727,13 +8772,15 @@ PUGI__NS_BEGIN
case axis_ancestor_or_self:
{
if (axis == axis_ancestor_or_self)
- step_push(ns, n, alloc);
+ if (step_push(ns, n, alloc) & once)
+ return;
xml_node cur = n.parent();
while (cur)
{
- step_push(ns, cur, alloc);
+ if (step_push(ns, cur, alloc) & once)
+ return;
cur = cur.parent();
}
@@ -8760,7 +8807,7 @@ PUGI__NS_BEGIN
}
}
- template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute a, const xml_node p, xpath_allocator* alloc, T v)
+ template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute a, const xml_node p, xpath_allocator* alloc, bool once, T v)
{
const axis_t axis = T::axis;
@@ -8770,13 +8817,15 @@ PUGI__NS_BEGIN
case axis_ancestor_or_self:
{
if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
- step_push(ns, a, p, alloc);
+ if (step_push(ns, a, p, alloc) & once)
+ return;
xml_node cur = p;
while (cur)
{
- step_push(ns, cur, alloc);
+ if (step_push(ns, cur, alloc) & once)
+ return;
cur = cur.parent();
}
@@ -8811,7 +8860,8 @@ PUGI__NS_BEGIN
if (!cur) break;
}
- step_push(ns, cur, alloc);
+ if (step_push(ns, cur, alloc) & once)
+ return;
}
break;
@@ -8827,7 +8877,7 @@ PUGI__NS_BEGIN
case axis_preceding:
{
// preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
- step_fill(ns, p, alloc, v);
+ step_fill(ns, p, alloc, once, v);
break;
}
@@ -8835,18 +8885,24 @@ PUGI__NS_BEGIN
assert(!"Unimplemented axis");
}
}
-
- template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, T v)
+
+ template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
{
const axis_t axis = T::axis;
- bool attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
+ bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
+ bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
+
+ bool once =
+ (axis == axis_attribute && _test == nodetest_name)
+ ? true
+ : !_right && (axis_reverse ? eval == nodeset_eval_any : eval != nodeset_eval_all);
xpath_node_set_raw ns;
- ns.set_type((axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling) ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted);
+ ns.set_type(axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted);
if (_left)
{
- xpath_node_set_raw s = _left->eval_node_set(c, stack);
+ xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
// self axis preserves the original order
if (axis == axis_self) ns.set_type(s.type());
@@ -8859,9 +8915,9 @@ PUGI__NS_BEGIN
if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
if (it->node())
- step_fill(ns, it->node(), stack.result, v);
- else if (attributes)
- step_fill(ns, it->attribute(), it->parent(), stack.result, v);
+ step_fill(ns, it->node(), stack.result, once, v);
+ else if (axis_has_attributes)
+ step_fill(ns, it->attribute(), it->parent(), stack.result, once, v);
apply_predicates(ns, size, stack);
}
@@ -8869,9 +8925,9 @@ PUGI__NS_BEGIN
else
{
if (c.n.node())
- step_fill(ns, c.n.node(), stack.result, v);
- else if (attributes)
- step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, v);
+ step_fill(ns, c.n.node(), stack.result, once, v);
+ else if (axis_has_attributes)
+ step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, once, v);
apply_predicates(ns, 0, stack);
}
@@ -9054,7 +9110,7 @@ PUGI__NS_BEGIN
{
xpath_allocator_capture cr(stack.result);
- return !eval_node_set(c, stack).empty();
+ return !eval_node_set(c, stack, nodeset_eval_any).empty();
}
default:
@@ -9100,7 +9156,7 @@ PUGI__NS_BEGIN
{
xpath_allocator_capture cr(stack.result);
- return static_cast<double>(_left->eval_node_set(c, stack).size());
+ return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
}
case ast_func_string_length_0:
@@ -9133,7 +9189,7 @@ PUGI__NS_BEGIN
double r = 0;
- xpath_node_set_raw ns = _left->eval_node_set(c, stack);
+ xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
{
@@ -9269,7 +9325,7 @@ PUGI__NS_BEGIN
{
xpath_allocator_capture cr(stack.result);
- xpath_node_set_raw ns = _left->eval_node_set(c, stack);
+ xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
xpath_node na = ns.first();
return xpath_string::from_const(local_name(na));
@@ -9286,7 +9342,7 @@ PUGI__NS_BEGIN
{
xpath_allocator_capture cr(stack.result);
- xpath_node_set_raw ns = _left->eval_node_set(c, stack);
+ xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
xpath_node na = ns.first();
return xpath_string::from_const(qualified_name(na));
@@ -9303,7 +9359,7 @@ PUGI__NS_BEGIN
{
xpath_allocator_capture cr(stack.result);
- xpath_node_set_raw ns = _left->eval_node_set(c, stack);
+ xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
xpath_node na = ns.first();
return xpath_string::from_const(namespace_uri(na));
@@ -9466,7 +9522,7 @@ PUGI__NS_BEGIN
xpath_stack swapped_stack = {stack.temp, stack.result};
- xpath_node_set_raw ns = eval_node_set(c, swapped_stack);
+ xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
}
@@ -9478,7 +9534,7 @@ PUGI__NS_BEGIN
}
}
- xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack)
+ xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
{
switch (_type)
{
@@ -9488,8 +9544,8 @@ PUGI__NS_BEGIN
xpath_stack swapped_stack = {stack.temp, stack.result};
- xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack);
- xpath_node_set_raw rs = _right->eval_node_set(c, stack);
+ xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval);
+ xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval);
// we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
rs.set_type(xpath_node_set::type_unsorted);
@@ -9503,7 +9559,7 @@ PUGI__NS_BEGIN
case ast_filter:
case ast_filter_posinv:
{
- xpath_node_set_raw set = _left->eval_node_set(c, stack);
+ xpath_node_set_raw set = _left->eval_node_set(c, stack, nodeset_eval_all);
// either expression is a number or it contains position() call; sort by document order
if (_type == ast_filter) set.sort_do();
@@ -9521,44 +9577,44 @@ PUGI__NS_BEGIN
switch (_axis)
{
case axis_ancestor:
- return step_do(c, stack, axis_to_type<axis_ancestor>());
+ return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
case axis_ancestor_or_self:
- return step_do(c, stack, axis_to_type<axis_ancestor_or_self>());
+ return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
case axis_attribute:
- return step_do(c, stack, axis_to_type<axis_attribute>());
+ return step_do(c, stack, eval, axis_to_type<axis_attribute>());
case axis_child:
- return step_do(c, stack, axis_to_type<axis_child>());
+ return step_do(c, stack, eval, axis_to_type<axis_child>());
case axis_descendant:
- return step_do(c, stack, axis_to_type<axis_descendant>());
+ return step_do(c, stack, eval, axis_to_type<axis_descendant>());
case axis_descendant_or_self:
- return step_do(c, stack, axis_to_type<axis_descendant_or_self>());
+ return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
case axis_following:
- return step_do(c, stack, axis_to_type<axis_following>());
+ return step_do(c, stack, eval, axis_to_type<axis_following>());
case axis_following_sibling:
- return step_do(c, stack, axis_to_type<axis_following_sibling>());
+ return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
case axis_namespace:
// namespaced axis is not supported
return xpath_node_set_raw();
case axis_parent:
- return step_do(c, stack, axis_to_type<axis_parent>());
+ return step_do(c, stack, eval, axis_to_type<axis_parent>());
case axis_preceding:
- return step_do(c, stack, axis_to_type<axis_preceding>());
+ return step_do(c, stack, eval, axis_to_type<axis_preceding>());
case axis_preceding_sibling:
- return step_do(c, stack, axis_to_type<axis_preceding_sibling>());
+ return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
case axis_self:
- return step_do(c, stack, axis_to_type<axis_self>());
+ return step_do(c, stack, eval, axis_to_type<axis_self>());
default:
assert(!"Unknown axis");
@@ -11111,7 +11167,7 @@ namespace pugi
if (setjmp(sd.error_handler)) return xpath_node_set();
#endif
- impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack);
+ impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
return xpath_node_set(r.begin(), r.end(), r.type());
}
@@ -11128,7 +11184,7 @@ namespace pugi
if (setjmp(sd.error_handler)) return xpath_node();
#endif
- impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack);
+ impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
return r.first();
}
diff --git a/tests/test_xpath.cpp b/tests/test_xpath.cpp
index f7da258..2143376 100644
--- a/tests/test_xpath.cpp
+++ b/tests/test_xpath.cpp
@@ -122,6 +122,28 @@ TEST_XML(xpath_sort_attributes, "<node/>")
xpath_node_set_tester(reverse_sorted, "reverse sorted order failed") % 5 % 4 % 3;
}
+TEST(xpath_sort_random_medium)
+{
+ xml_document doc;
+ load_document_copy(doc, STR("<node>")
+ STR("<child1 attr1='value1' attr2='value2'/><child2 attr1='value1'>test</child2><child1 attr1='value1' attr2='value2'/><child2 attr1='value1'>test</child2>")
+ STR("<child1 attr1='value1' attr2='value2'/><child2 attr1='value1'>test</child2><child1 attr1='value1' attr2='value2'/><child2 attr1='value1'>test</child2>")
+ STR("<child1 attr1='value1' attr2='value2'/><child2 attr1='value1'>test</child2><child1 attr1='value1' attr2='value2'/><child2 attr1='value1'>test</child2>")
+ STR("</node>"));
+
+ xpath_node_set ns = doc.select_nodes(STR("//node() | //@*"));
+
+ std::vector<xpath_node> nsv(ns.begin(), ns.end());
+ std::random_shuffle(nsv.begin(), nsv.end());
+
+ xpath_node_set copy(&nsv[0], &nsv[0] + nsv.size());
+ copy.sort();
+
+ xpath_node_set_tester tester(copy, "sorted order failed");
+
+ for (unsigned int i = 2; i < 39; ++i) tester % i;
+}
+
TEST(xpath_sort_random_large)
{
xml_document doc;
diff --git a/tests/test_xpath_paths.cpp b/tests/test_xpath_paths.cpp
index b6f53c7..d791cdf 100644
--- a/tests/test_xpath_paths.cpp
+++ b/tests/test_xpath_paths.cpp
@@ -594,4 +594,26 @@ TEST_XML(xpath_paths_optimize_compare_attribute, "<node id='1' /><node id='2' />
CHECK_XPATH_NODESET(doc, STR("node[@xmlns = '3']"));
}
+TEST_XML(xpath_paths_optimize_step_once, "<node><para1><para2/><para3/><para4><para5 attr5=''/></para4></para1><para6/></node>")
+{
+ CHECK_XPATH_BOOLEAN(doc, STR("node//para2/following::*"), true);
+ CHECK_XPATH_BOOLEAN(doc, STR("node//para6/following::*"), false);
+
+ CHECK_XPATH_STRING(doc, STR("name(node//para2/following::*)"), STR("para3"));
+ CHECK_XPATH_STRING(doc, STR("name(node//para6/following::*)"), STR(""));
+
+ CHECK_XPATH_BOOLEAN(doc, STR("node//para1/preceding::*"), false);
+ CHECK_XPATH_BOOLEAN(doc, STR("node//para6/preceding::*"), true);
+
+ CHECK_XPATH_STRING(doc, STR("name(node//para1/preceding::*)"), STR(""));
+ CHECK_XPATH_STRING(doc, STR("name(node//para6/preceding::*)"), STR("para1"));
+
+ CHECK_XPATH_BOOLEAN(doc, STR("node//para6/preceding::para4"), true);
+
+ CHECK_XPATH_BOOLEAN(doc, STR("//@attr5/ancestor-or-self::*"), true);
+ CHECK_XPATH_BOOLEAN(doc, STR("//@attr5/ancestor::*"), true);
+
+ CHECK_XPATH_BOOLEAN(doc, STR("//@attr5/following::para6"), true);
+ CHECK_XPATH_STRING(doc, STR("name(//@attr5/following::para6)"), STR("para6"));
+}
#endif