summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorArseny Kapoulkine <arseny.kapoulkine@gmail.com>2014-10-16 03:46:42 +0000
committerArseny Kapoulkine <arseny.kapoulkine@gmail.com>2014-10-16 03:46:42 +0000
commit5da51dff270f430701b26428c9422f21e0ea4c9c (patch)
treead2e321fd97417a357cab55997e35e0d188b229b /src
parent883031fb45cf0f86cd36b20ad4762da58dd6126c (diff)
XPath: Optimize attribute axis lookup
When looking for an attribute by name, finding the first attribute means we can stop looking since attribute names are unique. This makes some queries faster by 40%. Another very common pattern in XPath queries is finding an attribute with a specified value using a predicate (@name = 'value'). While we perform an optimal amount of traversal in that case, there is a substantial overhead with evaluating the nodes, saving and restoring the stack state, pushing the attribute node into a set, etc. Detecting this pattern allows us to use optimized code, resulting in up to 2x speedup for some queries. git-svn-id: https://pugixml.googlecode.com/svn/trunk@1061 99668b35-9821-0410-8761-19e4c4f06640
Diffstat (limited to 'src')
-rw-r--r--src/pugixml.cpp57
1 files changed, 42 insertions, 15 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index 484f34b..71c3f5d 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -7473,6 +7473,11 @@ PUGI__NS_BEGIN
*write = 0;
}
+ inline bool is_xpath_attribute(const char_t* name)
+ {
+ return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
+ }
+
struct xpath_variable_boolean: xpath_variable
{
xpath_variable_boolean(): value(false)
@@ -8217,7 +8222,6 @@ PUGI__NS_BEGIN
ast_func_normalize_space_0, // normalize-space()
ast_func_normalize_space_1, // normalize-space(left)
ast_func_translate, // translate(left, right, third)
- ast_func_translate_table, // translate(left, right, third) where right/third are constants
ast_func_boolean, // boolean(left)
ast_func_not, // not(left)
ast_func_true, // true()
@@ -8230,7 +8234,10 @@ PUGI__NS_BEGIN
ast_func_ceiling, // ceiling(left)
ast_func_round, // round(left)
ast_step, // process set left with step
- ast_step_root // select root node
+ ast_step_root, // select root node
+
+ ast_opt_translate_table, // translate(left, right, third) where right/third are constants
+ ast_opt_compare_attribute // @name = 'string'
};
enum axis_t
@@ -8296,7 +8303,7 @@ PUGI__NS_BEGIN
xpath_variable* variable;
// node test for ast_step (node name/namespace/node type/pi target)
const char_t* nodetest;
- // table for ast_func_translate_table
+ // table for ast_opt_translate_table
const unsigned char* table;
} _data;
@@ -8498,36 +8505,38 @@ PUGI__NS_BEGIN
}
}
- void step_push(xpath_node_set_raw& ns, const xml_attribute a, const xml_node parent, xpath_allocator* alloc)
+ bool step_push(xpath_node_set_raw& ns, const xml_attribute a, const xml_node parent, xpath_allocator* alloc)
{
- if (!a) return;
+ if (!a) return false;
const char_t* name = a.name();
- // There are no attribute nodes corresponding to attributes that declare namespaces
- // That is, "xmlns:..." or "xmlns"
- if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return;
-
switch (_test)
{
case nodetest_name:
- if (strequal(name, _data.nodetest))
+ if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
+ {
ns.push_back(xpath_node(a, parent), alloc);
+ return true;
+ }
break;
case nodetest_type_node:
case nodetest_all:
- ns.push_back(xpath_node(a, parent), alloc);
+ if (is_xpath_attribute(name))
+ ns.push_back(xpath_node(a, parent), alloc);
break;
case nodetest_all_in_namespace:
- if (starts_with(name, _data.nodetest))
+ if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
ns.push_back(xpath_node(a, parent), alloc);
break;
default:
;
}
+
+ return false;
}
void step_push(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc)
@@ -8589,7 +8598,8 @@ PUGI__NS_BEGIN
case axis_attribute:
{
for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute())
- step_push(ns, a, n, alloc);
+ if (step_push(ns, a, n, alloc))
+ break; // step_push returns true if it found a unique attribute
break;
}
@@ -9007,6 +9017,15 @@ PUGI__NS_BEGIN
return false;
}
+ case ast_opt_compare_attribute:
+ {
+ const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
+
+ xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
+
+ return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
+ }
+
case ast_variable:
{
assert(_rettype == _data.variable->type());
@@ -9412,7 +9431,7 @@ PUGI__NS_BEGIN
return s;
}
- case ast_func_translate_table:
+ case ast_opt_translate_table:
{
xpath_string s = _left->eval_string(c, stack);
@@ -9610,10 +9629,18 @@ PUGI__NS_BEGIN
if (table)
{
- _type = ast_func_translate_table;
+ _type = ast_opt_translate_table;
_data.table = table;
}
}
+
+ // Use optimized path for @attr = 'value' or @attr = $value
+ if (_type == ast_op_equal &&
+ _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
+ (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->rettype() == xpath_type_string)))
+ {
+ _type = ast_opt_compare_attribute;
+ }
}
bool is_posinv_expr() const