From 5da51dff270f430701b26428c9422f21e0ea4c9c Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 16 Oct 2014 03:46:42 +0000 Subject: XPath: Optimize attribute axis lookup When looking for an attribute by name, finding the first attribute means we can stop looking since attribute names are unique. This makes some queries faster by 40%. Another very common pattern in XPath queries is finding an attribute with a specified value using a predicate (@name = 'value'). While we perform an optimal amount of traversal in that case, there is a substantial overhead with evaluating the nodes, saving and restoring the stack state, pushing the attribute node into a set, etc. Detecting this pattern allows us to use optimized code, resulting in up to 2x speedup for some queries. git-svn-id: https://pugixml.googlecode.com/svn/trunk@1061 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 57 ++++++++++++++++++++++++++++++++++------------ tests/test_xpath_paths.cpp | 18 +++++++++++++++ 2 files changed, 60 insertions(+), 15 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 484f34b..71c3f5d 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -7473,6 +7473,11 @@ PUGI__NS_BEGIN *write = 0; } + inline bool is_xpath_attribute(const char_t* name) + { + return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')); + } + struct xpath_variable_boolean: xpath_variable { xpath_variable_boolean(): value(false) @@ -8217,7 +8222,6 @@ PUGI__NS_BEGIN ast_func_normalize_space_0, // normalize-space() ast_func_normalize_space_1, // normalize-space(left) ast_func_translate, // translate(left, right, third) - ast_func_translate_table, // translate(left, right, third) where right/third are constants ast_func_boolean, // boolean(left) ast_func_not, // not(left) ast_func_true, // true() @@ -8230,7 +8234,10 @@ PUGI__NS_BEGIN ast_func_ceiling, // ceiling(left) ast_func_round, // round(left) ast_step, // process set left with step - ast_step_root // select root node + ast_step_root, // select root node + + ast_opt_translate_table, // translate(left, right, third) where right/third are constants + ast_opt_compare_attribute // @name = 'string' }; enum axis_t @@ -8296,7 +8303,7 @@ PUGI__NS_BEGIN xpath_variable* variable; // node test for ast_step (node name/namespace/node type/pi target) const char_t* nodetest; - // table for ast_func_translate_table + // table for ast_opt_translate_table const unsigned char* table; } _data; @@ -8498,36 +8505,38 @@ PUGI__NS_BEGIN } } - void step_push(xpath_node_set_raw& ns, const xml_attribute a, const xml_node parent, xpath_allocator* alloc) + bool step_push(xpath_node_set_raw& ns, const xml_attribute a, const xml_node parent, xpath_allocator* alloc) { - if (!a) return; + if (!a) return false; const char_t* name = a.name(); - // There are no attribute nodes corresponding to attributes that declare namespaces - // That is, "xmlns:..." or "xmlns" - if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return; - switch (_test) { case nodetest_name: - if (strequal(name, _data.nodetest)) + if (strequal(name, _data.nodetest) && is_xpath_attribute(name)) + { ns.push_back(xpath_node(a, parent), alloc); + return true; + } break; case nodetest_type_node: case nodetest_all: - ns.push_back(xpath_node(a, parent), alloc); + if (is_xpath_attribute(name)) + ns.push_back(xpath_node(a, parent), alloc); break; case nodetest_all_in_namespace: - if (starts_with(name, _data.nodetest)) + if (starts_with(name, _data.nodetest) && is_xpath_attribute(name)) ns.push_back(xpath_node(a, parent), alloc); break; default: ; } + + return false; } void step_push(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc) @@ -8589,7 +8598,8 @@ PUGI__NS_BEGIN case axis_attribute: { for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute()) - step_push(ns, a, n, alloc); + if (step_push(ns, a, n, alloc)) + break; // step_push returns true if it found a unique attribute break; } @@ -9007,6 +9017,15 @@ PUGI__NS_BEGIN return false; } + case ast_opt_compare_attribute: + { + const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string(); + + xml_attribute attr = c.n.node().attribute(_left->_data.nodetest); + + return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name()); + } + case ast_variable: { assert(_rettype == _data.variable->type()); @@ -9412,7 +9431,7 @@ PUGI__NS_BEGIN return s; } - case ast_func_translate_table: + case ast_opt_translate_table: { xpath_string s = _left->eval_string(c, stack); @@ -9610,10 +9629,18 @@ PUGI__NS_BEGIN if (table) { - _type = ast_func_translate_table; + _type = ast_opt_translate_table; _data.table = table; } } + + // Use optimized path for @attr = 'value' or @attr = $value + if (_type == ast_op_equal && + _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right && + (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->rettype() == xpath_type_string))) + { + _type = ast_opt_compare_attribute; + } } bool is_posinv_expr() const diff --git a/tests/test_xpath_paths.cpp b/tests/test_xpath_paths.cpp index 43096bc..4528acd 100644 --- a/tests/test_xpath_paths.cpp +++ b/tests/test_xpath_paths.cpp @@ -561,4 +561,22 @@ TEST_XML(xpath_paths_unsorted_child, "") +{ + CHECK_XPATH_NODESET(doc, STR("node[@id = '1']")) % 2; + CHECK_XPATH_NODESET(doc, STR("node[@id = '2']")) % 4; + CHECK_XPATH_NODESET(doc, STR("node[@id = 2]")) % 4; + CHECK_XPATH_NODESET(doc, STR("node[@id[. > 3] = '2']")); + CHECK_XPATH_NODESET(doc, STR("node['1' = @id]")) % 2; + + xpath_variable_set set; + set.set(STR("var1"), STR("2")); + set.set(STR("var2"), 2.0); + + CHECK_XPATH_NODESET_VAR(doc, STR("node[@id = $var1]"), &set) % 4; + CHECK_XPATH_NODESET_VAR(doc, STR("node[@id = $var2]"), &set) % 4; + + CHECK_XPATH_NODESET(doc, STR("node[@xmlns = '3']")); +} + #endif -- cgit v1.2.3