diff options
author | arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640> | 2010-06-28 06:47:08 +0000 |
---|---|---|
committer | arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640> | 2010-06-28 06:47:08 +0000 |
commit | 8dc819a700dcdedc9d5ed062a23dd6ecaccdc0e1 (patch) | |
tree | 2a46120067f84f72554db02f8f15e25871c4e2c8 | |
parent | d862deefcff1013ced77c238b2711e8023a1cfe7 (diff) |
XPath: Argument parsing and position invariance refactoring, reduced AST node size, moved literal string->number conversion to compile time
git-svn-id: http://pugixml.googlecode.com/svn/trunk@549 99668b35-9821-0410-8761-19e4c4f06640
-rw-r--r-- | src/pugixpath.cpp | 254 |
1 files changed, 105 insertions, 149 deletions
diff --git a/src/pugixpath.cpp b/src/pugixpath.cpp index 5248555..f5663bd 100644 --- a/src/pugixpath.cpp +++ b/src/pugixpath.cpp @@ -474,6 +474,29 @@ namespace return atof(string);
#endif
}
+
+ double convert_string_to_number(const char_t* begin, const char_t* end)
+ {
+ char_t buffer[32];
+
+ size_t length = static_cast<size_t>(end - begin);
+
+ if (length < sizeof(buffer) / sizeof(buffer[0]))
+ {
+ // optimized on-stack conversion
+ memcpy(buffer, begin, length * sizeof(char_t));
+ buffer[length] = 0;
+
+ return convert_string_to_number(buffer);
+ }
+ else
+ {
+ // need to make dummy on-heap copy
+ string_t copy(begin, end);
+
+ return convert_string_to_number(copy.c_str());
+ }
+ }
double round_nearest(double value)
{
@@ -1313,24 +1336,29 @@ namespace pugi class xpath_ast_node
{
private:
- ast_type_t m_type;
-
- xpath_value_type m_rettype;
+ // node type
+ char m_type;
+ char m_rettype;
+
+ // for ast_step / ast_predicate
+ char m_axis;
+ char m_test;
// tree node structure
xpath_ast_node* m_left;
xpath_ast_node* m_right;
- xpath_ast_node* m_third;
xpath_ast_node* m_next;
- // string value for ast_constant
- // node test for ast_step (node name/namespace/node type/pi target)
- const char_t* m_contents;
+ union
+ {
+ // value for ast_string_constant
+ const char_t* string;
+ // value for ast_number_constant
+ double number;
+ // node test for ast_step (node name/namespace/node type/pi target)
+ const char_t* nodetest;
+ } m_data;
- // for t_step / t_predicate
- axis_t m_axis;
- nodetest_t m_test;
-
xpath_ast_node(const xpath_ast_node&);
xpath_ast_node& operator=(const xpath_ast_node&);
@@ -1503,14 +1531,16 @@ namespace pugi {
if (!a) return;
+ const char_t* name = a.name();
+
// There are no attribute nodes corresponding to attributes that declare namespaces
// That is, "xmlns:..." or "xmlns"
- if (starts_with(a.name(), PUGIXML_TEXT("xmlns")) && (a.name()[5] == 0 || a.name()[5] == ':')) return;
+ if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return;
switch (m_test)
{
case nodetest_name:
- if (impl::strequal(a.name(), m_contents)) ns.push_back(xpath_node(a, parent));
+ if (impl::strequal(name, m_data.nodetest)) ns.push_back(xpath_node(a, parent));
break;
case nodetest_type_node:
@@ -1519,7 +1549,7 @@ namespace pugi break;
case nodetest_all_in_namespace:
- if (starts_with(a.name(), m_contents))
+ if (starts_with(name, m_data.nodetest))
ns.push_back(xpath_node(a, parent));
break;
@@ -1535,7 +1565,7 @@ namespace pugi switch (m_test)
{
case nodetest_name:
- if (n.type() == node_element && impl::strequal(n.name(), m_contents)) ns.push_back(n);
+ if (n.type() == node_element && impl::strequal(n.name(), m_data.nodetest)) ns.push_back(n);
break;
case nodetest_type_node:
@@ -1558,7 +1588,7 @@ namespace pugi break;
case nodetest_pi:
- if (n.type() == node_pi && impl::strequal(n.name(), m_contents))
+ if (n.type() == node_pi && impl::strequal(n.name(), m_data.nodetest))
ns.push_back(n);
break;
@@ -1568,7 +1598,7 @@ namespace pugi break;
case nodetest_all_in_namespace:
- if (n.type() == node_element && starts_with(n.name(), m_contents))
+ if (n.type() == node_element && starts_with(n.name(), m_data.nodetest))
ns.push_back(n);
break;
@@ -1940,7 +1970,7 @@ namespace pugi }
}
- void set_contents(const xpath_lexer_string& value, xpath_allocator& a)
+ static const char_t* duplicate_string(const xpath_lexer_string& value, xpath_allocator& a)
{
if (value.begin)
{
@@ -1950,35 +1980,34 @@ namespace pugi memcpy(c, value.begin, length * sizeof(char_t));
c[length] = 0;
- m_contents = c;
+ return c;
}
- else m_contents = 0;
+ else return 0;
}
public:
- xpath_ast_node(ast_type_t type, xpath_value_type rettype, const xpath_lexer_string& contents, xpath_allocator& a):
- m_type(type), m_rettype(rettype), m_left(0), m_right(0), m_third(0), m_next(0), m_contents(0),
- m_axis(axis_self), m_test(nodetest_none)
+ xpath_ast_node(ast_type_t type, xpath_value_type rettype, const xpath_lexer_string& value, xpath_allocator& a):
+ m_type((char)type), m_rettype((char)rettype), m_axis(0), m_test(0), m_left(0), m_right(0), m_next(0)
{
- set_contents(contents, a);
+ assert(type == ast_string_constant);
+ m_data.string = duplicate_string(value, a);
}
-
- xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, axis_t axis):
- m_type(type), m_rettype(xpath_type_node_set), m_left(left), m_right(right), m_third(0), m_next(0), m_contents(0),
- m_axis(axis), m_test(nodetest_none)
+
+ xpath_ast_node(ast_type_t type, xpath_value_type rettype, double value):
+ m_type((char)type), m_rettype((char)rettype), m_axis(0), m_test(0), m_left(0), m_right(0), m_next(0)
{
+ assert(type == ast_number_constant);
+ m_data.number = value;
}
-
- xpath_ast_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0, xpath_ast_node* third = 0):
- m_type(type), m_rettype(rettype), m_left(left), m_right(right), m_third(third), m_next(0), m_contents(0),
- m_axis(axis_self), m_test(nodetest_none)
+
+ xpath_ast_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
+ m_type((char)type), m_rettype((char)rettype), m_axis(0), m_test(0), m_left(left), m_right(right), m_next(0)
{
}
xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const xpath_lexer_string& contents, xpath_allocator& a):
- m_type(type), m_rettype(xpath_type_node_set), m_left(left), m_right(0), m_third(0), m_next(0), m_contents(0),
- m_axis(axis), m_test(test)
+ m_type((char)type), m_rettype(xpath_type_node_set), m_axis((char)axis), m_test((char)test), m_left(left), m_right(0), m_next(0)
{
- set_contents(contents, a);
+ m_data.nodetest = duplicate_string(contents, a);
}
void set_next(xpath_ast_node* value)
@@ -2116,7 +2145,7 @@ namespace pugi return -m_left->eval_number(c);
case ast_number_constant:
- return convert_string_to_number(m_contents);
+ return m_data.number;
case ast_func_last:
return (double)c.size;
@@ -2195,7 +2224,7 @@ namespace pugi switch (m_type)
{
case ast_string_constant:
- return m_contents;
+ return m_data.string;
case ast_func_local_name_0:
{
@@ -2307,7 +2336,7 @@ namespace pugi {
string_t s = m_left->eval_string(c);
double first = round_nearest(m_right->eval_number(c));
- double last = first + round_nearest(m_third->eval_number(c));
+ double last = first + round_nearest(m_right->m_next->eval_number(c));
if (is_nan(first) || is_nan(last)) return string_t();
else if (first >= s.length() + 1) return string_t();
@@ -2351,7 +2380,7 @@ namespace pugi {
string_t s = m_left->eval_string(c);
string_t from = m_right->eval_string(c);
- string_t to = m_third->eval_string(c);
+ string_t to = m_right->m_next->eval_string(c);
for (string_t::iterator it = s.begin(); it != s.end(); )
{
@@ -2511,106 +2540,40 @@ namespace pugi }
}
- bool contains(ast_type_t type)
+ bool is_posinv()
{
- if (m_type == type) return true;
-
switch (m_type)
{
- case ast_op_or:
- case ast_op_and:
- case ast_op_equal:
- case ast_op_not_equal:
- case ast_op_less:
- case ast_op_greater:
- case ast_op_less_or_equal:
- case ast_op_greater_or_equal:
- case ast_op_add:
- case ast_op_subtract:
- case ast_op_multiply:
- case ast_op_divide:
- case ast_op_mod:
- case ast_op_negate:
- return m_left->contains(type) || m_right->contains(type);
-
- case ast_op_union:
- case ast_predicate:
- case ast_filter:
- case ast_filter_posinv:
+ case ast_func_position:
return false;
-
+
case ast_string_constant:
case ast_number_constant:
- case ast_func_last:
- case ast_func_position:
- return false;
-
- case ast_func_count:
- case ast_func_id:
- case ast_func_local_name_0:
- case ast_func_local_name_1:
- case ast_func_namespace_uri_0:
- case ast_func_namespace_uri_1:
- case ast_func_name_0:
- case ast_func_name_1:
- case ast_func_string_0:
- case ast_func_string_1:
- if (m_left) return m_left->contains(type);
- return false;
-
- case ast_func_concat:
- {
- if (m_left->contains(type)) return true;
-
- for (xpath_ast_node* n = m_right; n; n = n->m_next)
- if (n->contains(type)) return true;
-
- return false;
- }
-
- case ast_func_starts_with:
- case ast_func_contains:
- case ast_func_substring_before:
- case ast_func_substring_after:
- case ast_func_substring_2:
- case ast_func_substring_3:
- case ast_func_string_length_0:
- case ast_func_string_length_1:
- case ast_func_normalize_space_0:
- case ast_func_normalize_space_1:
- case ast_func_translate:
- case ast_func_boolean:
- case ast_func_not:
- case ast_func_true:
- case ast_func_false:
- case ast_func_lang:
- case ast_func_number_0:
- case ast_func_number_1:
- case ast_func_sum:
- case ast_func_floor:
- case ast_func_ceiling:
- case ast_func_round:
- if (m_left && m_left->contains(type)) return true;
- if (m_right && m_right->contains(type)) return true;
- if (m_third && m_third->contains(type)) return true;
+ // $$ case ast_variable:
+ return true;
- return false;
-
case ast_step:
case ast_step_root:
- return false;
-
+ return true;
+
+ case ast_predicate:
+ case ast_filter:
+ case ast_filter_posinv:
+ return true;
+
default:
- throw xpath_exception("Unknown semantics error");
-#ifdef __DMC__
- return false; // Digital Mars C++
-#endif
+ if (m_left && !m_left->is_posinv()) return false;
+
+ for (xpath_ast_node* n = m_right; n; n = n->m_next)
+ if (!n->is_posinv()) return false;
+
+ return true;
}
}
xpath_value_type rettype() const
{
- return m_rettype;
+ return static_cast<xpath_value_type>(m_rettype);
}
};
@@ -2628,7 +2591,7 @@ namespace pugi xpath_parser(const xpath_parser&);
xpath_parser& operator=(const xpath_parser&);
- xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[4])
+ xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2])
{
assert(argc <= 1);
@@ -2637,7 +2600,7 @@ namespace pugi return new (m_alloc.node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]);
}
- xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[4])
+ xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
{
switch (name.begin[0])
{
@@ -2655,7 +2618,7 @@ namespace pugi }
else if (name == PUGIXML_TEXT("contains") && argc == 2)
return new (m_alloc.node()) xpath_ast_node(ast_func_contains, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("concat") && argc == 2)
+ else if (name == PUGIXML_TEXT("concat") && argc >= 2)
return new (m_alloc.node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
return new (m_alloc.node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
@@ -2724,7 +2687,7 @@ namespace pugi else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
return new (m_alloc.node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
- return new (m_alloc.node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1], args[2]);
+ return new (m_alloc.node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
else if (name == PUGIXML_TEXT("sum") && argc == 1)
{
if (args[0]->rettype() != xpath_type_node_set) throw xpath_exception("sum() has to be applied to node set");
@@ -2735,7 +2698,7 @@ namespace pugi case 't':
if (name == PUGIXML_TEXT("translate") && argc == 3)
- return new (m_alloc.node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1], args[2]);
+ return new (m_alloc.node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
else if (name == PUGIXML_TEXT("true") && argc == 0)
return new (m_alloc.node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
@@ -2884,7 +2847,9 @@ namespace pugi case lex_number:
{
- xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_number_constant, xpath_type_number, m_lexer.contents(), m_alloc);
+ double value = convert_string_to_number(m_lexer.contents().begin, m_lexer.contents().end);
+
+ xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
m_lexer.next();
return n;
@@ -2892,14 +2857,13 @@ namespace pugi case lex_string:
{
- xpath_ast_node* args[4] = {0};
+ xpath_ast_node* args[2] = {0};
size_t argc = 0;
xpath_lexer_string function = m_lexer.contents();
m_lexer.next();
- bool func_concat = (function == PUGIXML_TEXT("concat"));
- xpath_ast_node* last_concat = 0;
+ xpath_ast_node* last_arg = 0;
if (m_lexer.current() != lex_open_brace)
throw xpath_exception("Unrecognized function call");
@@ -2916,19 +2880,11 @@ namespace pugi xpath_ast_node* n = parse_expression();
- if (func_concat)
- {
- if (argc < 2) args[argc++] = last_concat = n;
- else
- {
- last_concat->set_next(n);
- last_concat = n;
- }
- }
- else if (argc >= 4)
- throw xpath_exception("Too many function arguments");
- else
- args[argc++] = n;
+ if (argc < 2) args[argc] = n;
+ else last_arg->set_next(n);
+
+ argc++;
+ last_arg = n;
}
m_lexer.next();
@@ -2959,9 +2915,9 @@ namespace pugi if (n->rettype() != xpath_type_node_set) throw xpath_exception("Predicate has to be applied to node set");
- bool posinv = expr->rettype() != xpath_type_number && !expr->contains(ast_func_position);
+ bool posinv = expr->rettype() != xpath_type_number && expr->is_posinv();
- n = new (m_alloc.node()) xpath_ast_node(posinv ? ast_filter_posinv : ast_filter, n, expr, axis_child);
+ n = new (m_alloc.node()) xpath_ast_node(posinv ? ast_filter_posinv : ast_filter, xpath_type_node_set, n, expr);
if (m_lexer.current() != lex_close_square_brace)
throw xpath_exception("Unmatched square brace");
@@ -3107,7 +3063,7 @@ namespace pugi xpath_ast_node* expr = parse_expression();
- xpath_ast_node* pred = new (m_alloc.node()) xpath_ast_node(ast_predicate, expr, 0, axis);
+ xpath_ast_node* pred = new (m_alloc.node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr);
if (m_lexer.current() != lex_close_square_brace)
throw xpath_exception("unmatched square brace");
|