From 3a9d992883e5cd5be24a4278f1aa43cf62d2640e Mon Sep 17 00:00:00 2001 From: "arseny.kapoulkine" Date: Sun, 9 May 2010 10:20:17 +0000 Subject: Removed std::string and contents_push from XPath parsing git-svn-id: http://pugixml.googlecode.com/svn/trunk@393 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixpath.cpp | 174 +++++++++++++++++++++++++++--------------------------- 1 file changed, 86 insertions(+), 88 deletions(-) (limited to 'src') diff --git a/src/pugixpath.cpp b/src/pugixpath.cpp index 0912ee2..9d81438 100644 --- a/src/pugixpath.cpp +++ b/src/pugixpath.cpp @@ -15,9 +15,6 @@ #ifndef PUGIXML_NO_XPATH -#include -#include - #include #include @@ -31,6 +28,9 @@ # include #endif +#include +#include + #if defined(_MSC_VER) # pragma warning(disable: 4127) // conditional expression is constant # pragma warning(disable: 4702) // unreachable code @@ -876,6 +876,25 @@ namespace pugi lex_double_colon }; + struct xpath_lexer_string + { + const char_t* begin; + const char_t* end; + + xpath_lexer_string(): begin(0), end(0) + { + } + + bool operator==(const char_t* other) const + { + size_t length = static_cast(end - begin); + + if (!impl::strequalrange(other, begin, length)) return false; + + return other[length] == 0; + } + }; + class xpath_lexer { // disable copy ctor and assignment @@ -884,51 +903,21 @@ namespace pugi private: const char_t* m_cur; - - char_t* m_cur_lexeme_contents; - size_t m_clc_size; - size_t m_clc_capacity; + xpath_lexer_string m_cur_lexeme_contents; lexeme_t m_cur_lexeme; void contents_clear() { - if (m_cur_lexeme_contents) m_cur_lexeme_contents[0] = 0; - m_clc_size = 0; - } - - void contents_push(char_t c) - { - if (m_clc_size == m_clc_capacity) - { - if (!m_clc_capacity) m_clc_capacity = 16; - else m_clc_capacity *= 2; - - char_t* s = new char_t[m_clc_capacity + 1]; - if (m_cur_lexeme_contents) impl::strcpy(s, m_cur_lexeme_contents); - - delete[] m_cur_lexeme_contents; - m_cur_lexeme_contents = s; - } - - m_cur_lexeme_contents[m_clc_size++] = c; - m_cur_lexeme_contents[m_clc_size] = 0; + m_cur_lexeme_contents = xpath_lexer_string(); } public: explicit xpath_lexer(const char_t* query): m_cur(query) { - m_clc_capacity = m_clc_size = 0; - m_cur_lexeme_contents = 0; - next(); } - ~xpath_lexer() - { - delete[] m_cur_lexeme_contents; - } - const char_t* state() const { return m_cur; @@ -1077,13 +1066,13 @@ namespace pugi } else if (is_chartypex(*(m_cur+1), ctx_digit)) { - contents_push('0'); - contents_push('.'); + m_cur_lexeme_contents.begin = m_cur; // . ++m_cur; - while (is_chartypex(*m_cur, ctx_digit)) - contents_push(*m_cur++); + while (is_chartypex(*m_cur, ctx_digit)) m_cur++; + + m_cur_lexeme_contents.end = m_cur; m_cur_lexeme = lex_number; } @@ -1107,8 +1096,9 @@ namespace pugi ++m_cur; - while (*m_cur && *m_cur != terminator) - contents_push(*m_cur++); + m_cur_lexeme_contents.begin = m_cur; + while (*m_cur && *m_cur != terminator) m_cur++; + m_cur_lexeme_contents.end = m_cur; if (!*m_cur) m_cur_lexeme = lex_none; @@ -1136,39 +1126,42 @@ namespace pugi default: if (is_chartypex(*m_cur, ctx_digit)) { - while (is_chartypex(*m_cur, ctx_digit)) - contents_push(*m_cur++); + m_cur_lexeme_contents.begin = m_cur; + + while (is_chartypex(*m_cur, ctx_digit)) m_cur++; if (*m_cur == '.' && is_chartypex(*(m_cur+1), ctx_digit)) { - contents_push(*m_cur++); + m_cur++; - while (is_chartypex(*m_cur, ctx_digit)) - contents_push(*m_cur++); + while (is_chartypex(*m_cur, ctx_digit)) m_cur++; } + m_cur_lexeme_contents.end = m_cur; + m_cur_lexeme = lex_number; } else if (is_chartypex(*m_cur, ctx_start_symbol)) { - while (is_chartypex(*m_cur, ctx_symbol)) - contents_push(*m_cur++); + m_cur_lexeme_contents.begin = m_cur; + + while (is_chartypex(*m_cur, ctx_symbol)) m_cur++; if (m_cur[0] == ':') { if (m_cur[1] == '*') // namespace test ncname:* { - contents_push(*m_cur++); // : - contents_push(*m_cur++); // * + m_cur += 2; // :* } else if (is_chartypex(m_cur[1], ctx_symbol)) // namespace test qname { - contents_push(*m_cur++); // : + m_cur++; // : - while (is_chartypex(*m_cur, ctx_symbol)) - contents_push(*m_cur++); + while (is_chartypex(*m_cur, ctx_symbol)) m_cur++; } } + + m_cur_lexeme_contents.end = m_cur; while (is_chartypex(*m_cur, ctx_space)) ++m_cur; @@ -1186,9 +1179,9 @@ namespace pugi return m_cur_lexeme; } - const char_t* contents() const + const xpath_lexer_string& contents() const { - return m_cur_lexeme_contents ? m_cur_lexeme_contents : PUGIXML_TEXT(""); + return m_cur_lexeme_contents; } }; @@ -1931,18 +1924,22 @@ namespace pugi } } - void set_contents(const char_t* value, xpath_allocator& a) + void set_contents(const xpath_lexer_string& value, xpath_allocator& a) { - if (value) + if (value.begin) { - char_t* c = static_cast(a.alloc((impl::strlen(value) + 1) * sizeof(char_t))); - impl::strcpy(c, value); + size_t length = static_cast(value.end - value.begin); + + char_t* c = static_cast(a.alloc((length + 1) * sizeof(char_t))); + memcpy(c, value.begin, length * sizeof(char_t)); + c[length] = 0; + m_contents = c; } else m_contents = 0; } public: - xpath_ast_node(ast_type_t type, const char_t* contents, xpath_allocator& a): m_type(type), + xpath_ast_node(ast_type_t type, const xpath_lexer_string& contents, xpath_allocator& a): m_type(type), m_rettype(xpath_type_none), m_left(0), m_right(0), m_third(0), m_next(0), m_contents(0), m_axis(axis_self), m_test(nodetest_none) { @@ -1961,7 +1958,7 @@ namespace pugi { } - xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents, xpath_allocator& a): + xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const xpath_lexer_string& contents, xpath_allocator& a): m_type(type), m_rettype(xpath_type_none), m_left(left), m_right(0), m_third(0), m_next(0), m_contents(0), m_axis(axis), m_test(test) { @@ -2868,9 +2865,9 @@ namespace pugi xpath_parser(const xpath_parser&); xpath_parser& operator=(const xpath_parser&); - ast_type_t parse_function_name(const string_t& name, size_t argc) + ast_type_t parse_function_name(const xpath_lexer_string& name, size_t argc) { - switch (name[0]) + switch (name.begin[0]) { case 'b': if (name == PUGIXML_TEXT("boolean") && argc == 1) @@ -2970,11 +2967,11 @@ namespace pugi return ast_none; } - axis_t parse_axis_name(const string_t& name, bool& specified) + axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) { specified = true; - switch (name[0]) + switch (name.begin[0]) { case 'a': if (name == PUGIXML_TEXT("ancestor")) @@ -3035,30 +3032,30 @@ namespace pugi return axis_child; } - nodetest_t parse_node_test_type(const char_t* name) + nodetest_t parse_node_test_type(const xpath_lexer_string& name) { - switch (name[0]) + switch (name.begin[0]) { case 'c': - if (impl::strequal(name, PUGIXML_TEXT("comment"))) + if (name == PUGIXML_TEXT("comment")) return nodetest_type_comment; break; case 'n': - if (impl::strequal(name, PUGIXML_TEXT("node"))) + if (name == PUGIXML_TEXT("node")) return nodetest_type_node; break; case 'p': - if (impl::strequal(name, PUGIXML_TEXT("processing-instruction"))) + if (name == PUGIXML_TEXT("processing-instruction")) return nodetest_type_pi; break; case 't': - if (impl::strequal(name, PUGIXML_TEXT("text"))) + if (name == PUGIXML_TEXT("text")) return nodetest_type_text; break; @@ -3120,7 +3117,7 @@ namespace pugi xpath_ast_node* args[4]; size_t argc = 0; - string_t function = m_lexer.contents(); + xpath_lexer_string function = m_lexer.contents(); m_lexer.next(); bool func_concat = (function == PUGIXML_TEXT("concat")); @@ -3227,17 +3224,17 @@ namespace pugi { m_lexer.next(); - return new (m_alloc.node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0, m_alloc); + return new (m_alloc.node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, xpath_lexer_string(), m_alloc); } else if (m_lexer.current() == lex_double_dot) { m_lexer.next(); - return new (m_alloc.node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0, m_alloc); + return new (m_alloc.node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, xpath_lexer_string(), m_alloc); } nodetest_t nt_type = nodetest_none; - string_t nt_name; + xpath_lexer_string nt_name; if (m_lexer.current() == lex_string) { @@ -3261,6 +3258,7 @@ namespace pugi if (m_lexer.current() == lex_multiply) { nt_type = nodetest_all; + nt_name = xpath_lexer_string(); m_lexer.next(); } else if (m_lexer.current() == lex_string) @@ -3282,11 +3280,11 @@ namespace pugi { m_lexer.next(); - nt_type = parse_node_test_type(nt_name.c_str()); + nt_type = parse_node_test_type(nt_name); if (nt_type == nodetest_none) throw xpath_exception("Unrecognized node type"); - nt_name = string_t(); + nt_name = xpath_lexer_string(); } else if (nt_name == PUGIXML_TEXT("processing-instruction")) { @@ -3308,11 +3306,11 @@ namespace pugi // QName or NCName:* else { - string_t::size_type colon_pos = nt_name.find(':'); + const char_t* colon_pos = std::char_traits::find(nt_name.begin, static_cast(nt_name.end - nt_name.begin), ':'); - if (nt_name.size() > 2 && colon_pos == nt_name.size() - 2 && nt_name[nt_name.size() - 1] == '*') // NCName:* + if (colon_pos && colon_pos + 2 == nt_name.end && colon_pos[1] == '*') // NCName:* { - nt_name.erase(nt_name.size() - 1); // erase * + nt_name.end--; // erase * nt_type = nodetest_all_in_namespace; } @@ -3327,7 +3325,7 @@ namespace pugi } else throw xpath_exception("Unrecognized node test"); - xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_step, set, axis, nt_type, nt_name.c_str(), m_alloc); + xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_step, set, axis, nt_type, nt_name, m_alloc); xpath_ast_node* last = 0; @@ -3363,7 +3361,7 @@ namespace pugi m_lexer.next(); if (l == lex_double_slash) - n = new (m_alloc.node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0, m_alloc); + n = new (m_alloc.node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, xpath_lexer_string(), m_alloc); n = parse_step(n); } @@ -3402,7 +3400,7 @@ namespace pugi m_lexer.next(); xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_step_root); - n = new (m_alloc.node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0, m_alloc); + n = new (m_alloc.node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, xpath_lexer_string(), m_alloc); return parse_relative_location_path(n); } @@ -3450,7 +3448,7 @@ namespace pugi m_lexer.next(); if (l == lex_double_slash) - n = new (m_alloc.node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0, m_alloc); + n = new (m_alloc.node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, xpath_lexer_string(), m_alloc); // select from location path return parse_relative_location_path(n); @@ -3501,10 +3499,10 @@ namespace pugi xpath_ast_node* n = parse_unary_expression(); while (m_lexer.current() == lex_multiply || (m_lexer.current() == lex_string && - (impl::strequal(m_lexer.contents(), PUGIXML_TEXT("mod")) || impl::strequal(m_lexer.contents(), PUGIXML_TEXT("div"))))) + (m_lexer.contents() == PUGIXML_TEXT("mod") || m_lexer.contents() == PUGIXML_TEXT("div")))) { ast_type_t op = m_lexer.current() == lex_multiply ? ast_op_multiply : - impl::strequal(m_lexer.contents(), PUGIXML_TEXT("div")) ? ast_op_divide : ast_op_mod; + m_lexer.contents().begin[0] == 'd' ? ast_op_divide : ast_op_mod; m_lexer.next(); xpath_ast_node* expr = parse_unary_expression(); @@ -3586,7 +3584,7 @@ namespace pugi { xpath_ast_node* n = parse_equality_expression(); - while (m_lexer.current() == lex_string && impl::strequal(m_lexer.contents(), PUGIXML_TEXT("and"))) + while (m_lexer.current() == lex_string && m_lexer.contents() == PUGIXML_TEXT("and")) { m_lexer.next(); @@ -3603,7 +3601,7 @@ namespace pugi { xpath_ast_node* n = parse_and_expression(); - while (m_lexer.current() == lex_string && impl::strequal(m_lexer.contents(), PUGIXML_TEXT("or"))) + while (m_lexer.current() == lex_string && m_lexer.contents() == PUGIXML_TEXT("or")) { m_lexer.next(); -- cgit v1.2.3