From 5239fcbd0030ce7dcca1ee05dab0e8be3a98a288 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 5 Oct 2014 07:57:58 +0000 Subject: XPath: Implement optimized translate() translate() with constant arguments now uses a 128-byte table and a table lookup instead of searching characters in the source string. The table is generated during query optimization. git-svn-id: https://pugixml.googlecode.com/svn/trunk@1052 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 100 +++++++++++++++++++++++++++++++++++++---- tests/test_xpath_functions.cpp | 10 +++++ 2 files changed, 101 insertions(+), 9 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 8417c44..df58d8c 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -432,6 +432,7 @@ PUGI__NS_BEGIN // get header xml_memory_string_header* header = static_cast(static_cast(string)) - 1; + assert(header); // deallocate size_t page_offset = sizeof(xml_memory_page) + header->page_offset; @@ -7345,10 +7346,8 @@ PUGI__NS_BEGIN *write = 0; } - PUGI__FN void translate(char_t* buffer, const char_t* from, const char_t* to) + PUGI__FN void translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length) { - size_t to_length = strlength(to); - char_t* write = buffer; while (*buffer) @@ -7367,6 +7366,65 @@ PUGI__NS_BEGIN *write = 0; } + PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to) + { + unsigned char table[128] = {}; + + while (*from) + { + unsigned int fc = static_cast(*from); + unsigned int tc = static_cast(*to); + + if (fc >= 128 || tc >= 128) + return 0; + + if (!table[fc]) + table[fc] = tc ? static_cast(tc) : 128; + + from++; + if (tc) to++; + } + + for (int i = 0; i < 128; ++i) + if (!table[i]) + table[i] = static_cast(i); + + void* result = alloc->allocate_nothrow(sizeof(table)); + + if (result) + { + memcpy(result, table, sizeof(table)); + } + + return static_cast(result); + } + + PUGI__FN void translate_table(char_t* buffer, const unsigned char* table) + { + char_t* write = buffer; + + while (*buffer) + { + char_t ch = *buffer++; + unsigned int index = static_cast(ch); + + if (index < 128) + { + unsigned char code = table[index]; + + *write = static_cast(code); + write += 1 - (code >> 7); + } + else + { + *write++ = ch; + } + } + + // zero-terminate + *write = 0; + } + struct xpath_variable_boolean: xpath_variable { xpath_variable_boolean(): value(false) @@ -8111,6 +8169,7 @@ PUGI__NS_BEGIN ast_func_normalize_space_0, // normalize-space() ast_func_normalize_space_1, // normalize-space(left) ast_func_translate, // translate(left, right, third) + ast_func_translate_table, // translate(left, right, third) where right/third are constants ast_func_boolean, // boolean(left) ast_func_not, // not(left) ast_func_true, // true() @@ -8189,6 +8248,8 @@ PUGI__NS_BEGIN xpath_variable* variable; // node test for ast_step (node name/namespace/node type/pi target) const char_t* nodetest; + // table for ast_func_translate_table + const unsigned char* table; } _data; xpath_ast_node(const xpath_ast_node&); @@ -9298,7 +9359,16 @@ PUGI__NS_BEGIN xpath_string from = _right->eval_string(c, swapped_stack); xpath_string to = _right->_next->eval_string(c, swapped_stack); - translate(s.data(stack.result), from.c_str(), to.c_str()); + translate(s.data(stack.result), from.c_str(), to.c_str(), to.length()); + + return s; + } + + case ast_func_translate_table: + { + xpath_string s = _left->eval_string(c, stack); + + translate_table(s.data(stack.result), _data.table); return s; } @@ -9468,11 +9538,11 @@ PUGI__NS_BEGIN } } - void optimize() + void optimize(xpath_allocator* alloc) { - if (_left) _left->optimize(); - if (_right) _right->optimize(); - if (_next) _next->optimize(); + if (_left) _left->optimize(alloc); + if (_right) _right->optimize(alloc); + if (_next) _next->optimize(alloc); // Replace descendant-or-self::node()/child::foo with descendant::foo // The former is a full form of //foo, the latter is much faster since it executes the node test immediately @@ -9484,6 +9554,18 @@ PUGI__NS_BEGIN _axis = axis_descendant; _left = _left->_left; } + + // Replace translate() with constant arguments with a table + if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant) + { + unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string); + + if (table) + { + _type = ast_func_translate_table; + _data.table = table; + } + } } bool is_posinv_expr() const @@ -10838,7 +10920,7 @@ namespace pugi if (qimpl->root) { - qimpl->root->optimize(); + qimpl->root->optimize(&qimpl->alloc); _impl = static_cast(impl_holder.release()); _result.error = 0; diff --git a/tests/test_xpath_functions.cpp b/tests/test_xpath_functions.cpp index 30ba218..f5d9e53 100644 --- a/tests/test_xpath_functions.cpp +++ b/tests/test_xpath_functions.cpp @@ -557,6 +557,16 @@ TEST(xpath_string_translate) CHECK_XPATH_FAIL(STR("translate('a', 'b', 'c', 'd')")); } +TEST(xpath_string_translate_table) +{ + xml_node c; + + CHECK_XPATH_STRING(c, STR("translate('abcd\xe9 ', 'abc', 'ABC')"), STR("ABCd\xe9 ")); + CHECK_XPATH_STRING(c, STR("translate('abcd\xe9 ', 'abc\xe9', 'ABC!')"), STR("ABCd! ")); + CHECK_XPATH_STRING(c, STR("translate('abcde', concat('abc', 'd'), 'ABCD')"), STR("ABCDe")); + CHECK_XPATH_STRING(c, STR("translate('abcde', 'abcd', concat('ABC', 'D'))"), STR("ABCDe")); +} + TEST_XML(xpath_nodeset_last, "") { xml_node n = doc.child(STR("node")); -- cgit v1.2.3