summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArseny Kapoulkine <arseny.kapoulkine@gmail.com>2014-10-05 07:57:58 +0000
committerArseny Kapoulkine <arseny.kapoulkine@gmail.com>2014-10-05 07:57:58 +0000
commit5239fcbd0030ce7dcca1ee05dab0e8be3a98a288 (patch)
tree704c161c7954fe45cd0043ba21fed96fe36b9a49
parentb17501c3fb01530aae790228850ac6122227d8a8 (diff)
XPath: Implement optimized translate()
translate() with constant arguments now uses a 128-byte table and a table lookup instead of searching characters in the source string. The table is generated during query optimization. git-svn-id: https://pugixml.googlecode.com/svn/trunk@1052 99668b35-9821-0410-8761-19e4c4f06640
-rw-r--r--src/pugixml.cpp100
-rw-r--r--tests/test_xpath_functions.cpp10
2 files changed, 101 insertions, 9 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index 8417c44..df58d8c 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -432,6 +432,7 @@ PUGI__NS_BEGIN
// get header
xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
+ assert(header);
// deallocate
size_t page_offset = sizeof(xml_memory_page) + header->page_offset;
@@ -7345,10 +7346,8 @@ PUGI__NS_BEGIN
*write = 0;
}
- PUGI__FN void translate(char_t* buffer, const char_t* from, const char_t* to)
+ PUGI__FN void translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
{
- size_t to_length = strlength(to);
-
char_t* write = buffer;
while (*buffer)
@@ -7367,6 +7366,65 @@ PUGI__NS_BEGIN
*write = 0;
}
+ PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
+ {
+ unsigned char table[128] = {};
+
+ while (*from)
+ {
+ unsigned int fc = static_cast<unsigned int>(*from);
+ unsigned int tc = static_cast<unsigned int>(*to);
+
+ if (fc >= 128 || tc >= 128)
+ return 0;
+
+ if (!table[fc])
+ table[fc] = tc ? static_cast<unsigned char>(tc) : 128;
+
+ from++;
+ if (tc) to++;
+ }
+
+ for (int i = 0; i < 128; ++i)
+ if (!table[i])
+ table[i] = static_cast<unsigned char>(i);
+
+ void* result = alloc->allocate_nothrow(sizeof(table));
+
+ if (result)
+ {
+ memcpy(result, table, sizeof(table));
+ }
+
+ return static_cast<unsigned char*>(result);
+ }
+
+ PUGI__FN void translate_table(char_t* buffer, const unsigned char* table)
+ {
+ char_t* write = buffer;
+
+ while (*buffer)
+ {
+ char_t ch = *buffer++;
+ unsigned int index = static_cast<unsigned int>(ch);
+
+ if (index < 128)
+ {
+ unsigned char code = table[index];
+
+ *write = static_cast<char_t>(code);
+ write += 1 - (code >> 7);
+ }
+ else
+ {
+ *write++ = ch;
+ }
+ }
+
+ // zero-terminate
+ *write = 0;
+ }
+
struct xpath_variable_boolean: xpath_variable
{
xpath_variable_boolean(): value(false)
@@ -8111,6 +8169,7 @@ PUGI__NS_BEGIN
ast_func_normalize_space_0, // normalize-space()
ast_func_normalize_space_1, // normalize-space(left)
ast_func_translate, // translate(left, right, third)
+ ast_func_translate_table, // translate(left, right, third) where right/third are constants
ast_func_boolean, // boolean(left)
ast_func_not, // not(left)
ast_func_true, // true()
@@ -8189,6 +8248,8 @@ PUGI__NS_BEGIN
xpath_variable* variable;
// node test for ast_step (node name/namespace/node type/pi target)
const char_t* nodetest;
+ // table for ast_func_translate_table
+ const unsigned char* table;
} _data;
xpath_ast_node(const xpath_ast_node&);
@@ -9298,7 +9359,16 @@ PUGI__NS_BEGIN
xpath_string from = _right->eval_string(c, swapped_stack);
xpath_string to = _right->_next->eval_string(c, swapped_stack);
- translate(s.data(stack.result), from.c_str(), to.c_str());
+ translate(s.data(stack.result), from.c_str(), to.c_str(), to.length());
+
+ return s;
+ }
+
+ case ast_func_translate_table:
+ {
+ xpath_string s = _left->eval_string(c, stack);
+
+ translate_table(s.data(stack.result), _data.table);
return s;
}
@@ -9468,11 +9538,11 @@ PUGI__NS_BEGIN
}
}
- void optimize()
+ void optimize(xpath_allocator* alloc)
{
- if (_left) _left->optimize();
- if (_right) _right->optimize();
- if (_next) _next->optimize();
+ if (_left) _left->optimize(alloc);
+ if (_right) _right->optimize(alloc);
+ if (_next) _next->optimize(alloc);
// Replace descendant-or-self::node()/child::foo with descendant::foo
// The former is a full form of //foo, the latter is much faster since it executes the node test immediately
@@ -9484,6 +9554,18 @@ PUGI__NS_BEGIN
_axis = axis_descendant;
_left = _left->_left;
}
+
+ // Replace translate() with constant arguments with a table
+ if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
+ {
+ unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
+
+ if (table)
+ {
+ _type = ast_func_translate_table;
+ _data.table = table;
+ }
+ }
}
bool is_posinv_expr() const
@@ -10838,7 +10920,7 @@ namespace pugi
if (qimpl->root)
{
- qimpl->root->optimize();
+ qimpl->root->optimize(&qimpl->alloc);
_impl = static_cast<impl::xpath_query_impl*>(impl_holder.release());
_result.error = 0;
diff --git a/tests/test_xpath_functions.cpp b/tests/test_xpath_functions.cpp
index 30ba218..f5d9e53 100644
--- a/tests/test_xpath_functions.cpp
+++ b/tests/test_xpath_functions.cpp
@@ -557,6 +557,16 @@ TEST(xpath_string_translate)
CHECK_XPATH_FAIL(STR("translate('a', 'b', 'c', 'd')"));
}
+TEST(xpath_string_translate_table)
+{
+ xml_node c;
+
+ CHECK_XPATH_STRING(c, STR("translate('abcd\xe9 ', 'abc', 'ABC')"), STR("ABCd\xe9 "));
+ CHECK_XPATH_STRING(c, STR("translate('abcd\xe9 ', 'abc\xe9', 'ABC!')"), STR("ABCd! "));
+ CHECK_XPATH_STRING(c, STR("translate('abcde', concat('abc', 'd'), 'ABCD')"), STR("ABCDe"));
+ CHECK_XPATH_STRING(c, STR("translate('abcde', 'abcd', concat('ABC', 'D'))"), STR("ABCDe"));
+}
+
TEST_XML(xpath_nodeset_last, "<node><c1/><c1/><c2/><c3/><c3/><c3/><c3/></node>")
{
xml_node n = doc.child(STR("node"));