From bbd75fda4618dcbef272c8e5432153992c392588 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 23 Oct 2014 05:46:44 +0000 Subject: tests: Improve test coverage git-svn-id: https://pugixml.googlecode.com/svn/trunk@1074 99668b35-9821-0410-8761-19e4c4f06640 --- tests/test_document.cpp | 34 ++++++++++++++++++++++++++++++++++ tests/test_dom_modify.cpp | 35 +++++++++++++++++++++++++++++++++++ tests/test_parse.cpp | 31 ++++++++++++++++++++++++++++++- tests/test_xpath_paths.cpp | 3 +++ 4 files changed, 102 insertions(+), 1 deletion(-) diff --git a/tests/test_document.cpp b/tests/test_document.cpp index c75ed8f..c76f671 100644 --- a/tests/test_document.cpp +++ b/tests/test_document.cpp @@ -1235,3 +1235,37 @@ TEST(document_alignment) doc->~xml_document(); } } + +TEST(document_convert_out_of_memory) +{ + file_data_t files[] = + { + {"tests/data/utftest_utf16_be_clean.xml", encoding_utf16_be, 0, 0}, + {"tests/data/utftest_utf16_le_clean.xml", encoding_utf16_le, 0, 0}, + {"tests/data/utftest_utf32_be_clean.xml", encoding_utf32_be, 0, 0}, + {"tests/data/utftest_utf32_le_clean.xml", encoding_utf32_le, 0, 0}, + {"tests/data/utftest_utf8_clean.xml", encoding_utf8, 0, 0}, + {"tests/data/latintest_latin1.xml", encoding_latin1, 0, 0} + }; + + // load files in memory + for (unsigned int i = 0; i < sizeof(files) / sizeof(files[0]); ++i) + { + CHECK(load_file_in_memory(files[i].path, files[i].data, files[i].size)); + } + + // disallow allocations + test_runner::_memory_fail_threshold = 1; + + for (unsigned int src = 0; src < sizeof(files) / sizeof(files[0]); ++src) + { + xml_document doc; + CHECK(doc.load_buffer(files[src].data, files[src].size, parse_default, files[src].encoding).status == status_out_of_memory); + } + + // cleanup + for (unsigned int j = 0; j < sizeof(files) / sizeof(files[0]); ++j) + { + delete[] files[j].data; + } +} diff --git a/tests/test_dom_modify.cpp b/tests/test_dom_modify.cpp index 612b017..5f4e26c 100644 --- a/tests/test_dom_modify.cpp +++ b/tests/test_dom_modify.cpp @@ -1354,3 +1354,38 @@ TEST_XML(dom_node_copy_out_of_memory, "text") +{ + xml_node node = doc.child(STR("node")); + + xml_attribute attr = node.attribute(STR("attr")); + attr.set_name(STR("longattr")); + attr.set_value(STR("longvalue")); + + node.set_name(STR("longnode")); + node.text().set(STR("longtext")); + + node.remove_attribute(attr); + doc.remove_child(node); + + CHECK_NODE(doc, STR("")); +} + +TEST_XML(dom_node_set_deallocate, "text") +{ + xml_node node = doc.child(STR("node")); + + xml_attribute attr = node.attribute(STR("attr")); + + attr.set_name(STR("longattr")); + attr.set_value(STR("longvalue")); + node.set_name(STR("longnode")); + + attr.set_name(STR("")); + attr.set_value(STR("")); + node.set_name(STR("")); + node.text().set(STR("")); + + CHECK_NODE(doc, STR("<:anonymous :anonymous=\"\">")); +} diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp index 2094ef9..444a0fb 100644 --- a/tests/test_parse.cpp +++ b/tests/test_parse.cpp @@ -876,7 +876,7 @@ TEST(parse_out_of_memory) CHECK(!doc.first_child()); } -TEST(parse_out_of_memory_halfway) +TEST(parse_out_of_memory_halfway_node) { const unsigned int count = 10000; static char_t text[count * 4]; @@ -896,6 +896,35 @@ TEST(parse_out_of_memory_halfway) CHECK_NODE(doc.first_child(), STR("")); } +TEST(parse_out_of_memory_halfway_attr) +{ + const unsigned int count = 10000; + static char_t text[count * 5 + 4]; + + text[0] = '<'; + text[1] = 'n'; + + for (unsigned int i = 0; i < count; ++i) + { + text[5*i + 2] = ' '; + text[5*i + 3] = 'a'; + text[5*i + 4] = '='; + text[5*i + 5] = '"'; + text[5*i + 6] = '"'; + } + + text[5 * count + 2] = '/'; + text[5 * count + 3] = '>'; + + test_runner::_memory_fail_threshold = 65536; + + xml_document doc; + CHECK(doc.load_buffer_inplace(text, count * 5 + 4).status == status_out_of_memory); + CHECK_STRING(doc.first_child().name(), STR("n")); + CHECK_STRING(doc.first_child().first_attribute().name(), STR("a")); + CHECK_STRING(doc.first_child().last_attribute().name(), STR("a")); +} + static bool test_offset(const char_t* contents, unsigned int options, pugi::xml_parse_status status, ptrdiff_t offset) { xml_document doc; diff --git a/tests/test_xpath_paths.cpp b/tests/test_xpath_paths.cpp index da8811d..ee2401a 100644 --- a/tests/test_xpath_paths.cpp +++ b/tests/test_xpath_paths.cpp @@ -615,6 +615,9 @@ TEST_XML(xpath_paths_optimize_step_once, "

") -- cgit v1.2.3 From 903db8682a5f14b52adec996584c70ea072619ea Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 23 Oct 2014 07:41:07 +0000 Subject: tests: Add more tests for better coverage More tests for out-of-memory and other edge conditions git-svn-id: https://pugixml.googlecode.com/svn/trunk@1075 99668b35-9821-0410-8761-19e4c4f06640 --- tests/test_document.cpp | 39 +++++++++++++++++++++++++++++++++++++++ tests/test_dom_modify.cpp | 17 ++++++++++++++++- tests/test_parse.cpp | 22 ++++++++++++++++++++++ tests/test_parse_doctype.cpp | 9 +++++++++ tests/test_xpath.cpp | 23 +++++++++++++++++++++++ tests/test_xpath_api.cpp | 5 +++++ tests/test_xpath_functions.cpp | 15 ++++++++++++++- tests/test_xpath_variables.cpp | 3 +++ 8 files changed, 131 insertions(+), 2 deletions(-) diff --git a/tests/test_document.cpp b/tests/test_document.cpp index c76f671..2cc39a6 100644 --- a/tests/test_document.cpp +++ b/tests/test_document.cpp @@ -227,6 +227,34 @@ TEST(document_load_stream_nonseekable_large) CHECK(doc.load(in)); CHECK_NODE(doc, str.c_str()); } + +TEST(document_load_stream_nonseekable_out_of_memory) +{ + char contents[] = ""; + char_array_buffer buffer(contents, contents + sizeof(contents) / sizeof(contents[0])); + std::istream in(&buffer); + + test_runner::_memory_fail_threshold = 1; + + pugi::xml_document doc; + CHECK(doc.load(in).status == status_out_of_memory); +} + +TEST(document_load_stream_nonseekable_out_of_memory_large) +{ + std::basic_string str; + str += STR(""); + for (int i = 0; i < 10000; ++i) str += STR(""); + str += STR(""); + + char_array_buffer buffer(&str[0], &str[0] + str.length()); + std::basic_istream in(&buffer); + + test_runner::_memory_fail_threshold = 10000 * 8 * 3 / 2; + + pugi::xml_document doc; + CHECK(doc.load(in).status == status_out_of_memory); +} #endif TEST(document_load_string) @@ -295,6 +323,17 @@ TEST(document_load_file_wide_ascii) CHECK_NODE(doc, STR("")); } +TEST(document_load_file_wide_out_of_memory) +{ + test_runner::_memory_fail_threshold = 1; + + pugi::xml_document doc; + + pugi::xml_parse_result result = doc.load_file(L"tests/data/small.xml"); + + CHECK(result.status == status_out_of_memory || result.status == status_file_not_found); +} + TEST_XML(document_save, "") { xml_writer_string writer; diff --git a/tests/test_dom_modify.cpp b/tests/test_dom_modify.cpp index 5f4e26c..01b562a 100644 --- a/tests/test_dom_modify.cpp +++ b/tests/test_dom_modify.cpp @@ -27,6 +27,16 @@ TEST_XML(dom_attr_assign, "") CHECK_NODE(node, STR("")); } +TEST_XML(dom_attr_set_name, "") +{ + xml_attribute attr = doc.child(STR("node")).attribute(STR("attr")); + + CHECK(attr.set_name(STR("n"))); + CHECK(!xml_attribute().set_name(STR("n"))); + + CHECK_NODE(doc, STR("")); +} + TEST_XML(dom_attr_set_value, "") { xml_node node = doc.child(STR("node")); @@ -758,8 +768,9 @@ TEST(dom_node_declaration_name) doc.insert_child_after(node_declaration, doc.first_child()); doc.insert_child_before(node_declaration, doc.first_child()); + doc.prepend_child(node_declaration); - CHECK_NODE(doc, STR("")); + CHECK_NODE(doc, STR("")); } TEST(dom_node_declaration_attributes) @@ -867,17 +878,21 @@ TEST(dom_node_out_of_memory) // verify all node modification operations CHECK(!n.append_child()); + CHECK(!n.prepend_child()); CHECK(!n.insert_child_after(node_element, n.first_child())); CHECK(!n.insert_child_before(node_element, n.first_child())); CHECK(!n.append_attribute(STR(""))); + CHECK(!n.prepend_attribute(STR(""))); CHECK(!n.insert_attribute_after(STR(""), a)); CHECK(!n.insert_attribute_before(STR(""), a)); // verify node copy operations CHECK(!n.append_copy(n.first_child())); + CHECK(!n.prepend_copy(n.first_child())); CHECK(!n.insert_copy_after(n.first_child(), n.first_child())); CHECK(!n.insert_copy_before(n.first_child(), n.first_child())); CHECK(!n.append_copy(a)); + CHECK(!n.prepend_copy(a)); CHECK(!n.insert_copy_after(a, a)); CHECK(!n.insert_copy_before(a, a)); } diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp index 444a0fb..c45b783 100644 --- a/tests/test_parse.cpp +++ b/tests/test_parse.cpp @@ -1068,3 +1068,25 @@ TEST(parse_pcdata_gap_fragment) CHECK(doc.load(STR("a&b"), parse_fragment | parse_escapes)); CHECK_STRING(doc.text().get(), STR("a&b")); } + +TEST(parse_name_end_eof) +{ + char_t test[] = STR(""); + + xml_document doc; + CHECK(doc.load_buffer_inplace(test, 6 * sizeof(char_t)).status == status_end_element_mismatch); + CHECK_STRING(doc.first_child().name(), STR("node")); +} + +TEST(parse_close_tag_eof) +{ + char_t test1[] = STR("")).status == status_bad_doctype); CHECK(doc.load(STR(""), parse_doctype).status == status_bad_doctype); } + +TEST(parse_doctype_error_ignore) +{ + xml_document doc; + CHECK(doc.load(STR(" query; + + for (int i = 0; i < 1024; ++i) query += STR("abcdefgh"); + + test_runner::_memory_fail_threshold = 8*1024; + +#ifdef PUGIXML_NO_EXCEPTIONS + CHECK(!xpath_query(query.c_str())); +#else + try + { + xpath_query q(query.c_str()); + + CHECK_FORCE_FAIL("Expected out of memory exception"); + } + catch (const std::bad_alloc&) + { + } +#endif +} #endif diff --git a/tests/test_xpath_api.cpp b/tests/test_xpath_api.cpp index 270f6aa..deb3beb 100644 --- a/tests/test_xpath_api.cpp +++ b/tests/test_xpath_api.cpp @@ -128,6 +128,11 @@ TEST_XML(xpath_api_nodeset_copy, "") copy4 = copy1; CHECK(copy4.size() == 2); CHECK_STRING(copy4[0].node().name(), STR("foo")); + + xpath_node_set copy5; + copy5 = set; + copy5 = xpath_node_set(); + CHECK(copy5.size() == 0); } TEST(xpath_api_nodeset_copy_empty) diff --git a/tests/test_xpath_functions.cpp b/tests/test_xpath_functions.cpp index da820ef..678bc2e 100644 --- a/tests/test_xpath_functions.cpp +++ b/tests/test_xpath_functions.cpp @@ -216,7 +216,7 @@ TEST(xpath_boolean_false) CHECK_XPATH_FAIL(STR("false(1)")); } -TEST_XML(xpath_boolean_lang, "") +TEST_XML(xpath_boolean_lang, "") { xml_node c; @@ -244,6 +244,9 @@ TEST_XML(xpath_boolean_lang, "pcdatafoobar") { CHECK_XPATH_STRING(doc, STR("concat('a', 'b', 'c', translate(node, 'o', 'a'), 'd')"), STR("abcfaabard")); diff --git a/tests/test_xpath_variables.cpp b/tests/test_xpath_variables.cpp index 70bb4ea..39a4f05 100644 --- a/tests/test_xpath_variables.cpp +++ b/tests/test_xpath_variables.cpp @@ -88,6 +88,9 @@ TEST(xpath_variables_type_string) CHECK_DOUBLE_NAN(var->get_number()); CHECK_STRING(var->get_string(), STR("abc")); CHECK(var->get_node_set().empty()); + + CHECK(var->set(STR("abcdef"))); + CHECK_STRING(var->get_string(), STR("abcdef")); } TEST_XML(xpath_variables_type_node_set, "") -- cgit v1.2.3 From 546997683a9edc1b77b60ac96776668d3f57adad Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Fri, 24 Oct 2014 01:17:57 +0000 Subject: tests: Add even more coverage tests Also fix MSVC6 compilation (make convertions to function pointers explicit). git-svn-id: https://pugixml.googlecode.com/svn/trunk@1076 99668b35-9821-0410-8761-19e4c4f06640 --- tests/test_dom_traverse.cpp | 10 +++++----- tests/test_xpath.cpp | 43 +++++++++++++++++++++++++++++++++++++++++- tests/test_xpath_variables.cpp | 23 ++++++++++++++++++++++ 3 files changed, 70 insertions(+), 6 deletions(-) diff --git a/tests/test_dom_traverse.cpp b/tests/test_dom_traverse.cpp index e42846f..83afec8 100644 --- a/tests/test_dom_traverse.cpp +++ b/tests/test_dom_traverse.cpp @@ -1048,14 +1048,14 @@ TEST_XML(dom_unspecified_bool_coverage, "text") { xml_node node = doc.first_child(); - node(0); - node.first_attribute()(0); - node.text()(0); + static_cast(node)(0); + static_cast(node.first_attribute())(0); + static_cast(node.text())(0); #ifndef PUGIXML_NO_XPATH xpath_query q(STR("/node")); - q(0); - q.evaluate_node(doc)(0); + static_cast(q)(0); + static_cast(q.evaluate_node(doc))(0); #endif } diff --git a/tests/test_xpath.cpp b/tests/test_xpath.cpp index 80115b8..a65ee37 100644 --- a/tests/test_xpath.cpp +++ b/tests/test_xpath.cpp @@ -110,7 +110,7 @@ TEST_XML(xpath_sort_attributes, "") n.append_attribute(STR("attr3")); n.insert_attribute_before(STR("attr1"), n.attribute(STR("attr2"))); - xpath_node_set ns = n.select_nodes(STR("@*")); + xpath_node_set ns = n.select_nodes(STR("@* | @*")); ns.sort(true); xpath_node_set reverse_sorted = ns; @@ -122,6 +122,25 @@ TEST_XML(xpath_sort_attributes, "") xpath_node_set_tester(reverse_sorted, "reverse sorted order failed") % 5 % 4 % 3; } +TEST_XML(xpath_sort_attributes_docorder, "") +{ + xml_node n = doc.child(STR("node")); + + n.first_attribute().set_name(STR("attribute1")); + n.insert_attribute_after(STR("attr3"), n.attribute(STR("attr2"))); + + xpath_node_set ns = n.select_nodes(STR("@* | @*")); + + ns.sort(true); + xpath_node_set reverse_sorted = ns; + + ns.sort(false); + xpath_node_set sorted = ns; + + xpath_node_set_tester(sorted, "sorted order failed") % 3 % 4 % 5 % 6; + xpath_node_set_tester(reverse_sorted, "reverse sorted order failed") % 6 % 5 % 4 % 3; +} + TEST(xpath_sort_random_medium) { xml_document doc; @@ -629,4 +648,26 @@ TEST(xpath_allocate_string_out_of_memory) } #endif } + +TEST(xpath_remove_duplicates) +{ + xml_document doc; + + for (int i = 0; i < 20; ++i) + { + doc.append_child(STR("node2")); + doc.prepend_child(STR("node1")); + } + + xpath_node_set ns = doc.select_nodes(STR("/node2/preceding::* | //node1 | /node() | /* | /node1/following-sibling::*")); + + ns.sort(); + + { + xpath_node_set_tester tester(ns, "sorted order failed"); + + for (int i = 0; i < 40; ++i) + tester % (2 + i); + } +} #endif diff --git a/tests/test_xpath_variables.cpp b/tests/test_xpath_variables.cpp index 39a4f05..728eaa9 100644 --- a/tests/test_xpath_variables.cpp +++ b/tests/test_xpath_variables.cpp @@ -276,6 +276,29 @@ TEST(xpath_variables_long_name) CHECK_XPATH_BOOLEAN_VAR(xml_node(), STR("$abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), &set, true); } +TEST(xpath_variables_long_name_out_of_memory) +{ + xpath_variable_set set; + set.set(STR("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), true); + + test_runner::_memory_fail_threshold = 4096 + 128; + +#ifdef PUGIXML_NO_EXCEPTIONS + xpath_query q(STR("$abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), &set); + CHECK(!q); +#else + try + { + xpath_query q(STR("$abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), &set); + + CHECK_FORCE_FAIL("Expected exception"); + } + catch (const xpath_exception&) + { + } +#endif +} + TEST_XML(xpath_variables_select, "") { xpath_variable_set set; -- cgit v1.2.3 From 7b74531c1badc08b15983a34a85f837c2bd0a8e7 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Fri, 24 Oct 2014 01:37:27 +0000 Subject: tests: Fix test failure in PUGIXML_WCHAR_MODE git-svn-id: https://pugixml.googlecode.com/svn/trunk@1077 99668b35-9821-0410-8761-19e4c4f06640 --- tests/test_xpath_variables.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_xpath_variables.cpp b/tests/test_xpath_variables.cpp index 728eaa9..53b40cf 100644 --- a/tests/test_xpath_variables.cpp +++ b/tests/test_xpath_variables.cpp @@ -281,7 +281,7 @@ TEST(xpath_variables_long_name_out_of_memory) xpath_variable_set set; set.set(STR("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), true); - test_runner::_memory_fail_threshold = 4096 + 128; + test_runner::_memory_fail_threshold = 4096 + 64 + 52 * sizeof(char_t); #ifdef PUGIXML_NO_EXCEPTIONS xpath_query q(STR("$abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), &set); -- cgit v1.2.3 From 4363e8a651c2ca24d7fc41e5707bc44ed102e94a Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 25 Oct 2014 03:17:55 +0000 Subject: Remove redundant null pointer checks. When removing a node or attribute, we know that the parent has at least one node/attribute so a null pointer check is redundant. git-svn-id: https://pugixml.googlecode.com/svn/trunk@1078 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index abe7adf..1187383 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -700,7 +700,7 @@ PUGI__NS_BEGIN if (node->next_sibling) node->next_sibling->prev_sibling_c = node->prev_sibling_c; - else if (parent->first_child) + else parent->first_child->prev_sibling_c = node->prev_sibling_c; if (node->prev_sibling_c->next_sibling) @@ -775,7 +775,7 @@ PUGI__NS_BEGIN { if (attr->next_attribute) attr->next_attribute->prev_attribute_c = attr->prev_attribute_c; - else if (node->first_attribute) + else node->first_attribute->prev_attribute_c = attr->prev_attribute_c; if (attr->prev_attribute_c->next_attribute) -- cgit v1.2.3 From 503abf607a9e1d5d778edb48e00514f5cb73f777 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 25 Oct 2014 05:28:37 +0000 Subject: Add 'coverage' configuration to Makefile. git-svn-id: https://pugixml.googlecode.com/svn/trunk@1079 99668b35-9821-0410-8761-19e4c4f06640 --- Makefile | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Makefile b/Makefile index eddd4f2..d351e0f 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,12 @@ ifeq ($(config),release) CXXFLAGS+=-O3 -DNDEBUG endif +ifeq ($(config),coverage) + CXXFLAGS+=-DNDEBUG + CXXFLAGS+=-fprofile-arcs -ftest-coverage + LDFLAGS+=-fprofile-arcs +endif + ifneq ($(defines),standard) COMMA=, CXXFLAGS+=-D $(subst $(COMMA), -D ,$(defines)) @@ -22,8 +28,16 @@ OBJECTS=$(SOURCES:%=$(BUILD)/%.o) all: $(EXECUTABLE) +ifeq ($(config),coverage) test: $(EXECUTABLE) + @find $(BUILD) -name '*.gcda' | xargs rm ./$(EXECUTABLE) + @gcov -b -c $(BUILD)/src/pugixml.cpp.gcda | sed -e '/./{H;$!d;}' -e 'x;/pugixml.cpp/!d;' + @ls *.gcov | grep -v pugixml.cpp.gcov | xargs rm +else +test: $(EXECUTABLE) + ./$(EXECUTABLE) +endif clean: rm -rf $(BUILD) -- cgit v1.2.3 From 0b7964917cbfd9d007c8a017eaced636f53ddeb3 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 25 Oct 2014 16:37:16 +0000 Subject: Fix node copying for some out of memory cases A page can fail to allocate during attribute creation; this case was not previously handled. git-svn-id: https://pugixml.googlecode.com/svn/trunk@1080 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 9 +++++++-- tests/test_dom_modify.cpp | 17 +++++++++++++---- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 1187383..109a635 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -164,6 +164,8 @@ PUGI__NS_BEGIN static deallocation_function deallocate; }; + // Global allocation functions are stored in class statics so that in header mode linker deduplicates them + // Without a template<> we'll get multiple definitions of the same static template allocation_function xml_memory_management_function_storage::allocate = default_allocate; template deallocation_function xml_memory_management_function_storage::deallocate = default_deallocate; @@ -3735,8 +3737,11 @@ PUGI__NS_BEGIN { xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn)); - node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); - node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); + if (da) + { + node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); + node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); + } } } diff --git a/tests/test_dom_modify.cpp b/tests/test_dom_modify.cpp index 01b562a..07fe6dc 100644 --- a/tests/test_dom_modify.cpp +++ b/tests/test_dom_modify.cpp @@ -1361,13 +1361,22 @@ TEST_XML(dom_node_copyless_taint, "") CHECK_NODE(doc, STR("")); } -TEST_XML(dom_node_copy_out_of_memory, "text1text2") +TEST_XML(dom_node_copy_out_of_memory_node, "text1text2") { test_runner::_memory_fail_threshold = 32768 * 2 + 4096; - xml_document copy; - for (int i = 0; i < 100; ++i) - copy.append_copy(doc.first_child()); + xml_document copy; + for (int i = 0; i < 1000; ++i) + copy.append_copy(doc.first_child()); +} + +TEST_XML(dom_node_copy_out_of_memory_attr, "") +{ + test_runner::_memory_fail_threshold = 32768 * 2 + 4096; + + xml_document copy; + for (int i = 0; i < 1000; ++i) + copy.append_copy(doc.first_child()); } TEST_XML(dom_node_remove_deallocate, "text") -- cgit v1.2.3 From 0e59ea8234e8365a67cd7c35d9cf6eaaec41996b Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 26 Oct 2014 03:46:28 +0000 Subject: tests: Add a way for tests to verify allocation failure git-svn-id: https://pugixml.googlecode.com/svn/trunk@1081 99668b35-9821-0410-8761-19e4c4f06640 --- tests/main.cpp | 6 ++++++ tests/test.hpp | 1 + 2 files changed, 7 insertions(+) diff --git a/tests/main.cpp b/tests/main.cpp index 75b0108..c4c9341 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -20,6 +20,7 @@ test_runner* test_runner::_tests = 0; size_t test_runner::_memory_fail_threshold = 0; +bool test_runner::_memory_fail_triggered = false; jmp_buf test_runner::_failure_buffer; const char* test_runner::_failure_message; const char* test_runner::_temp_path; @@ -30,7 +31,11 @@ static size_t g_memory_total_count = 0; static void* custom_allocate(size_t size) { if (test_runner::_memory_fail_threshold > 0 && test_runner::_memory_fail_threshold < g_memory_total_size + size) + { + test_runner::_memory_fail_triggered = true; + return 0; + } else { void* ptr = memory_allocate(size); @@ -84,6 +89,7 @@ static bool run_test(test_runner* test) g_memory_total_size = 0; g_memory_total_count = 0; test_runner::_memory_fail_threshold = 0; + test_runner::_memory_fail_triggered = false; pugi::set_memory_management_functions(custom_allocate, custom_deallocate); diff --git a/tests/test.hpp b/tests/test.hpp index 26260ad..509b0cd 100644 --- a/tests/test.hpp +++ b/tests/test.hpp @@ -23,6 +23,7 @@ struct test_runner static test_runner* _tests; static size_t _memory_fail_threshold; + static bool _memory_fail_triggered; static jmp_buf _failure_buffer; static const char* _failure_message; -- cgit v1.2.3 From 3fc86dcdc62556cbb760cf4b9ddf606c3b5d36d3 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 26 Oct 2014 02:21:29 -0700 Subject: Update README.md with new documentation URLs. --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 01a53a0..2d8263d 100644 --- a/README.md +++ b/README.md @@ -12,8 +12,8 @@ pugixml is used by a lot of projects, both open-source and proprietary, for perf Documentation for the current release of pugixml is available on-line as two separate documents: -* [Quick-start guide](http://pugixml.googlecode.com/svn/tags/latest/docs/quickstart.html), that aims to provide enough information to start using the library; -* [Complete reference manual](http://pugixml.googlecode.com/svn/tags/latest/docs/manual.html), that describes all features of the library in detail. +* [Quick-start guide](http://cdn.rawgit.com/zeux/pugixml/v1.4/docs/quickstart.html), that aims to provide enough information to start using the library; +* [Complete reference manual](http://cdn.rawgit.com/zeux/pugixml/v1.4/docs/manual.html), that describes all features of the library in detail. You’re advised to start with the quick-start guide; however, many important library features are either not described in it at all or only mentioned briefly; if you require more information you should read the complete manual. @@ -41,4 +41,4 @@ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file +OTHER DEALINGS IN THE SOFTWARE. -- cgit v1.2.3 From f31c14e1fdcbd6e406e7e2bea21b303853e028e3 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 26 Oct 2014 02:22:27 -0700 Subject: tests: Remove git2svn helper script! --- tests/gitsvn.sh | 5 ----- 1 file changed, 5 deletions(-) delete mode 100755 tests/gitsvn.sh diff --git a/tests/gitsvn.sh b/tests/gitsvn.sh deleted file mode 100755 index 222a09f..0000000 --- a/tests/gitsvn.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -git svn init https://pugixml.googlecode.com/svn/trunk -git update-ref refs/remotes/git-svn refs/remotes/origin/master -git svn rebase -- cgit v1.2.3 From d5e29292c67c24b5cd7ac4f6afce2f8ec293144a Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 26 Oct 2014 09:37:18 -0700 Subject: XPath: Optimize constant filters/predicates If a filter/predicate expression is a constant, we don't need to evaluate it for every nodeset element - we can evaluate it once and pick the right element or keep/discard the entire collection. If the expression is 1, we can early out on first node when evaluating the node set - queries like following::item[1] are now significantly faster. Additionally this change refactors filters/predicates to have additional metadata describing the expression type in _test field that is filled during optimization. Note that predicate_constant selection right now is very simple (but captures most common use cases except for maybe [last()]). --- src/pugixml.cpp | 116 +++++++++++++++++++++++++++++++++++++-------- tests/test_xpath_paths.cpp | 38 +++++++++++++++ 2 files changed, 133 insertions(+), 21 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 109a635..ac36a31 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -1,4 +1,5 @@ /** +{ * pugixml parser - version 1.4 * -------------------------------------------------------- * Copyright (C) 2006-2014, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) @@ -8199,7 +8200,6 @@ PUGI__NS_BEGIN ast_op_union, // left | right ast_predicate, // apply predicate to set; next points to next predicate ast_filter, // select * from left where right - ast_filter_posinv, // select * from left where right; proximity position invariant ast_string_constant, // string constant ast_number_constant, // number constant ast_variable, // variable @@ -8275,6 +8275,14 @@ PUGI__NS_BEGIN nodetest_all_in_namespace }; + enum predicate_t + { + predicate_default, + predicate_posinv, + predicate_constant, + predicate_constant_one + }; + enum nodeset_eval_t { nodeset_eval_all, @@ -8296,8 +8304,10 @@ PUGI__NS_BEGIN char _type; char _rettype; - // for ast_step / ast_predicate + // for ast_step char _axis; + + // for ast_step/ast_predicate/ast_filter char _test; // tree node structure @@ -8494,7 +8504,7 @@ PUGI__NS_BEGIN size_t size = ns.size() - first; xpath_node* last = ns.begin() + first; - + // remove_if... or well, sort of for (xpath_node* it = last; it != ns.end(); ++it, ++i) { @@ -8509,17 +8519,57 @@ PUGI__NS_BEGIN if (once) break; } } - else if (expr->eval_boolean(c, stack)) + else { - *last++ = *it; + if (expr->eval_boolean(c, stack)) + { + *last++ = *it; - if (once) break; + if (once) break; + } } } ns.truncate(last); } + static void apply_predicate_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) + { + assert(ns.size() >= first); + + size_t size = ns.size() - first; + + xpath_node* last = ns.begin() + first; + + xpath_context c(xpath_node(), 1, size); + + if (expr->rettype() == xpath_type_number) + { + double er = expr->eval_number(c, stack); + + if (er >= 1.0 && er <= size) + { + size_t eri = static_cast(er); + + if (er == eri) + { + xpath_node r = last[eri - 1]; + + *last++ = r; + } + } + } + else + { + if (expr->eval_boolean(c, stack)) + { + last += size; + } + } + + ns.truncate(last); + } + void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval) { if (ns.size() == first) return; @@ -8528,7 +8578,12 @@ PUGI__NS_BEGIN for (xpath_ast_node* pred = _right; pred; pred = pred->_next) { - apply_predicate(ns, first, pred->_left, stack, !pred->_next && last_once); + assert(pred->_type == ast_predicate); + + if (pred->_test == predicate_constant || pred->_test == predicate_constant_one) + apply_predicate_const(ns, first, pred->_left, stack); + else + apply_predicate(ns, first, pred->_left, stack, !pred->_next && last_once); } } @@ -8938,9 +8993,9 @@ PUGI__NS_BEGIN bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling); bool once = - (axis == axis_attribute && _test == nodetest_name) - ? true - : !_right && eval_once(!axis_reverse, eval); + (axis == axis_attribute && _test == nodetest_name) || + (!_right && eval_once(!axis_reverse, eval)) || + (_right && !_right->_next && _right->_test == predicate_constant_one); xpath_node_set_raw ns; ns.set_type(axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted); @@ -9007,6 +9062,7 @@ PUGI__NS_BEGIN xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents): _type(static_cast(type)), _rettype(xpath_type_node_set), _axis(static_cast(axis)), _test(static_cast(test)), _left(left), _right(0), _next(0) { + assert(type == ast_step); _data.nodetest = contents; } @@ -9583,27 +9639,33 @@ PUGI__NS_BEGIN xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval); xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval); - + // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother rs.set_type(xpath_node_set::type_unsorted); rs.append(ls.begin(), ls.end(), stack.result); rs.remove_duplicates(); - + return rs; } case ast_filter: - case ast_filter_posinv: { - xpath_node_set_raw set = _left->eval_node_set(c, stack, nodeset_eval_all); + xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all); // either expression is a number or it contains position() call; sort by document order - if (_type == ast_filter) set.sort_do(); + if (_test != predicate_posinv) set.sort_do(); - bool once = eval_once(set.type() == xpath_node_set::type_sorted, eval); + if (_test == predicate_constant || _test == predicate_constant_one) + { + apply_predicate_const(set, 0, _right, stack); + } + else + { + bool once = eval_once(set.type() == xpath_node_set::type_sorted, eval); - apply_predicate(set, 0, _right, stack, once); + apply_predicate(set, 0, _right, stack, once); + } return set; } @@ -9706,6 +9768,21 @@ PUGI__NS_BEGIN if (_right) _right->optimize(alloc); if (_next) _next->optimize(alloc); + // Classify filter/predicate ops to perform various optimizations during evaluation + if (_type == ast_filter || _type == ast_predicate) + { + xpath_ast_node* expr = (_type == ast_filter) ? _right : _left; + + if (expr->_type == ast_number_constant && expr->_data.number == 1.0) + _test = predicate_constant_one; + else if (expr->_type == ast_number_constant || expr->_type == ast_variable) + _test = predicate_constant; + else if (expr->rettype() != xpath_type_number && expr->is_posinv_expr()) + _test = predicate_posinv; + else + _test = predicate_default; + } + // Replace descendant-or-self::node()/child::foo with descendant::foo // The former is a full form of //foo, the latter is much faster since it executes the node test immediately // Do a similar kind of replacement for self/descendant/descendant-or-self axes @@ -9762,7 +9839,6 @@ PUGI__NS_BEGIN case ast_predicate: case ast_filter: - case ast_filter_posinv: return true; default: @@ -10206,9 +10282,7 @@ PUGI__NS_BEGIN if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set"); - bool posinv = expr->rettype() != xpath_type_number && expr->is_posinv_expr(); - - n = new (alloc_node()) xpath_ast_node(posinv ? ast_filter_posinv : ast_filter, xpath_type_node_set, n, expr); + n = new (alloc_node()) xpath_ast_node(ast_filter, xpath_type_node_set, n, expr); if (_lexer.current() != lex_close_square_brace) throw_error("Unmatched square brace"); diff --git a/tests/test_xpath_paths.cpp b/tests/test_xpath_paths.cpp index ee2401a..046592a 100644 --- a/tests/test_xpath_paths.cpp +++ b/tests/test_xpath_paths.cpp @@ -437,6 +437,44 @@ TEST_XML(xpath_paths_predicate_number, "") +{ + CHECK_XPATH_NODESET(doc, STR("node/chapter[0.999999999999999]")); + CHECK_XPATH_NODESET(doc, STR("node/chapter[1]")) % 3; + CHECK_XPATH_NODESET(doc, STR("node/chapter[1.000000000000001]")); + CHECK_XPATH_NODESET(doc, STR("node/chapter[1.999999999999999]")); + CHECK_XPATH_NODESET(doc, STR("node/chapter[2]")) % 4; + CHECK_XPATH_NODESET(doc, STR("node/chapter[2.000000000000001]")); + CHECK_XPATH_NODESET(doc, STR("node/chapter[4.999999999999999]")); + CHECK_XPATH_NODESET(doc, STR("node/chapter[5]")) % 7; + CHECK_XPATH_NODESET(doc, STR("node/chapter[5.000000000000001]")); +} + +TEST_XML(xpath_paths_predicate_number_out_of_range, "") +{ + xml_node n = doc.child(STR("node")).child(STR("chapter")).next_sibling().next_sibling(); + + CHECK_XPATH_NODESET(n, STR("following-sibling::chapter[0]")); + CHECK_XPATH_NODESET(n, STR("following-sibling::chapter[-1]")); + CHECK_XPATH_NODESET(n, STR("following-sibling::chapter[-1000000000000]")); + CHECK_XPATH_NODESET(n, STR("following-sibling::chapter[-1 div 0]")); + CHECK_XPATH_NODESET(n, STR("following-sibling::chapter[1000000000000]")); + CHECK_XPATH_NODESET(n, STR("following-sibling::chapter[1 div 0]")); + CHECK_XPATH_NODESET(n, STR("following-sibling::chapter[0 div 0]")); +} + +TEST_XML(xpath_paths_predicate_constant_boolean, "") +{ + xml_node n = doc.child(STR("node")).child(STR("chapter")).next_sibling().next_sibling(); + + xpath_variable_set set; + set.set(STR("true"), true); + set.set(STR("false"), false); + + CHECK_XPATH_NODESET_VAR(n, STR("following-sibling::chapter[$false]"), &set); + CHECK_XPATH_NODESET_VAR(n, STR("following-sibling::chapter[$true]"), &set) % 6 % 7; +} + TEST_XML(xpath_paths_predicate_several, "") { xml_node n = doc.child(STR("node")); -- cgit v1.2.3 From 11ce004e040937e17755b3bdf2a1d7f846214f3f Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 26 Oct 2014 10:07:01 -0700 Subject: XPath: Unify ast_filter/ast_predicate structure Now expression is always _right for filter/predicate nodes to make optimize() simpler. Additionally we now use predicate metadata to make is_posinv_step() faster. This introduces a weak ordering dependency in rewrite rules to optimize() - classification has to be performed before other optimizations. --- src/pugixml.cpp | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index ac36a31..d66f9f9 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -8581,9 +8581,9 @@ PUGI__NS_BEGIN assert(pred->_type == ast_predicate); if (pred->_test == predicate_constant || pred->_test == predicate_constant_one) - apply_predicate_const(ns, first, pred->_left, stack); + apply_predicate_const(ns, first, pred->_right, stack); else - apply_predicate(ns, first, pred->_left, stack, !pred->_next && last_once); + apply_predicate(ns, first, pred->_right, stack, !pred->_next && last_once); } } @@ -9066,6 +9066,12 @@ PUGI__NS_BEGIN _data.nodetest = contents; } + xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test): + _type(static_cast(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast(test)), _left(left), _right(right), _next(0) + { + assert(type == ast_filter || type == ast_predicate); + } + void set_next(xpath_ast_node* value) { _next = value; @@ -9771,16 +9777,14 @@ PUGI__NS_BEGIN // Classify filter/predicate ops to perform various optimizations during evaluation if (_type == ast_filter || _type == ast_predicate) { - xpath_ast_node* expr = (_type == ast_filter) ? _right : _left; + assert(_test == predicate_default); - if (expr->_type == ast_number_constant && expr->_data.number == 1.0) + if (_right->_type == ast_number_constant && _right->_data.number == 1.0) _test = predicate_constant_one; - else if (expr->_type == ast_number_constant || expr->_type == ast_variable) + else if (_right->_type == ast_number_constant || _right->_type == ast_variable) _test = predicate_constant; - else if (expr->rettype() != xpath_type_number && expr->is_posinv_expr()) + else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr()) _test = predicate_posinv; - else - _test = predicate_default; } // Replace descendant-or-self::node()/child::foo with descendant::foo @@ -9814,7 +9818,7 @@ PUGI__NS_BEGIN // Use optimized path for @attr = 'value' or @attr = $value if (_type == ast_op_equal && _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right && - (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->rettype() == xpath_type_string))) + (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string))) { _type = ast_opt_compare_attribute; } @@ -9859,10 +9863,7 @@ PUGI__NS_BEGIN { assert(n->_type == ast_predicate); - xpath_ast_node* expr = n->_left; - bool posinv = expr->rettype() != xpath_type_number && expr->is_posinv_expr(); - - if (!posinv) + if (n->_test != predicate_posinv) return false; } @@ -10282,7 +10283,7 @@ PUGI__NS_BEGIN if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set"); - n = new (alloc_node()) xpath_ast_node(ast_filter, xpath_type_node_set, n, expr); + n = new (alloc_node()) xpath_ast_node(ast_filter, n, expr, predicate_default); if (_lexer.current() != lex_close_square_brace) throw_error("Unmatched square brace"); @@ -10426,7 +10427,7 @@ PUGI__NS_BEGIN xpath_ast_node* expr = parse_expression(); - xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr); + xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default); if (_lexer.current() != lex_close_square_brace) throw_error("Unmatched square brace"); -- cgit v1.2.3 From dbfe85a717b7ac8b9bd30eb51796811c0c651da9 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 26 Oct 2014 10:24:57 -0700 Subject: XPath: Only recognize numeric constant expressions Numeric and boolean constant expressions in filters are different in that to evaluate numeric expressions we need a sorted order, but to evaluate boolean expressions we don't. The previously implemented optimization adds an extra sorting step for constant boolean filters that will be more expensive than redundant computations. Since constant booleans are sort of an edge case, don't do this optimization. This allows us to simplify apply_predicate_const to only handle numbers. --- src/pugixml.cpp | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index d66f9f9..67591a3 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -8536,6 +8536,7 @@ PUGI__NS_BEGIN static void apply_predicate_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) { assert(ns.size() >= first); + assert(expr->rettype() == xpath_type_number); size_t size = ns.size() - first; @@ -8543,27 +8544,17 @@ PUGI__NS_BEGIN xpath_context c(xpath_node(), 1, size); - if (expr->rettype() == xpath_type_number) + double er = expr->eval_number(c, stack); + + if (er >= 1.0 && er <= size) { - double er = expr->eval_number(c, stack); + size_t eri = static_cast(er); - if (er >= 1.0 && er <= size) + if (er == eri) { - size_t eri = static_cast(er); - - if (er == eri) - { - xpath_node r = last[eri - 1]; + xpath_node r = last[eri - 1]; - *last++ = r; - } - } - } - else - { - if (expr->eval_boolean(c, stack)) - { - last += size; + *last++ = r; } } @@ -9781,16 +9772,16 @@ PUGI__NS_BEGIN if (_right->_type == ast_number_constant && _right->_data.number == 1.0) _test = predicate_constant_one; - else if (_right->_type == ast_number_constant || _right->_type == ast_variable) + else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable)) _test = predicate_constant; else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr()) _test = predicate_posinv; } - // Replace descendant-or-self::node()/child::foo with descendant::foo + // Rewrite descendant-or-self::node()/child::foo with descendant::foo // The former is a full form of //foo, the latter is much faster since it executes the node test immediately - // Do a similar kind of replacement for self/descendant/descendant-or-self axes - // Note that we only replace positionally invariant steps (//foo[1] != /descendant::foo[1]) + // Do a similar kind of rewrite for self/descendant/descendant-or-self axes + // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1]) if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left && _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right && is_posinv_step()) @@ -9803,7 +9794,7 @@ PUGI__NS_BEGIN _left = _left->_left; } - // Replace translate() with constant arguments with a table + // Use optimized lookup table implementation for translate() with constant arguments if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant) { unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string); -- cgit v1.2.3 From c64d4820b142e6df93ccf612d0e4717159a36591 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 27 Oct 2014 18:50:09 -0700 Subject: XPath: Optimize [position()=expr] and [last()] To get more benefits from constant predicate/filter optimization we rewrite [position()=expr] predicates into [expr] for numeric expressions. Right now the rewrite is only for entire expressions - it may be beneficial to split complex expressions like [position()=constant and expr] into [constant][expr] but that is more complicated. last() does not depend on the node set contents so is "constant" as far as our optimization is concerned so we can evaluate it once. --- src/pugixml.cpp | 10 +++++++++- tests/test_xpath_paths.cpp | 8 ++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 67591a3..f0bfc31 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -9765,6 +9765,14 @@ PUGI__NS_BEGIN if (_right) _right->optimize(alloc); if (_next) _next->optimize(alloc); + // Rewrite [position()=expr] with [expr] + // Note that this step has to go before classification to recognize [position()=1] + if ((_type == ast_filter || _type == ast_predicate) && + _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number) + { + _right = _right->_right; + } + // Classify filter/predicate ops to perform various optimizations during evaluation if (_type == ast_filter || _type == ast_predicate) { @@ -9772,7 +9780,7 @@ PUGI__NS_BEGIN if (_right->_type == ast_number_constant && _right->_data.number == 1.0) _test = predicate_constant_one; - else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable)) + else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last)) _test = predicate_constant; else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr()) _test = predicate_posinv; diff --git a/tests/test_xpath_paths.cpp b/tests/test_xpath_paths.cpp index 046592a..df0dfa4 100644 --- a/tests/test_xpath_paths.cpp +++ b/tests/test_xpath_paths.cpp @@ -475,6 +475,14 @@ TEST_XML(xpath_paths_predicate_constant_boolean, "3") +{ + CHECK_XPATH_NODESET(doc, STR("node/chapter[position()=1]")) % 3; + CHECK_XPATH_NODESET(doc, STR("node/chapter[position()=2+2]")) % 7; + CHECK_XPATH_NODESET(doc, STR("node/chapter[position()=last()]")) % 8; + CHECK_XPATH_NODESET(doc, STR("node/chapter[position()=string()]")) % 5; +} + TEST_XML(xpath_paths_predicate_several, "") { xml_node n = doc.child(STR("node")); -- cgit v1.2.3 From 6229138d80380d582f16931d36b279807dcb82dd Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 27 Oct 2014 22:29:14 -0700 Subject: Optimize node printing by using raw pointers This lets us do fewer null pointer checks (making printing 2% faster with -O3) and removes a lot of function calls (making printing 20% faster with -O0). --- src/pugixml.cpp | 81 +++++++++++++++++++++++++++------------------------- tests/test_write.cpp | 50 ++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 39 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index f0bfc31..13ed4b8 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -3452,36 +3452,37 @@ PUGI__NS_BEGIN writer.write('-', '-', '>'); } - PUGI__FN void node_output_attributes(xml_buffered_writer& writer, const xml_node node, unsigned int flags) + PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags) { const char_t* default_name = PUGIXML_TEXT(":anonymous"); - for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute()) + for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) { writer.write(' '); - writer.write_string(a.name()[0] ? a.name() : default_name); + writer.write_string(a->name ? a->name : default_name); writer.write('=', '"'); - text_output(writer, a.value(), ctx_special_attr, flags); + if (a->value) + text_output(writer, a->value, ctx_special_attr, flags); writer.write('"'); } } - PUGI__FN bool node_output_start(xml_buffered_writer& writer, const xml_node node, unsigned int flags) + PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags) { const char_t* default_name = PUGIXML_TEXT(":anonymous"); - const char_t* name = node.name()[0] ? node.name() : default_name; + const char_t* name = node->name ? node->name : default_name; writer.write('<'); writer.write_string(name); - if (node.first_attribute()) + if (node->first_attribute) node_output_attributes(writer, node, flags); if (flags & format_raw) { - if (!node.first_child()) + if (!node->first_child) writer.write(' ', '/', '>'); else { @@ -3492,18 +3493,20 @@ PUGI__NS_BEGIN } else { - xml_node first = node.first_child(); + xml_node_struct* first = node->first_child; if (!first) writer.write(' ', '/', '>', '\n'); - else if (!first.next_sibling() && (first.type() == node_pcdata || first.type() == node_cdata)) + else if (!first->next_sibling && (PUGI__NODETYPE(first) == node_pcdata || PUGI__NODETYPE(first) == node_cdata)) { writer.write('>'); - if (first.type() == node_pcdata) - text_output(writer, first.value(), ctx_special_pcdata, flags); + const char_t* value = first->value ? first->value : PUGIXML_TEXT(""); + + if (PUGI__NODETYPE(first) == node_pcdata) + text_output(writer, value, ctx_special_pcdata, flags); else - text_output_cdata(writer, first.value()); + text_output_cdata(writer, value); writer.write('<', '/'); writer.write_string(name); @@ -3520,10 +3523,10 @@ PUGI__NS_BEGIN return false; } - PUGI__FN void node_output_end(xml_buffered_writer& writer, const xml_node node, unsigned int flags) + PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags) { const char_t* default_name = PUGIXML_TEXT(":anonymous"); - const char_t* name = node.name()[0] ? node.name() : default_name; + const char_t* name = node->name ? node->name : default_name; writer.write('<', '/'); writer.write_string(name); @@ -3534,35 +3537,35 @@ PUGI__NS_BEGIN writer.write('>', '\n'); } - PUGI__FN void node_output_simple(xml_buffered_writer& writer, const xml_node node, unsigned int flags) + PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags) { const char_t* default_name = PUGIXML_TEXT(":anonymous"); - switch (node.type()) + switch (PUGI__NODETYPE(node)) { case node_pcdata: - text_output(writer, node.value(), ctx_special_pcdata, flags); + text_output(writer, node->value ? node->value : PUGIXML_TEXT(""), ctx_special_pcdata, flags); if ((flags & format_raw) == 0) writer.write('\n'); break; case node_cdata: - text_output_cdata(writer, node.value()); + text_output_cdata(writer, node->value ? node->value : PUGIXML_TEXT("")); if ((flags & format_raw) == 0) writer.write('\n'); break; case node_comment: - node_output_comment(writer, node.value()); + node_output_comment(writer, node->value ? node->value : PUGIXML_TEXT("")); if ((flags & format_raw) == 0) writer.write('\n'); break; case node_pi: writer.write('<', '?'); - writer.write_string(node.name()[0] ? node.name() : default_name); + writer.write_string(node->name ? node->name : default_name); - if (node.value()[0]) + if (node->value) { writer.write(' '); - writer.write_string(node.value()); + writer.write_string(node->value); } writer.write('?', '>'); @@ -3571,7 +3574,7 @@ PUGI__NS_BEGIN case node_declaration: writer.write('<', '?'); - writer.write_string(node.name()[0] ? node.name() : default_name); + writer.write_string(node->name ? node->name : default_name); node_output_attributes(writer, node, flags); writer.write('?', '>'); if ((flags & format_raw) == 0) writer.write('\n'); @@ -3581,10 +3584,10 @@ PUGI__NS_BEGIN writer.write('<', '!', 'D', 'O', 'C'); writer.write('T', 'Y', 'P', 'E'); - if (node.value()[0]) + if (node->value) { writer.write(' '); - writer.write_string(node.value()); + writer.write_string(node->value); } writer.write('>'); @@ -3596,11 +3599,11 @@ PUGI__NS_BEGIN } } - PUGI__FN void node_output(xml_buffered_writer& writer, const xml_node root, const char_t* indent, unsigned int flags, unsigned int depth) + PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth) { size_t indent_length = ((flags & (format_indent | format_raw)) == format_indent) ? strlength(indent) : 0; - xml_node node = root; + xml_node_struct* node = root; do { @@ -3610,20 +3613,20 @@ PUGI__NS_BEGIN if (indent_length) text_output_indent(writer, indent, indent_length, depth); - if (node.type() == node_element) + if (PUGI__NODETYPE(node) == node_element) { if (node_output_start(writer, node, flags)) { - node = node.first_child(); + node = node->first_child; depth++; continue; } } - else if (node.type() == node_document) + else if (PUGI__NODETYPE(node) == node_document) { - if (node.first_child()) + if (node->first_child) { - node = node.first_child(); + node = node->first_child; continue; } } @@ -3635,17 +3638,17 @@ PUGI__NS_BEGIN // continue to the next node while (node != root) { - if (node.next_sibling()) + if (node->next_sibling) { - node = node.next_sibling(); + node = node->next_sibling; break; } - node = node.parent(); + node = node->parent; depth--; // write closing node - if (node.type() == node_element) + if (PUGI__NODETYPE(node) == node_element) { if (indent_length) text_output_indent(writer, indent, indent_length, depth); @@ -5383,7 +5386,7 @@ namespace pugi impl::xml_buffered_writer buffered_writer(writer, encoding); - impl::node_output(buffered_writer, *this, indent, flags, depth); + impl::node_output(buffered_writer, _root, indent, flags, depth); } #ifndef PUGIXML_NO_STL @@ -6074,7 +6077,7 @@ namespace pugi if (!(flags & format_raw)) buffered_writer.write('\n'); } - impl::node_output(buffered_writer, *this, indent, flags, 0); + impl::node_output(buffered_writer, _root, indent, flags, 0); } #ifndef PUGIXML_NO_STL diff --git a/tests/test_write.cpp b/tests/test_write.cpp index 98650ac..8fc88e1 100644 --- a/tests/test_write.cpp +++ b/tests/test_write.cpp @@ -51,6 +51,15 @@ TEST_XML(write_cdata_inner, "") CHECK_NODE_EX(doc, STR("\n"), STR(""), 0); } +TEST(write_cdata_null) +{ + xml_document doc; + doc.append_child(node_cdata); + doc.append_child(STR("node")).append_child(node_cdata); + + CHECK_NODE(doc, STR("")); +} + TEST_XML_FLAGS(write_comment, "", parse_comments | parse_fragment) { CHECK_NODE(doc, STR("")); @@ -80,12 +89,32 @@ TEST(write_comment_invalid) CHECK_NODE(doc, STR("")); } +TEST(write_comment_null) +{ + xml_document doc; + doc.append_child(node_comment); + + CHECK_NODE(doc, STR("")); +} + TEST_XML_FLAGS(write_pi, "", parse_pi | parse_fragment) { CHECK_NODE(doc, STR("")); CHECK_NODE_EX(doc, STR("\n"), STR(""), 0); } +TEST(write_pi_null) +{ + xml_document doc; + xml_node node = doc.append_child(node_pi); + + CHECK_NODE(doc, STR("")); + + node.set_value(STR("value")); + + CHECK_NODE(doc, STR("")); +} + TEST_XML_FLAGS(write_declaration, "", parse_declaration | parse_fragment) { CHECK_NODE(doc, STR("")); @@ -98,6 +127,14 @@ TEST_XML_FLAGS(write_doctype, "", parse_doctype | parse_fra CHECK_NODE_EX(doc, STR("\n"), STR(""), 0); } +TEST(write_doctype_null) +{ + xml_document doc; + doc.append_child(node_doctype); + + CHECK_NODE(doc, STR("")); +} + TEST_XML(write_escape, "text") { doc.child(STR("node")).attribute(STR("attr")) = STR("<>'\"&\x04\r\n\t"); @@ -460,3 +497,16 @@ TEST_XML(write_indent_custom, "text\nABCD\nABCDABCDtext\nABCD\n\n"), STR("ABCD"), format_indent); CHECK_NODE_EX(doc, STR("\nABCDE\nABCDEABCDEtext\nABCDE\n\n"), STR("ABCDE"), format_indent); } + +TEST(write_pcdata_null) +{ + xml_document doc; + doc.append_child(STR("node")).append_child(node_pcdata); + + CHECK_NODE(doc, STR("")); + CHECK_NODE_EX(doc, STR("\n"), STR("\t"), format_indent); + + doc.first_child().append_child(node_pcdata); + + CHECK_NODE_EX(doc, STR("\n\t\n\t\n\n"), STR("\t"), format_indent); +} -- cgit v1.2.3