diff options
author | Arseny Kapoulkine <arseny.kapoulkine@gmail.com> | 2017-11-13 13:24:43 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-11-13 13:24:43 -0800 |
commit | 7c6d0010b30111dbfb8e523634e7b63328992106 (patch) | |
tree | 708c0ccd524bc87a90bea24378bc7fe3700527a2 | |
parent | 6016e2180e7364f8e203aa2bac6ecb093be0e875 (diff) | |
parent | 3860b5076fd650e8cb0e7378675b241ec96b2e41 (diff) |
Merge pull request #170 from zeux/move
This change implements move ctor and assign support for xml_document.
All node handles remain valid after the move and point to the new document; the only exception is the document node itself (that remains unmoved).
Move is O(document size) in theory because it needs to relocate immediate document children (there is just one in conformant documents) and all memory pages; in practice the memory pages only need the header adjusted, which is ~0.1% of the actual data size.
Move requires no allocations in general, except when using compact mode where some moves need to grow the hash table which can fail (throw).
Fixes #104
-rw-r--r-- | Makefile | 9 | ||||
-rw-r--r-- | src/pugixml.cpp | 148 | ||||
-rw-r--r-- | src/pugixml.hpp | 7 | ||||
-rw-r--r-- | tests/test_document.cpp | 185 |
4 files changed, 333 insertions, 16 deletions
@@ -27,13 +27,8 @@ ifeq ($(config),coverage) endif ifeq ($(config),sanitize) - CXXFLAGS+=-fsanitize=address - LDFLAGS+=-fsanitize=address - - ifneq ($(shell uname),Darwin) - CXXFLAGS+=-fsanitize=undefined - LDFLAGS+=-fsanitize=undefined - endif + CXXFLAGS+=-fsanitize=address,undefined + LDFLAGS+=-fsanitize=address,undefined endif ifeq ($(config),analyze) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index b9e54fe..01ab41d 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -327,10 +327,10 @@ PUGI__NS_BEGIN item->value = value; } - bool reserve() + bool reserve(size_t extra = 16) { - if (_count + 16 >= _capacity - _capacity / 4) - return rehash(); + if (_count + extra >= _capacity - _capacity / 4) + return rehash(_count + extra); return true; } @@ -347,7 +347,7 @@ PUGI__NS_BEGIN size_t _count; - bool rehash(); + bool rehash(size_t count); item_t* get_item(const void* key) { @@ -387,16 +387,20 @@ PUGI__NS_BEGIN } }; - PUGI__FN_NO_INLINE bool compact_hash_table::rehash() + PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count) { + size_t capacity = 32; + while (count >= capacity - capacity / 4) + capacity *= 2; + compact_hash_table rt; - rt._capacity = (_capacity == 0) ? 32 : _capacity * 2; - rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * rt._capacity)); + rt._capacity = capacity; + rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity)); if (!rt._items) return false; - memset(rt._items, 0, sizeof(item_t) * rt._capacity); + memset(rt._items, 0, sizeof(item_t) * capacity); for (size_t i = 0; i < _capacity; ++i) if (_items[i].key) @@ -405,7 +409,7 @@ PUGI__NS_BEGIN if (_items) xml_memory::deallocate(_items); - _capacity = rt._capacity; + _capacity = capacity; _items = rt._items; assert(_count == rt._count); @@ -6830,6 +6834,25 @@ namespace pugi _destroy(); } +#ifdef PUGIXML_HAS_MOVE + PUGI__FN xml_document::xml_document(xml_document&& rhs): _buffer(0) + { + _create(); + _move(rhs); + } + + PUGI__FN xml_document& xml_document::operator=(xml_document&& rhs) + { + if (this == &rhs) return *this; + + _destroy(); + _create(); + _move(rhs); + + return *this; + } +#endif + PUGI__FN void xml_document::reset() { _destroy(); @@ -6925,6 +6948,113 @@ namespace pugi _root = 0; } +#ifdef PUGIXML_HAS_MOVE + PUGI__FN void xml_document::_move(xml_document& rhs) + { + impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root); + impl::xml_document_struct* other = static_cast<impl::xml_document_struct*>(rhs._root); + + // save first child pointer for later; this needs hash access + xml_node_struct* other_first_child = other->first_child; + + #ifdef PUGIXML_COMPACT + // reserve space for the hash table up front; this is the only operation that can fail + // if it does, we have no choice but to throw (if we have exceptions) + if (other_first_child) + { + size_t other_children = 0; + for (xml_node_struct* node = other_first_child; node; node = node->next_sibling) + other_children++; + + // in compact mode, each pointer assignment could result in a hash table request + // during move, we have to relocate document first_child and parents of all children + // normally there's just one child and its parent has a pointerless encoding but + // we assume the worst here + if (!other->_hash->reserve(other_children + 1)) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return; + #else + throw std::bad_alloc(); + #endif + } + } + #endif + + // move allocation state + doc->_root = other->_root; + doc->_busy_size = other->_busy_size; + + // move buffer state + doc->buffer = other->buffer; + doc->extra_buffers = other->extra_buffers; + _buffer = rhs._buffer; + + #ifdef PUGIXML_COMPACT + // move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child + doc->hash = other->hash; + doc->_hash = &doc->hash; + + // make sure we don't access other hash up until the end when we reinitialize other document + other->_hash = 0; + #endif + + // move page structure + impl::xml_memory_page* doc_page = PUGI__GETPAGE(doc); + assert(doc_page && !doc_page->prev && !doc_page->next); + + impl::xml_memory_page* other_page = PUGI__GETPAGE(other); + assert(other_page && !other_page->prev); + + // relink pages since root page is embedded into xml_document + if (impl::xml_memory_page* page = other_page->next) + { + assert(page->prev == other_page); + + page->prev = doc_page; + + doc_page->next = page; + other_page->next = 0; + } + + // make sure pages point to the correct document state + for (impl::xml_memory_page* page = doc_page->next; page; page = page->next) + { + assert(page->allocator == other); + + page->allocator = doc; + + #ifdef PUGIXML_COMPACT + // this automatically migrates most children between documents and prevents ->parent assignment from allocating + if (page->compact_shared_parent == other) + page->compact_shared_parent = doc; + #endif + } + + // move tree structure + assert(!doc->first_child); + + doc->first_child = other_first_child; + + for (xml_node_struct* node = other_first_child; node; node = node->next_sibling) + { + #ifdef PUGIXML_COMPACT + // most children will have migrated when we reassigned compact_shared_parent + assert(node->parent == other || node->parent == doc); + + node->parent = doc; + #else + assert(node->parent == other); + node->parent = doc; + #endif + } + + // reset other document + new (other) impl::xml_document_struct(PUGI__GETPAGE(other)); + rhs._buffer = 0; + } +#endif + #ifndef PUGIXML_NO_STL PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding) { diff --git a/src/pugixml.hpp b/src/pugixml.hpp index 5059c96..0058fd3 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -983,6 +983,7 @@ namespace pugi void _create(); void _destroy(); + void _move(xml_document& rhs); public: // Default constructor, makes empty document @@ -991,6 +992,12 @@ namespace pugi // Destructor, invalidates all node/attribute handles to this document ~xml_document(); + #ifdef PUGIXML_HAS_MOVE + // Move semantics support + xml_document(xml_document&& rhs); + xml_document& operator=(xml_document&& rhs); + #endif + // Removes all nodes, leaving the empty document void reset(); diff --git a/tests/test_document.cpp b/tests/test_document.cpp index ecbe6dc..08d836c 100644 --- a/tests/test_document.cpp +++ b/tests/test_document.cpp @@ -1621,3 +1621,188 @@ TEST(document_convert_out_of_memory) delete[] files[j].data; } } + +#ifdef PUGIXML_HAS_MOVE +TEST_XML(document_move_ctor, "<node1/><node2/>") +{ + xml_document other = std::move(doc); + + CHECK(doc.first_child().empty()); + + CHECK_STRING(other.first_child().name(), STR("node1")); + CHECK(other.first_child().parent() == other); + + CHECK_STRING(other.last_child().name(), STR("node2")); + CHECK(other.last_child().parent() == other); +} + +TEST_XML(document_move_assign, "<node1/><node2/>") +{ + xml_document other; + CHECK(other.load_string(STR("<node3/>"))); + + other = std::move(doc); + + CHECK(doc.first_child().empty()); + + CHECK_STRING(other.first_child().name(), STR("node1")); + CHECK(other.first_child().parent() == other); + + CHECK_STRING(other.last_child().name(), STR("node2")); + CHECK(other.last_child().parent() == other); +} + +TEST_XML(document_move_zero_alloc, "<node1/><node2/>") +{ + test_runner::_memory_fail_threshold = 1; + + xml_document other = std::move(doc); + + CHECK(doc.first_child().empty()); + + CHECK_STRING(other.first_child().name(), STR("node1")); + CHECK(other.first_child().parent() == other); + + CHECK_STRING(other.last_child().name(), STR("node2")); + CHECK(other.last_child().parent() == other); +} + +TEST(document_move_append_buffer) +{ + xml_document* doc = new xml_document(); + CHECK(doc->load_string(STR("<node1 attr1='value1'><node2/></node1>"))); + CHECK(doc->child(STR("node1")).append_buffer("<node3/>", 8)); + CHECK(doc->child(STR("node1")).append_buffer("<node4/>", 8)); + + xml_document other = std::move(*doc); + delete doc; + + CHECK(other.child(STR("node1")).append_buffer("<node5/>", 8)); + CHECK(other.child(STR("node1")).append_buffer("<node6/>", 8)); + + CHECK_NODE(other, STR("<node1 attr1=\"value1\"><node2/><node3/><node4/><node5/><node6/></node1>")); +} + +TEST(document_move_append_child) +{ + xml_document* doc = new xml_document(); + CHECK(doc->load_string(STR("<node1 attr1='value1'><node2/></node1>"))); + + xml_document other = std::move(*doc); + delete doc; + + for (int i = 0; i < 3000; ++i) + other.child(STR("node1")).append_child(STR("node")); + + for (int i = 0; i < 3000; ++i) + other.child(STR("node1")).remove_child(other.child(STR("node1")).last_child()); + + CHECK_NODE(other, STR("<node1 attr1=\"value1\"><node2/></node1>")); + + other.remove_child(other.first_child()); + + CHECK(!other.first_child()); +} + +TEST(document_move_empty) +{ + xml_document* doc = new xml_document(); + xml_document other = std::move(*doc); + delete doc; +} + +TEST(document_move_large) +{ + xml_document* doc = new xml_document(); + + xml_node dn = doc->append_child(STR("node")); + + for (int i = 0; i < 3000; ++i) + dn.append_child(STR("child")); + + xml_document other = std::move(*doc); + delete doc; + + xml_node on = other.child(STR("node")); + + for (int i = 0; i < 3000; ++i) + CHECK(on.remove_child(on.first_child())); + + CHECK(!on.first_child()); +} + +TEST_XML(document_move_buffer, "<node1/><node2/>") +{ + CHECK(doc.child(STR("node2")).offset_debug() == 9); + + xml_document other = std::move(doc); + + CHECK(other.child(STR("node2")).offset_debug() == 9); +} + +TEST_XML(document_move_append_child_zero_alloc, "<node1/><node2/>") +{ + test_runner::_memory_fail_threshold = 1; + + xml_document other = std::move(doc); + + CHECK(other.append_child(STR("node3"))); + + CHECK_NODE(other, STR("<node1/><node2/><node3/>")); +} + +TEST(document_move_empty_zero_alloc) +{ + xml_document* docs = new xml_document[32]; + + test_runner::_memory_fail_threshold = 1; + + for (int i = 1; i < 32; ++i) + docs[i] = std::move(docs[i-1]); + + delete[] docs; +} + +#ifndef PUGIXML_COMPACT +TEST(document_move_repeated_zero_alloc) +{ + xml_document docs[32]; + + CHECK(docs[0].load_string(STR("<node><child/></node>"))); + + test_runner::_memory_fail_threshold = 1; + + for (int i = 1; i < 32; ++i) + docs[i] = std::move(docs[i-1]); + + for (int i = 0; i < 31; ++i) + CHECK(!docs[i].first_child()); + + CHECK_NODE(docs[31], STR("<node><child/></node>")); +} +#endif + +#ifdef PUGIXML_COMPACT +TEST(document_move_compact_fail) +{ + xml_document docs[32]; + + CHECK(docs[0].load_string(STR("<node><child/></node>"))); + + test_runner::_memory_fail_threshold = 1; + + int safe_count = 21; + + for (int i = 1; i <= safe_count; ++i) + docs[i] = std::move(docs[i-1]); + + CHECK_ALLOC_FAIL(docs[safe_count+1] = std::move(docs[safe_count])); + + for (int i = 0; i < safe_count; ++i) + CHECK(!docs[i].first_child()); + + CHECK_NODE(docs[safe_count], STR("<node><child/></node>")); + CHECK(!docs[safe_count+1].first_child()); +} +#endif +#endif |