From edb57c96a883ed76001cb127e39a458aaf5e8731 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 6 Oct 2014 22:25:49 -0700 Subject: Switch to a 3-byte representation for compact strings To make this possible name and value in the node structure had to be merged into one contents field. Not sure what to do with node_pi, since it is the only type that required both. --- src/pugixml.cpp | 180 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 99 insertions(+), 81 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index c0a2980..0cbf088 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -276,11 +276,13 @@ PUGI__NS_BEGIN static const uintptr_t xml_memory_page_alignment = 64; static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1); static const uintptr_t xml_memory_page_contents_shared_mask = 32; + static const uintptr_t xml_memory_page_contents_allocated_mask = 16; static const uintptr_t xml_memory_page_name_allocated_mask = 16; static const uintptr_t xml_memory_page_value_allocated_mask = 8; static const uintptr_t xml_memory_page_type_mask = 7; static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask; static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask; + static const uintptr_t xml_memory_page_contents_allocated_or_shared_mask = xml_memory_page_contents_allocated_mask | xml_memory_page_contents_shared_mask; #define PUGI__NODETYPE(n) static_cast(((n)->header & impl::xml_memory_page_type_mask) + 1) @@ -812,7 +814,7 @@ PUGI__NS_BEGIN template class compact_string { public: - compact_string(): _data(0) + compact_string(): _data0(0), _data1(0), _data2(0) { } @@ -831,35 +833,49 @@ PUGI__NS_BEGIN { page->compact_string_base = value; - _data = 1; + _data0 = 1; + _data1 = 0; + _data2 = 0; } else { ptrdiff_t offset = value - page->compact_string_base; - if (offset >= 0 && offset <= 65533) - _data = static_cast(offset + 1); + if (offset >= 0 && offset <= 1677213) + { + _data0 = static_cast(offset + 1); + _data1 = static_cast((offset + 1) >> 8); + _data2 = static_cast((offset + 1) >> 16); + } else { *page->allocator->_hash->insert(this, tag) = value; - _data = 65535; + _data0 = 255; + _data1 = 255; + _data2 = 255; } } } else - _data = 0; + { + _data0 = 0; + _data1 = 0; + _data2 = 0; + } } operator char_t* const() const { - if (_data) + unsigned int data = _data0 + (_data1 << 8) + (_data2 << 16); + + if (data) { xml_memory_page* page = compact_get_page(this, header_offset); - if (_data < 65535) + if (data < 16777215) { - return page->compact_string_base + (_data - 1); + return page->compact_string_base + (data - 1); } else { @@ -876,7 +892,9 @@ PUGI__NS_BEGIN } private: - unsigned short _data; + unsigned char _data0; + unsigned char _data1; + unsigned char _data2; }; PUGI__NS_END @@ -898,10 +916,10 @@ namespace pugi impl::compact_header header; - unsigned char padding[3]; + unsigned char padding; - impl::compact_string<6, /*tag*/11> name; ///< Pointer to attribute name. - impl::compact_string<8, /*tag*/12> value; ///< Pointer to attribute value. + impl::compact_string<4, /*tag*/11> name; ///< Pointer to attribute name. + impl::compact_string<7, /*tag*/12> value; ///< Pointer to attribute value. impl::compact_pointer prev_attribute_c; ///< Previous attribute (cyclic list) impl::compact_pointer next_attribute; ///< Next attribute @@ -921,17 +939,18 @@ namespace pugi impl::compact_header header; - impl::compact_pointer parent; ///< Pointer to parent + unsigned char padding; - impl::compact_string<4, /*tag*/21> name; ///< Pointer to element name. - impl::compact_string<6, /*tag*/22> value; ///< Pointer to any associated string data. + impl::compact_string<4, /*tag*/21> contents; ///< Pointer to element name. - impl::compact_pointer first_child; ///< First child + impl::compact_pointer parent; ///< Pointer to parent - impl::compact_pointer prev_sibling_c; ///< Left brother (cyclic list) - impl::compact_pointer next_sibling; ///< Right brother + impl::compact_pointer first_child; ///< First child - impl::compact_pointer first_attribute; ///< First attribute + impl::compact_pointer prev_sibling_c; ///< Left brother (cyclic list) + impl::compact_pointer next_sibling; ///< Right brother + + impl::compact_pointer first_attribute; ///< First attribute }; } #else @@ -959,7 +978,7 @@ namespace pugi { /// Default ctor /// \param type - node type - xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) + xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast(page) | (type - 1)), parent(0), contents(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) { } @@ -967,8 +986,7 @@ namespace pugi xml_node_struct* parent; ///< Pointer to parent - char_t* name; ///< Pointer to element name. - char_t* value; ///< Pointer to any associated string data. + char_t* contents; ///< Pointer to element name/value xml_node_struct* first_child; ///< First child @@ -1052,8 +1070,7 @@ PUGI__NS_BEGIN { uintptr_t header = n->header; - if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name); - if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value); + if (header & impl::xml_memory_page_contents_allocated_mask) alloc.deallocate_string(n->contents); for (xml_attribute_struct* attr = n->first_attribute; attr; ) { @@ -2827,14 +2844,14 @@ PUGI__NS_BEGIN if (PUGI__OPTSET(parse_comments)) { PUGI__PUSHNODE(node_comment); // Append a new node on the tree. - cursor->value = s; // Save the offset. + cursor->contents = s; // Save the offset. } if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments)) { s = strconv_comment(s, endch); - if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value); + if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->contents); } else { @@ -2860,13 +2877,13 @@ PUGI__NS_BEGIN if (PUGI__OPTSET(parse_cdata)) { PUGI__PUSHNODE(node_cdata); // Append a new node on the tree. - cursor->value = s; // Save the offset. + cursor->contents = s; // Save the offset. if (PUGI__OPTSET(parse_eol)) { s = strconv_cdata(s, endch); - if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value); + if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->contents); } else { @@ -2910,7 +2927,7 @@ PUGI__NS_BEGIN PUGI__PUSHNODE(node_doctype); - cursor->value = mark; + cursor->contents = mark; PUGI__POPNODE(); } @@ -2956,7 +2973,7 @@ PUGI__NS_BEGIN PUGI__PUSHNODE(node_pi); } - cursor->name = target; + cursor->contents = target; PUGI__ENDSEG(); @@ -2990,7 +3007,7 @@ PUGI__NS_BEGIN else { // store value and step over > - cursor->value = value; + // TODO: node_pi value:cursor->value = value; PUGI__POPNODE(); PUGI__ENDSEG(); @@ -3035,7 +3052,7 @@ PUGI__NS_BEGIN { PUGI__PUSHNODE(node_element); // Append a new node to the tree. - cursor->name = s; + cursor->contents = s; PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. @@ -3149,7 +3166,7 @@ PUGI__NS_BEGIN { ++s; - char_t* name = cursor->name; + char_t* name = cursor->contents; if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s); while (PUGI__IS_CHARTYPE(*s, ct_symbol)) @@ -3220,7 +3237,7 @@ PUGI__NS_BEGIN if (cursor->parent || PUGI__OPTSET(parse_fragment)) { PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. - cursor->value = s; // Save the offset. + cursor->contents = s; // Save the offset. s = strconv_pcdata(s); @@ -4088,8 +4105,7 @@ PUGI__NS_BEGIN PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc) { - node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc); - node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc); + node_copy_string(dn->contents, dn->header, xml_memory_page_contents_allocated_mask, sn->contents, sn->header, shared_alloc); for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute) { @@ -4680,6 +4696,20 @@ PUGI__NS_BEGIN return res; } + + inline bool has_name(xml_node_struct* node) + { + static const bool result[] = { false, false, true, false, false, false, true, true, false }; + + return result[PUGI__NODETYPE(node)]; + } + + inline bool has_value(xml_node_struct* node) + { + static const bool result[] = { false, false, false, true, true, true, false, false, true }; + + return result[PUGI__NODETYPE(node)]; + } PUGI__NS_END namespace pugi @@ -5080,7 +5110,7 @@ namespace pugi PUGI__FN const char_t* xml_node::name() const { - return (_root && _root->name) ? _root->name : PUGIXML_TEXT(""); + return (_root && impl::has_name(_root) && _root->contents) ? _root->contents : PUGIXML_TEXT(""); } PUGI__FN xml_node_type xml_node::type() const @@ -5090,7 +5120,7 @@ namespace pugi PUGI__FN const char_t* xml_node::value() const { - return (_root && _root->value) ? _root->value : PUGIXML_TEXT(""); + return (_root && impl::has_value(_root) && _root->contents) ? _root->contents : PUGIXML_TEXT(""); } PUGI__FN xml_node xml_node::child(const char_t* name_) const @@ -5098,7 +5128,7 @@ namespace pugi if (!_root) return xml_node(); for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequal(name_, i->name)) return xml_node(i); + if (impl::has_name(i) && i->contents && impl::strequal(name_, i->contents)) return xml_node(i); return xml_node(); } @@ -5119,7 +5149,7 @@ namespace pugi if (!_root) return xml_node(); for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) - if (i->name && impl::strequal(name_, i->name)) return xml_node(i); + if (impl::has_name(i) && i->contents && impl::strequal(name_, i->contents)) return xml_node(i); return xml_node(); } @@ -5134,7 +5164,7 @@ namespace pugi if (!_root) return xml_node(); for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) - if (i->name && impl::strequal(name_, i->name)) return xml_node(i); + if (impl::has_name(i) && i->contents && impl::strequal(name_, i->contents)) return xml_node(i); return xml_node(); } @@ -5171,8 +5201,8 @@ namespace pugi if (!_root) return PUGIXML_TEXT(""); for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->value && impl::is_text_node(i)) - return i->value; + if (impl::has_value(i) && i->contents && impl::is_text_node(i)) + return i->contents; return PUGIXML_TEXT(""); } @@ -5209,7 +5239,7 @@ namespace pugi case node_pi: case node_declaration: case node_element: - return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs); + return impl::strcpy_insitu(_root->contents, _root->header, impl::xml_memory_page_contents_allocated_mask, rhs); default: return false; @@ -5225,7 +5255,7 @@ namespace pugi case node_pcdata: case node_comment: case node_doctype: - return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs); + return impl::strcpy_insitu(_root->contents, _root->header, impl::xml_memory_page_contents_allocated_mask, rhs); default: return false; @@ -5613,15 +5643,15 @@ namespace pugi if (!extra) return impl::make_parse_result(status_out_of_memory); // save name; name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level - char_t* rootname = _root->name; - _root->name = 0; + char_t* rootname = _root->contents; + _root->contents = 0; // parse char_t* buffer = 0; xml_parse_result res = impl::load_buffer_impl(doc, _root, const_cast(contents), size, options, encoding, false, false, &buffer); // restore name - _root->name = rootname; + _root->contents = rootname; // add extra buffer to the list extra->buffer = buffer; @@ -5636,7 +5666,7 @@ namespace pugi if (!_root) return xml_node(); for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequal(name_, i->name)) + if (impl::has_name(i) && i->contents && impl::strequal(name_, i->contents)) { for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value : PUGIXML_TEXT(""))) @@ -5714,7 +5744,7 @@ namespace pugi { for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling) { - if (j->name && impl::strequalrange(j->name, path_segment, static_cast(path_segment_end - path_segment))) + if (impl::has_name(j) && j->contents && impl::strequalrange(j->contents, path_segment, static_cast(path_segment_end - path_segment))) { xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); @@ -5824,19 +5854,8 @@ namespace pugi case node_document: return 0; - case node_element: - case node_declaration: - case node_pi: - return (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) ? -1 : _root->name - buffer; - - case node_pcdata: - case node_cdata: - case node_comment: - case node_doctype: - return (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) ? -1 : _root->value - buffer; - default: - return -1; + return (_root->header & impl::xml_memory_page_contents_allocated_or_shared_mask) ? -1 : _root->contents - buffer; } } @@ -5902,49 +5921,49 @@ namespace pugi { xml_node_struct* d = _data(); - return (d && d->value) ? d->value : PUGIXML_TEXT(""); + return (d && d->contents) ? d->contents : PUGIXML_TEXT(""); } PUGI__FN const char_t* xml_text::as_string(const char_t* def) const { xml_node_struct* d = _data(); - return (d && d->value) ? d->value : def; + return (d && d->contents) ? d->contents : def; } PUGI__FN int xml_text::as_int(int def) const { xml_node_struct* d = _data(); - return impl::get_value_int(d ? PUGI__COMPACT(d->value) : 0, def); + return impl::get_value_int(d ? PUGI__COMPACT(d->contents) : 0, def); } PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const { xml_node_struct* d = _data(); - return impl::get_value_uint(d ? PUGI__COMPACT(d->value) : 0, def); + return impl::get_value_uint(d ? PUGI__COMPACT(d->contents) : 0, def); } PUGI__FN double xml_text::as_double(double def) const { xml_node_struct* d = _data(); - return impl::get_value_double(d ? PUGI__COMPACT(d->value) : 0, def); + return impl::get_value_double(d ? PUGI__COMPACT(d->contents) : 0, def); } PUGI__FN float xml_text::as_float(float def) const { xml_node_struct* d = _data(); - return impl::get_value_float(d ? PUGI__COMPACT(d->value) : 0, def); + return impl::get_value_float(d ? PUGI__COMPACT(d->contents) : 0, def); } PUGI__FN bool xml_text::as_bool(bool def) const { xml_node_struct* d = _data(); - return impl::get_value_bool(d ? PUGI__COMPACT(d->value) : 0, def); + return impl::get_value_bool(d ? PUGI__COMPACT(d->contents) : 0, def); } #ifdef PUGIXML_HAS_LONG_LONG @@ -5952,14 +5971,14 @@ namespace pugi { xml_node_struct* d = _data(); - return impl::get_value_llong(d ? PUGI__COMPACT(d->value) : 0, def); + return impl::get_value_llong(d ? PUGI__COMPACT(d->contents) : 0, def); } PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const { xml_node_struct* d = _data(); - return impl::get_value_ullong(d ? PUGI__COMPACT(d->value) : 0, def); + return impl::get_value_ullong(d ? PUGI__COMPACT(d->contents) : 0, def); } #endif @@ -5967,35 +5986,35 @@ namespace pugi { xml_node_struct* dn = _data_new(); - return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::strcpy_insitu(dn->contents, dn->header, impl::xml_memory_page_contents_allocated_mask, rhs) : false; } PUGI__FN bool xml_text::set(int rhs) { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_convert(dn->contents, dn->header, impl::xml_memory_page_contents_allocated_mask, rhs) : false; } PUGI__FN bool xml_text::set(unsigned int rhs) { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_convert(dn->contents, dn->header, impl::xml_memory_page_contents_allocated_mask, rhs) : false; } PUGI__FN bool xml_text::set(double rhs) { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_convert(dn->contents, dn->header, impl::xml_memory_page_contents_allocated_mask, rhs) : false; } PUGI__FN bool xml_text::set(bool rhs) { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_convert(dn->contents, dn->header, impl::xml_memory_page_contents_allocated_mask, rhs) : false; } #ifdef PUGIXML_HAS_LONG_LONG @@ -6003,14 +6022,14 @@ namespace pugi { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_convert(dn->contents, dn->header, impl::xml_memory_page_contents_allocated_mask, rhs) : false; } PUGI__FN bool xml_text::set(unsigned long long rhs) { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_convert(dn->contents, dn->header, impl::xml_memory_page_contents_allocated_mask, rhs) : false; } #endif @@ -7358,8 +7377,7 @@ PUGI__NS_BEGIN { if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0) { - if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name; - if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value; + if (node->contents && (node->header & impl::xml_memory_page_contents_allocated_or_shared_mask) == 0) return node->contents; } return 0; -- cgit v1.2.3