From 9a5d7f62fdfb7f9c9f9354758777a3a881b25638 Mon Sep 17 00:00:00 2001 From: "arseny.kapoulkine" Date: Mon, 6 Nov 2006 18:38:04 +0000 Subject: Added child_value and child_value_w helpers, added more eol handling flags, optimized strconv_t, fixed warnings in MSVC (W4), some inner refactoring, fixed child_value for empty nodes git-svn-id: http://pugixml.googlecode.com/svn/trunk@3 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 222 ++++++++++++++++++++++++++++++++++++++------------------ src/pugixml.hpp | 20 +++-- 2 files changed, 168 insertions(+), 74 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 1018bda..4ef32b9 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -101,9 +101,7 @@ namespace pugi } namespace -{ - using namespace pugi; - +{ namespace utf8 { const unsigned char BYTE_MASK = 0xBF; @@ -111,9 +109,12 @@ namespace const unsigned char BYTE_MASK_READ = 0x3F; const unsigned char FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; } +} +namespace pugi +{ // Get the size that is needed for strutf16_utf8 applied to all s characters - size_t strutf16_utf8_size(const wchar_t* s) + static size_t strutf16_utf8_size(const wchar_t* s) { size_t length = 0; @@ -133,7 +134,7 @@ namespace // \param ch - char // \return position after the last char // \rem yes, this is trom TinyXML. How would you write it the other way, without switch trick?.. 
- char* strutf16_utf8(char* s, unsigned int ch) + static char* strutf16_utf8(char* s, unsigned int ch) { unsigned int length; @@ -165,7 +166,7 @@ namespace } // Get the size that is needed for strutf8_utf16 applied to all s characters - size_t strutf8_utf16_size(const char* s) + static size_t strutf8_utf16_size(const char* s) { size_t length = 0; @@ -181,7 +182,7 @@ namespace // \param s - pointer to string // \param ch - char // \return position after the last char - const char* strutf8_utf16(const char* s, unsigned int& ch) + static const char* strutf8_utf16(const char* s, unsigned int& ch) { unsigned int length; @@ -248,14 +249,28 @@ namespace static bool chartype_lbracket(char c) { return c == '['; } static bool chartype_rbracket(char c) { return c == ']'; } - template static void strconv_t(char** s) + template static void strconv_t(char** s) { if (!s || !*s) return; + if (!opt_trim && !opt_escape && !opt_wnorm && !opt_wconv && !opt_eol) return; + // Trim whitespaces if (opt_trim) while (chartype_space(**s)) ++(*s); - + char* str = *s; + + // Skip usual symbols + if (opt_escape || opt_wnorm || opt_wconv || opt_eol) + { + while (*str) + { + if (opt_wconv && *str == '&') break; + if ((opt_wnorm || opt_wconv || opt_eol) && chartype_space(*str)) break; + ++str; + } + } + char* lastpos = str; if (!*str) return; @@ -359,6 +374,14 @@ namespace } } } + else if (chartype_space(*str) && opt_wnorm) + { + *lastpos++ = ' '; + + while (chartype_space(*str)) ++str; + + continue; + } else if (chartype_space(*str) && opt_wconv) { if (*str == 0x0d && *(str + 1) == 0x0a) ++str; @@ -368,7 +391,7 @@ namespace continue; } - else if (*str == 0x0d && !opt_wnorm) + else if (*str == 0x0d && !opt_wnorm && opt_eol) { if (*(str + 1) == 0x0a) ++str; ++str; @@ -376,14 +399,6 @@ namespace continue; } - else if (chartype_space(*str) && opt_wnorm) - { - *lastpos++ = ' '; - - while (chartype_space(*str)) ++str; - - continue; - } *lastpos++ = *str++; } @@ -395,64 +410,128 @@ namespace } else 
*lastpos = 0; } - - static void strconv_setup(void (*&func)(char**), unsigned int opt_trim, unsigned int opt_escape, unsigned int opt_wnorm, unsigned int opt_wconv) + + static void strconv_setup(void (*&func)(char**), unsigned int opt_trim, unsigned int opt_escape, unsigned int opt_wnorm, unsigned int opt_wconv, unsigned int opt_eol) { - if (opt_wconv) + if (opt_eol) { - if (opt_trim) + if (opt_wconv) { - if (opt_escape) + if (opt_trim) { - if (opt_wnorm) func = &strconv_t; - else func = &strconv_t; + if (opt_escape) + { + if (opt_wnorm) func = &strconv_t; + else func = &strconv_t; + } + else + { + if (opt_wnorm) func = &strconv_t; + else func = &strconv_t; + } } else { - if (opt_wnorm) func = &strconv_t; - else func = &strconv_t; + if (opt_escape) + { + if (opt_wnorm) func = &strconv_t; + else func = &strconv_t; + } + else + { + if (opt_wnorm) func = &strconv_t; + else func = &strconv_t; + } } } else { - if (opt_escape) + if (opt_trim) { - if (opt_wnorm) func = &strconv_t; - else func = &strconv_t; + if (opt_escape) + { + if (opt_wnorm) func = &strconv_t; + else func = &strconv_t; + } + else + { + if (opt_wnorm) func = &strconv_t; + else func = &strconv_t; + } } else { - if (opt_wnorm) func = &strconv_t; - else func = &strconv_t; + if (opt_escape) + { + if (opt_wnorm) func = &strconv_t; + else func = &strconv_t; + } + else + { + if (opt_wnorm) func = &strconv_t; + else func = &strconv_t; + } } } } else { - if (opt_trim) + if (opt_wconv) { - if (opt_escape) + if (opt_trim) { - if (opt_wnorm) func = &strconv_t; - else func = &strconv_t; + if (opt_escape) + { + if (opt_wnorm) func = &strconv_t; + else func = &strconv_t; + } + else + { + if (opt_wnorm) func = &strconv_t; + else func = &strconv_t; + } } else { - if (opt_wnorm) func = &strconv_t; - else func = &strconv_t; + if (opt_escape) + { + if (opt_wnorm) func = &strconv_t; + else func = &strconv_t; + } + else + { + if (opt_wnorm) func = &strconv_t; + else func = &strconv_t; + } } } else { - if (opt_escape) + if 
(opt_trim) { - if (opt_wnorm) func = &strconv_t; - else func = &strconv_t; + if (opt_escape) + { + if (opt_wnorm) func = &strconv_t; + else func = &strconv_t; + } + else + { + if (opt_wnorm) func = &strconv_t; + else func = &strconv_t; + } } else { - if (opt_wnorm) func = &strconv_t; - else func = &strconv_t; + if (opt_escape) + { + if (opt_wnorm) func = &strconv_t; + else func = &strconv_t; + } + else + { + if (opt_wnorm) func = &strconv_t; + else func = &strconv_t; + } } } } @@ -526,8 +605,8 @@ namespace void (*strconv_pcdata)(char**); void (*strconv_attribute)(char**); - strconv_setup(strconv_attribute, OPTSET(parse_trim_attribute), OPTSET(parse_escapes_attribute), OPTSET(parse_wnorm_attribute), OPTSET(parse_wconv_attribute)); - strconv_setup(strconv_pcdata, OPTSET(parse_trim_pcdata), OPTSET(parse_escapes_pcdata), OPTSET(parse_wnorm_pcdata), false); + strconv_setup(strconv_attribute, OPTSET(parse_trim_attribute), OPTSET(parse_escapes_attribute), OPTSET(parse_wnorm_attribute), OPTSET(parse_wconv_attribute), OPTSET(parse_eol_attribute)); + strconv_setup(strconv_pcdata, OPTSET(parse_trim_pcdata), OPTSET(parse_escapes_pcdata), OPTSET(parse_wnorm_pcdata), false, OPTSET(parse_eol_pcdata)); char ch = 0; // Current char, in cases where we must null-terminate before we test. xml_node_struct* cursor = xmldoc; // Tree node cursor. @@ -622,7 +701,7 @@ namespace if (OPTSET(parse_eol_cdata)) { - strconv_t(&cursor->value); + strconv_t(&cursor->value); } POPNODE(); // Pop since this is a standalone. @@ -860,10 +939,13 @@ namespace } return s; } + + private: + const xml_parser_impl& operator=(const xml_parser_impl&); }; // Compare lhs with [rhs_begin, rhs_end) - int strcmprange(const char* lhs, const char* rhs_begin, const char* rhs_end) + static int strcmprange(const char* lhs, const char* rhs_begin, const char* rhs_end) { while (*lhs && rhs_begin != rhs_end && *lhs == *rhs_begin) { @@ -876,7 +958,7 @@ namespace } // Character set pattern match. 
- int strcmpwild_cset(const char** src, const char** dst) + static int strcmpwild_cset(const char** src, const char** dst) { int find = 0, excl = 0, star = 0; @@ -909,22 +991,14 @@ namespace return find; } -} -namespace pugi -{ namespace impl { int strcmpwild(const char* src, const char* dst); } -} - -namespace -{ - using namespace pugi; // Wildcard pattern match. - int strcmpwild_astr(const char** src, const char** dst) + static int strcmpwild_astr(const char** src, const char** dst) { int find = 1; ++(*src); @@ -952,10 +1026,7 @@ namespace return find; } } -} -namespace pugi -{ namespace impl { // Compare two strings, with globbing, and character sets. @@ -977,16 +1048,18 @@ namespace pugi } } - extern "C" + int strcmp(const char* lhs, const char* rhs) { - int strcmpwildimpl(const char* src, const char* dst) - { - return impl::strcmpwild(src, dst); - } + return ::strcmp(lhs, rhs); + } - typedef int (*strcmpfunc)(const char*, const char*); + int strcmpwildimpl(const char* src, const char* dst) + { + return impl::strcmpwild(src, dst); } + typedef int (*strcmpfunc)(const char*, const char*); + xml_attribute_struct::xml_attribute_struct(): name(0), value(0), prev_attribute(0), next_attribute(0) { } @@ -1355,12 +1428,23 @@ namespace pugi const char* xml_node::child_value() const { - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if ((i->type == node_pcdata || i->type == node_cdata) && i->value) - return i->value; + if (!empty()) + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + if ((i->type == node_pcdata || i->type == node_cdata) && i->value) + return i->value; return ""; } + const char* xml_node::child_value(const char* name) const + { + return child(name).child_value(); + } + + const char* xml_node::child_value_w(const char* name) const + { + return child_w(name).child_value(); + } + xml_attribute xml_node::first_attribute() const { return _root ? 
xml_attribute(_root->first_attribute) : xml_attribute(); diff --git a/src/pugixml.hpp b/src/pugixml.hpp index 026e6ba..746f6c3 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -46,10 +46,13 @@ namespace pugi const unsigned int parse_wnorm_pcdata = 0x00000200; ///< Normalize spaces in pcdata const unsigned int parse_wnorm_attribute = 0x00000400; ///< Normalize spaces in attributes const unsigned int parse_wconv_attribute = 0x00000800; ///< Convert space-like characters to spaces in attributes (only if wnorm is not set) - const unsigned int parse_eol_cdata = 0x00001000; ///< Perform EOL handling in CDATA sections - const unsigned int parse_check_end_tags = 0x00002000; ///< Check start and end tag names and return error if names mismatch - const unsigned int parse_match_end_tags = 0x00004000; ///< Try to find corresponding start tag for an end tag - const unsigned int parse_default = 0x0000FFFF & ~parse_ws_pcdata; ///< Set all flags, except parse_ws_pcdata + const unsigned int parse_eol_pcdata = 0x00001000; ///< Perform EOL handling in pcdata + const unsigned int parse_eol_attribute = 0x00002000; ///< Perform EOL handling in attributes + const unsigned int parse_eol_cdata = 0x00004000; ///< Perform EOL handling in CDATA sections + const unsigned int parse_check_end_tags = 0x00010000; ///< Check start and end tag names and return error if names mismatch + const unsigned int parse_match_end_tags = 0x00020000; ///< Try to find corresponding start tag for an end tag + ///< Set all flags, except parse_ws_pcdata and parse_trim_attribute + const unsigned int parse_default = 0x00FFFFFF & ~parse_ws_pcdata & ~parse_trim_attribute; const unsigned int parse_noset = 0x80000000; ///< Parse with flags in xml_parser const unsigned int parse_w3c = parse_pi | parse_comments | parse_cdata | @@ -266,6 +269,13 @@ namespace pugi /// Return PCDATA/CDATA that is child of current node. If none, return empty string. 
const char* child_value() const; + /// Return PCDATA/CDATA that is child of specified child node. If none, return empty string. + const char* child_value(const char* name) const; + + /// Return PCDATA/CDATA that is child of specified child node. If none, return empty string. + /// Enable wildcard matching. + const char* child_value_w(const char* name) const; + public: /// Access node's first attribute if any, else xml_attribute() xml_attribute first_attribute() const; @@ -472,7 +482,7 @@ namespace pugi std::vector _buffer; ///< character buffer xml_memory_block _memory; ///< Memory block - + xml_node_struct* _xmldoc; ///< Pointer to current XML document tree root. unsigned int _optmsk; ///< Parser options. -- cgit v1.2.3