Added child_value and child_value_w helpers, added more eol handling flags, optimized strconv_t, fixed warnings in MSVC (W4), some inner refactoring, fixed child_value for empty nodes

git-svn-id: http://pugixml.googlecode.com/svn/trunk@3 99668b35-9821-0410-8761-19e4c4f06640
author: arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640> 2006-11-06 18:38:04 +0000
committer: arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640> 2006-11-06 18:38:04 +0000
commit: 9a5d7f62fdfb7f9c9f9354758777a3a881b25638 (patch)
tree: 77083aa5173231be69e965ea8136d6d5def6ac1a /src
parent: 69cc3fcb3a28d4b7f69dfa5f4dcc025eb53332d8 (diff)
2 files changed, 168 insertions, 74 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index 1018bda..4ef32b9 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -101,9 +101,7 @@ namespace pugi
 }
 
 namespace
-{
-	using namespace pugi;
-	
+{	
 	namespace utf8
 	{
 		const unsigned char BYTE_MASK = 0xBF;
@@ -111,9 +109,12 @@ namespace
 		const unsigned char BYTE_MASK_READ = 0x3F;
 		const unsigned char FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
 	}
+}
 
+namespace pugi
+{
 	// Get the size that is needed for strutf16_utf8 applied to all s characters
-	size_t strutf16_utf8_size(const wchar_t* s)
+	static size_t strutf16_utf8_size(const wchar_t* s)
 	{
 		size_t length = 0;
 
@@ -133,7 +134,7 @@ namespace
 	// \param ch - char
 	// \return position after the last char
 	// \rem yes, this is trom TinyXML. How would you write it the other way, without switch trick?..
-	char* strutf16_utf8(char* s, unsigned int ch)
+	static char* strutf16_utf8(char* s, unsigned int ch)
 	{
 		unsigned int length;
 
@@ -165,7 +166,7 @@ namespace
 	}
 
 	// Get the size that is needed for strutf8_utf16 applied to all s characters
-	size_t strutf8_utf16_size(const char* s)
+	static size_t strutf8_utf16_size(const char* s)
 	{
 		size_t length = 0;
 
@@ -181,7 +182,7 @@ namespace
 	// \param s - pointer to string
 	// \param ch - char
 	// \return position after the last char
-	const char* strutf8_utf16(const char* s, unsigned int& ch)
+	static const char* strutf8_utf16(const char* s, unsigned int& ch)
 	{
 		unsigned int length;
 
@@ -248,14 +249,28 @@ namespace
 		static bool chartype_lbracket(char c) { return c == '['; }
 		static bool chartype_rbracket(char c) { return c == ']'; }
 
-		template <bool opt_trim, bool opt_escape, bool opt_wnorm, bool opt_wconv> static void strconv_t(char** s)
+		template <bool opt_trim, bool opt_escape, bool opt_wnorm, bool opt_wconv, bool opt_eol> static void strconv_t(char** s)
 		{
 			if (!s || !*s) return;
 
+			if (!opt_trim && !opt_escape && !opt_wnorm && !opt_wconv && !opt_eol) return;
+
 			// Trim whitespaces
 			if (opt_trim) while (chartype_space(**s)) ++(*s);
-
+			
 			char* str = *s;
+			
+			// Skip usual symbols
+			if (opt_escape || opt_wnorm || opt_wconv || opt_eol)
+			{
+				while (*str)
+				{
+					if (opt_wconv && *str == '&') break;
+					if ((opt_wnorm || opt_wconv || opt_eol) && chartype_space(*str)) break;
+					++str;
+				}
+			}
+
 			char* lastpos = str;
 
 			if (!*str) return;
@@ -359,6 +374,14 @@ namespace
 						}
 					}
 				}
+				else if (chartype_space(*str) && opt_wnorm)
+				{
+					*lastpos++ = ' ';
+		
+					while (chartype_space(*str)) ++str;
+
+					continue;
+				}
 				else if (chartype_space(*str) && opt_wconv)
 				{
 					if (*str == 0x0d && *(str + 1) == 0x0a) ++str;
@@ -368,7 +391,7 @@ namespace
 
 					continue;
 				}
-				else if (*str == 0x0d && !opt_wnorm)
+				else if (*str == 0x0d && !opt_wnorm && opt_eol)
 				{
 					if (*(str + 1) == 0x0a) ++str;
 					++str;
@@ -376,14 +399,6 @@ namespace
 
 					continue;
 				}
-				else if (chartype_space(*str) && opt_wnorm)
-				{
-					*lastpos++ = ' ';
-		
-					while (chartype_space(*str)) ++str;
-
-					continue;
-				}
 				
 				*lastpos++ = *str++;
 			}
@@ -395,64 +410,128 @@ namespace
 			}
 			else *lastpos = 0;
 		}
-
-		static void strconv_setup(void (*&func)(char**), unsigned int opt_trim, unsigned int opt_escape, unsigned int opt_wnorm, unsigned int opt_wconv)
+	
+		static void strconv_setup(void (*&func)(char**), unsigned int opt_trim, unsigned int opt_escape, unsigned int opt_wnorm, unsigned int opt_wconv, unsigned int opt_eol)
 		{
-			if (opt_wconv)
+			if (opt_eol)
 			{
-				if (opt_trim)
+				if (opt_wconv)
 				{
-					if (opt_escape)
+					if (opt_trim)
 					{
-						if (opt_wnorm) func = &strconv_t<true, true, true, true>;
-						else func = &strconv_t<true, true, false, true>;
+						if (opt_escape)
+						{
+							if (opt_wnorm) func = &strconv_t<true, true, true, true, true>;
+							else func = &strconv_t<true, true, false, true, true>;
+						}
+						else
+						{
+							if (opt_wnorm) func = &strconv_t<true, false, true, true, true>;
+							else func = &strconv_t<true, false, false, true, true>;
+						}
 					}
 					else
 					{
-						if (opt_wnorm) func = &strconv_t<true, false, true, true>;
-						else func = &strconv_t<true, false, false, true>;
+						if (opt_escape)
+						{
+							if (opt_wnorm) func = &strconv_t<false, true, true, true, true>;
+							else func = &strconv_t<false, true, false, true, true>;
+						}
+						else
+						{
+							if (opt_wnorm) func = &strconv_t<false, false, true, true, true>;
+							else func = &strconv_t<false, false, false, true, true>;
+						}
 					}
 				}
 				else
 				{
-					if (opt_escape)
+					if (opt_trim)
 					{
-						if (opt_wnorm) func = &strconv_t<false, true, true, true>;
-						else func = &strconv_t<false, true, false, true>;
+						if (opt_escape)
+						{
+							if (opt_wnorm) func = &strconv_t<true, true, true, false, true>;
+							else func = &strconv_t<true, true, false, false, true>;
+						}
+						else
+						{
+							if (opt_wnorm) func = &strconv_t<true, false, true, false, true>;
+							else func = &strconv_t<true, false, false, false, true>;
+						}
 					}
 					else
 					{
-						if (opt_wnorm) func = &strconv_t<false, false, true, true>;
-						else func = &strconv_t<false, false, false, true>;
+						if (opt_escape)
+						{
+							if (opt_wnorm) func = &strconv_t<false, true, true, false, true>;
+							else func = &strconv_t<false, true, false, false, true>;
+						}
+						else
+						{
+							if (opt_wnorm) func = &strconv_t<false, false, true, false, true>;
+							else func = &strconv_t<false, false, false, false, true>;
+							}
 					}
 				}
 			}
 			else
 			{
-				if (opt_trim)
+				if (opt_wconv)
 				{
-					if (opt_escape)
+					if (opt_trim)
 					{
-						if (opt_wnorm) func = &strconv_t<true, true, true, false>;
-						else func = &strconv_t<true, true, false, false>;
+						if (opt_escape)
+						{
+							if (opt_wnorm) func = &strconv_t<true, true, true, true, false>;
+							else func = &strconv_t<true, true, false, true, false>;
+						}
+						else
+						{
+							if (opt_wnorm) func = &strconv_t<true, false, true, true, false>;
+							else func = &strconv_t<true, false, false, true, false>;
+						}
 					}
 					else
 					{
-						if (opt_wnorm) func = &strconv_t<true, false, true, false>;
-						else func = &strconv_t<true, false, false, false>;
+						if (opt_escape)
+						{
+							if (opt_wnorm) func = &strconv_t<false, true, true, true, false>;
+							else func = &strconv_t<false, true, false, true, false>;
+						}
+						else
+						{
+							if (opt_wnorm) func = &strconv_t<false, false, true, true, false>;
+							else func = &strconv_t<false, false, false, true, false>;
+						}
 					}
 				}
 				else
 				{
-					if (opt_escape)
+					if (opt_trim)
 					{
-						if (opt_wnorm) func = &strconv_t<false, true, true, false>;
-						else func = &strconv_t<false, true, false, false>;
+						if (opt_escape)
+						{
+							if (opt_wnorm) func = &strconv_t<true, true, true, false, false>;
+							else func = &strconv_t<true, true, false, false, false>;
+						}
+						else
+						{
+							if (opt_wnorm) func = &strconv_t<true, false, true, false, false>;
+							else func = &strconv_t<true, false, false, false, false>;
+						}
 					}
 					else
 					{
-						if (opt_wnorm) func = &strconv_t<false, false, true, false>;
-						else func = &strconv_t<false, false, false, false>;
+						if (opt_escape)
+						{
+							if (opt_wnorm) func = &strconv_t<false, true, true, false, false>;
+							else func = &strconv_t<false, true, false, false, false>;
+						}
+						else
+						{
+							if (opt_wnorm) func = &strconv_t<false, false, true, false, false>;
+							else func = &strconv_t<false, false, false, false, false>;
+						}
 					}
 				}
 			}
@@ -526,8 +605,8 @@ namespace
 			void (*strconv_pcdata)(char**);
 			void (*strconv_attribute)(char**);
 
-			strconv_setup(strconv_attribute, OPTSET(parse_trim_attribute), OPTSET(parse_escapes_attribute), OPTSET(parse_wnorm_attribute), OPTSET(parse_wconv_attribute));
-			strconv_setup(strconv_pcdata, OPTSET(parse_trim_pcdata), OPTSET(parse_escapes_pcdata), OPTSET(parse_wnorm_pcdata), false);
+			strconv_setup(strconv_attribute, OPTSET(parse_trim_attribute), OPTSET(parse_escapes_attribute), OPTSET(parse_wnorm_attribute), OPTSET(parse_wconv_attribute), OPTSET(parse_eol_attribute));
+			strconv_setup(strconv_pcdata, OPTSET(parse_trim_pcdata), OPTSET(parse_escapes_pcdata), OPTSET(parse_wnorm_pcdata), false, OPTSET(parse_eol_pcdata));
 
 			char ch = 0; // Current char, in cases where we must null-terminate before we test.
 			xml_node_struct* cursor = xmldoc; // Tree node cursor.
@@ -622,7 +701,7 @@ namespace
 
 									if (OPTSET(parse_eol_cdata))
 									{
-										strconv_t<false, false, false, false>(&cursor->value);
+										strconv_t<false, false, false, false, true>(&cursor->value);
 									}
 
 									POPNODE(); // Pop since this is a standalone.
@@ -860,10 +939,13 @@ namespace
 			}
 			return s;
 		}
+		
+	private:
+		const xml_parser_impl& operator=(const xml_parser_impl&);
 	};
 
 	// Compare lhs with [rhs_begin, rhs_end)
-	int strcmprange(const char* lhs, const char* rhs_begin, const char* rhs_end)
+	static int strcmprange(const char* lhs, const char* rhs_begin, const char* rhs_end)
 	{
 		while (*lhs && rhs_begin != rhs_end && *lhs == *rhs_begin)
 		{
@@ -876,7 +958,7 @@ namespace
 	}
 	
 	// Character set pattern match.
-	int strcmpwild_cset(const char** src, const char** dst)
+	static int strcmpwild_cset(const char** src, const char** dst)
 	{
 		int find = 0, excl = 0, star = 0;
 		
@@ -909,22 +991,14 @@ namespace
 	
 		return find;
 	}
-}
 
-namespace pugi
-{
 	namespace impl
 	{
 		int strcmpwild(const char* src, const char* dst);
 	}
-}
-
-namespace
-{
-	using namespace pugi;
 
 	// Wildcard pattern match.
-	int strcmpwild_astr(const char** src, const char** dst)
+	static int strcmpwild_astr(const char** src, const char** dst)
 	{
 		int find = 1;
 		++(*src);
@@ -952,10 +1026,7 @@ namespace
 			return find;
 		}
 	}
-}
 
-namespace pugi
-{
 	namespace impl
 	{
 		// Compare two strings, with globbing, and character sets.
@@ -977,16 +1048,18 @@ namespace pugi
 		}
 	}
 
-	extern "C"
+	int strcmp(const char* lhs, const char* rhs)
 	{
-		int strcmpwildimpl(const char* src, const char* dst)
-		{
-			return impl::strcmpwild(src, dst);
-		}
+		return ::strcmp(lhs, rhs);
+	}
 
-		typedef int (*strcmpfunc)(const char*, const char*);
+	int strcmpwildimpl(const char* src, const char* dst)
+	{
+		return impl::strcmpwild(src, dst);
 	}
 
+	typedef int (*strcmpfunc)(const char*, const char*);
+
 	xml_attribute_struct::xml_attribute_struct(): name(0), value(0), prev_attribute(0), next_attribute(0)
 	{
 	}
@@ -1355,12 +1428,23 @@ namespace pugi
 
 	const char* xml_node::child_value() const
 	{
-		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
-			if ((i->type == node_pcdata || i->type == node_cdata) && i->value)
-				return i->value;
+		if (!empty())
+			for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
+				if ((i->type == node_pcdata || i->type == node_cdata) && i->value)
+					return i->value;
 		return "";
 	}
 
+	const char* xml_node::child_value(const char* name) const
+	{
+		return child(name).child_value();
+	}
+
+	const char* xml_node::child_value_w(const char* name) const
+	{
+		return child_w(name).child_value();
+	}
+
 	xml_attribute xml_node::first_attribute() const
 	{
 		return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
diff --git a/src/pugixml.hpp b/src/pugixml.hpp
index 026e6ba..746f6c3 100644
--- a/src/pugixml.hpp
+++ b/src/pugixml.hpp
@@ -46,10 +46,13 @@ namespace pugi
 	const unsigned int parse_wnorm_pcdata		= 0x00000200; ///< Normalize spaces in pcdata
 	const unsigned int parse_wnorm_attribute	= 0x00000400; ///< Normalize spaces in attributes
 	const unsigned int parse_wconv_attribute	= 0x00000800; ///< Convert space-like characters to spaces in attributes (only if wnorm is not set)
-	const unsigned int parse_eol_cdata			= 0x00001000; ///< Perform EOL handling in CDATA sections
-	const unsigned int parse_check_end_tags		= 0x00002000; ///< Check start and end tag names and return error if names mismatch
-	const unsigned int parse_match_end_tags		= 0x00004000; ///< Try to find corresponding start tag for an end tag
-	const unsigned int parse_default			= 0x0000FFFF & ~parse_ws_pcdata; ///< Set all flags, except parse_ws_pcdata
+	const unsigned int parse_eol_pcdata			= 0x00001000; ///< Perform EOL handling in pcdata
+	const unsigned int parse_eol_attribute		= 0x00002000; ///< Perform EOL handling in attributes
+	const unsigned int parse_eol_cdata			= 0x00004000; ///< Perform EOL handling in CDATA sections
+	const unsigned int parse_check_end_tags		= 0x00010000; ///< Check start and end tag names and return error if names mismatch
+	const unsigned int parse_match_end_tags		= 0x00020000; ///< Try to find corresponding start tag for an end tag
+	/// Set all flags, except parse_ws_pcdata and parse_trim_attribute
+	const unsigned int parse_default			= 0x00FFFFFF & ~parse_ws_pcdata & ~parse_trim_attribute;
 	const unsigned int parse_noset				= 0x80000000; ///< Parse with flags in xml_parser
 
 	const unsigned int parse_w3c				= parse_pi | parse_comments | parse_cdata |
@@ -266,6 +269,13 @@ namespace pugi
 		/// Return PCDATA/CDATA that is child of current node. If none, return empty string.
 		const char* child_value() const;
 
+		/// Return PCDATA/CDATA that is child of specified child node. If none, return empty string.
+		const char* child_value(const char* name) const;
+
+		/// Return PCDATA/CDATA that is child of specified child node. If none, return empty string.
+		/// Enable wildcard matching.
+		const char* child_value_w(const char* name) const;
+
 	public:
 		/// Access node's first attribute if any, else xml_attribute()
 		xml_attribute first_attribute() const;
@@ -472,7 +482,7 @@ namespace pugi
 		std::vector<char>	_buffer; ///< character buffer
 
 		xml_memory_block	_memory; ///< Memory block
-
+		
 		xml_node_struct*	_xmldoc; ///< Pointer to current XML document tree root.
 		unsigned int		_optmsk; ///< Parser options.
author	arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640>	2006-11-06 18:38:04 +0000
committer	arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640>	2006-11-06 18:38:04 +0000
commit	9a5d7f62fdfb7f9c9f9354758777a3a881b25638 (patch)
tree	77083aa5173231be69e965ea8136d6d5def6ac1a /src
parent	69cc3fcb3a28d4b7f69dfa5f4dcc025eb53332d8 (diff)