From d2443e8948ac237d02df51810d74343ae3a0551f Mon Sep 17 00:00:00 2001
From: "arseny.kapoulkine"
 <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640>
Date: Mon, 5 Jan 2009 20:23:14 +0000
Subject: Document saving improvements - the ' character and bytes with the high
 bit set (>= 0x80) are no longer escaped; format_utf8 is therefore removed

git-svn-id: http://pugixml.googlecode.com/svn/trunk@95 99668b35-9821-0410-8761-19e4c4f06640
---
 src/pugixml.cpp | 42 +++++++++++++++---------------------------
 src/pugixml.hpp | 17 ++++-------------
 2 files changed, 19 insertions(+), 40 deletions(-)

(limited to 'src')
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index be3de07..0646f8b 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -405,6 +405,13 @@ namespace
 		return reinterpret_cast<const char*>(str);
 	}
 
+	template <bool _1> struct opt1_to_type
+	{
+		static const bool o1;
+	};
+
+	template <bool _1> const bool opt1_to_type<_1>::o1 = _1;
+
 	template <bool _1, bool _2> struct opt2_to_type
 	{
 		static const bool o1;
@@ -428,18 +435,17 @@ namespace
 	template <bool _1, bool _2, bool _3, bool _4> const bool opt4_to_type<_1, _2, _3, _4>::o4 = _4;
 
 #ifndef PUGIXML_NO_STL
-	template <typename opt2> void text_output_escaped(std::ostream& os, const char* s, opt2)
+	template <typename opt1> void text_output_escaped(std::ostream& os, const char* s, opt1)
 	{
-		const bool attribute = opt2::o1;
-		const bool utf8 = opt2::o2;
+		const bool attribute = opt1::o1;
 
 		while (*s)
 		{
 			const char* prev = s;
 			
 			// While *s is a usual symbol
-			while (*s && *s != '&' && *s != '<' && *s != '>' && ((*s != '"' && *s != '\'') || !attribute)
-					&& (*s >= 32 || (*s == '\r' && !attribute) || (*s == '\n' && !attribute) || *s == '\t'))
+			while (*s && *s != '&' && *s != '<' && *s != '>' && (*s != '"' || !attribute)
+					&& (*s < 0 || *s >= 32 || (*s == '\r' && !attribute) || (*s == '\n' && !attribute) || *s == '\t'))
 				++s;
 		
 			if (prev != s) os.write(prev, static_cast<std::streamsize>(s - prev));
@@ -463,10 +469,6 @@ namespace
 					os << "&quot;";
 					++s;
 					break;
-				case '\'':
-					os << "&apos;";
-					++s;
-					break;
 				case '\r':
 					os << "&#13;";
 					++s;
@@ -477,12 +479,7 @@ namespace
 					break;
 				default: // s is not a usual symbol
 				{
-					unsigned int ch;
-					
-					if (utf8)
-						s = strutf8_utf16(s, ch);
-					else
-						ch = (unsigned char)*s++;
+					unsigned int ch = (unsigned char)*s++;
 
 					os << "&#" << ch << ";";
 				}
@@ -2209,10 +2206,7 @@ namespace pugi
 			{
 				os << ' ' << a.name() << "=\"";
 
-				if (flags & format_utf8)
-					text_output_escaped(os, a.value(), opt2_to_type<1, 1>());
-				else
-					text_output_escaped(os, a.value(), opt2_to_type<1, 0>());
+				text_output_escaped(os, a.value(), opt1_to_type<1>());
 
 				os << "\"";
 			}
@@ -2235,10 +2229,7 @@ namespace pugi
 			{
 				os << ">";
 				
-				if (flags & format_utf8)
-					text_output_escaped(os, first_child().value(), opt2_to_type<0, 1>());
-				else
-					text_output_escaped(os, first_child().value(), opt2_to_type<0, 0>());
+				text_output_escaped(os, first_child().value(), opt1_to_type<0>());
 					
 				os << "</" << name() << ">\n";
 			}
@@ -2259,10 +2250,7 @@ namespace pugi
 		}
 		
 		case node_pcdata:
-			if (flags & format_utf8)
-				text_output_escaped(os, value(), opt2_to_type<0, 1>());
-			else
-				text_output_escaped(os, value(), opt2_to_type<0, 0>());
+			text_output_escaped(os, value(), opt1_to_type<0>());
 			break;
 
 		case node_cdata:
diff --git a/src/pugixml.hpp b/src/pugixml.hpp
index 5ce3fcd..5276447 100644
--- a/src/pugixml.hpp
+++ b/src/pugixml.hpp
@@ -174,21 +174,12 @@ namespace pugi
 	 */
 	const unsigned int format_indent	= 0x01;
 	
-	/**
-	 * This flag determines how the non-printable symbols are written to output stream - they are
-	 * either considered UTF-8 and are written as UTF-8 character, escaped with &#...;, or they are
-	 * considered to be ASCII and each ASCII character is escaped separately.
-	 *
-	 * This flag is on by default.
-	 */
-	const unsigned int format_utf8		= 0x02;
-	
 	/**
 	 * This flag determines if UTF-8 BOM is to be written to output stream.
 	 *
 	 * This flag is off by default.
 	 */
-	const unsigned int format_write_bom	= 0x04;
+	const unsigned int format_write_bom	= 0x02;
 	
 	/**
 	 * If this flag is on, no indentation is performed and no line breaks are written to output file.
@@ -196,13 +187,13 @@ namespace pugi
 	 *
 	 * This flag is off by default.
 	 */
-	const unsigned int format_raw		= 0x08;
+	const unsigned int format_raw		= 0x04;
 	
 	/**
 	 * This is the default set of formatting flags. It includes indenting nodes depending on their
-	 * depth in DOM tree and considering input data to be UTF-8.
+	 * depth in DOM tree.
 	 */
-	const unsigned int format_default	= format_indent | format_utf8;
+	const unsigned int format_default	= format_indent;
 		
 	// Forward declarations
 	struct xml_attribute_struct;
-- 
cgit v1.2.3