From d2443e8948ac237d02df51810d74343ae3a0551f Mon Sep 17 00:00:00 2001 From: "arseny.kapoulkine" Date: Mon, 5 Jan 2009 20:23:14 +0000 Subject: Document saving improvements - no escaping is done for ' character or for symbols in second half of ASCII table; format_utf8 is therefore gone git-svn-id: http://pugixml.googlecode.com/svn/trunk@95 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 42 +++++++++++++++--------------------------- src/pugixml.hpp | 17 ++++------------- 2 files changed, 19 insertions(+), 40 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index be3de07..0646f8b 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -405,6 +405,13 @@ namespace return reinterpret_cast(str); } + template struct opt1_to_type + { + static const bool o1; + }; + + template const bool opt1_to_type<_1>::o1 = _1; + template struct opt2_to_type { static const bool o1; @@ -428,18 +435,17 @@ namespace template const bool opt4_to_type<_1, _2, _3, _4>::o4 = _4; #ifndef PUGIXML_NO_STL - template void text_output_escaped(std::ostream& os, const char* s, opt2) + template void text_output_escaped(std::ostream& os, const char* s, opt1) { - const bool attribute = opt2::o1; - const bool utf8 = opt2::o2; + const bool attribute = opt1::o1; while (*s) { const char* prev = s; // While *s is a usual symbol - while (*s && *s != '&' && *s != '<' && *s != '>' && ((*s != '"' && *s != '\'') || !attribute) - && (*s >= 32 || (*s == '\r' && !attribute) || (*s == '\n' && !attribute) || *s == '\t')) + while (*s && *s != '&' && *s != '<' && *s != '>' && (*s != '"' || !attribute) + && (*s < 0 || *s >= 32 || (*s == '\r' && !attribute) || (*s == '\n' && !attribute) || *s == '\t')) ++s; if (prev != s) os.write(prev, static_cast(s - prev)); @@ -463,10 +469,6 @@ namespace os << """; ++s; break; - case '\'': - os << "'"; - ++s; - break; case '\r': os << " "; ++s; @@ -477,12 +479,7 @@ namespace break; default: // s is not a usual symbol { - unsigned int ch; - - if (utf8) - s = strutf8_utf16(s, ch); - else - ch = (unsigned char)*s++; + unsigned int ch = (unsigned char)*s++; os << "&#" << ch << ";"; } @@ -2209,10 +2206,7 @@ namespace pugi { os << ' ' << a.name() << "=\""; - if (flags & format_utf8) - text_output_escaped(os, a.value(), opt2_to_type<1, 1>()); - else - text_output_escaped(os, a.value(), opt2_to_type<1, 0>()); + text_output_escaped(os, a.value(), opt1_to_type<1>()); os << "\""; } @@ -2235,10 +2229,7 @@ namespace pugi { os << ">"; - if (flags & format_utf8) - text_output_escaped(os, first_child().value(), opt2_to_type<0, 1>()); - else - text_output_escaped(os, first_child().value(), opt2_to_type<0, 0>()); + text_output_escaped(os, first_child().value(), opt1_to_type<0>()); os << "\n"; } @@ -2259,10 +2250,7 @@ namespace pugi } case node_pcdata: - if (flags & format_utf8) - text_output_escaped(os, value(), opt2_to_type<0, 1>()); - else - text_output_escaped(os, value(), opt2_to_type<0, 0>()); + text_output_escaped(os, value(), opt1_to_type<0>()); break; case node_cdata: diff --git a/src/pugixml.hpp b/src/pugixml.hpp index 5ce3fcd..5276447 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -174,21 +174,12 @@ namespace pugi */ const unsigned int format_indent = 0x01; - /** - * This flag determines how the non-printable symbols are written to output stream - they are - * either considered UTF-8 and are written as UTF-8 character, escaped with &#...;, or they are - * considered to be ASCII and each ASCII character is escaped separately. - * - * This flag is on by default. - */ - const unsigned int format_utf8 = 0x02; - /** * This flag determines if UTF-8 BOM is to be written to output stream. * * This flag is off by default. */ - const unsigned int format_write_bom = 0x04; + const unsigned int format_write_bom = 0x02; /** * If this flag is on, no indentation is performed and no line breaks are written to output file. @@ -196,13 +187,13 @@ namespace pugi * * This flag is off by default. */ - const unsigned int format_raw = 0x08; + const unsigned int format_raw = 0x04; /** * This is the default set of formatting flags. It includes indenting nodes depending on their - * depth in DOM tree and considering input data to be UTF-8. + * depth in DOM tree. */ - const unsigned int format_default = format_indent | format_utf8; + const unsigned int format_default = format_indent; // Forward declarations struct xml_attribute_struct; -- cgit v1.2.3