Introduced encoding_latin1 support (conversion on loading, conversion on saving, encoding name in declaration in document::save)

git-svn-id: http://pugixml.googlecode.com/svn/trunk@829 99668b35-9821-0410-8761-19e4c4f06640
author: arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640> 2011-12-20 09:45:10 +0000
committer: arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640> 2011-12-20 09:45:10 +0000
commit: a0769dfe380ad7e4bb3c47dc6b32099e3a4918be (patch)
tree: 41072780c1afaa7a16797ed73aece4cab3eeec28 /tests
parent: 5a312a8ea8ab78c9cf8eba7cb299dafb6985800c (diff)
4 files changed, 61 insertions, 10 deletions
diff --git a/tests/data/latintest_latin1.xml b/tests/data/latintest_latin1.xml
new file mode 100644
index 0000000..3336f0c
--- /dev/null
+++ b/tests/data/latintest_latin1.xml
@@ -0,0 +1 @@
+<?xml version="1.0" encoding="ISO-8859-1"?><EXAMPLE><!--This is a comment with special chars: <äöü>--><ORDER version="1.0" xml:lang="de"><!--This is another comment with special chars: <äöü>--><HEADER><X_ORDER_ID>0000053535</X_ORDER_ID><CUSTOMER_ID>1010</CUSTOMER_ID><NAME_1>Müller</NAME_1><NAME_2>Jörg</NAME_2></HEADER><ENTRIES><ENTRY><ARTICLE>&lt;Test&gt;</ARTICLE><ENTRY_NO>10</ENTRY_NO></ENTRY><ENTRY><ARTICLE>&lt;Test 2&gt;</ARTICLE><ENTRY_NO>20</ENTRY_NO></ENTRY></ENTRIES><FOOTER><TEXT>This is a text.</TEXT></FOOTER></ORDER></EXAMPLE>
+\ No newline at end of file
diff --git a/tests/data/latintest_utf8.xml b/tests/data/latintest_utf8.xml
new file mode 100644
index 0000000..3efad30
--- /dev/null
+++ b/tests/data/latintest_utf8.xml
@@ -0,0 +1 @@
+﻿<?xml version="1.0" encoding="ISO-8859-1"?><EXAMPLE><!--This is a comment with special chars: <äöü>--><ORDER version="1.0" xml:lang="de"><!--This is another comment with special chars: <äöü>--><HEADER><X_ORDER_ID>0000053535</X_ORDER_ID><CUSTOMER_ID>1010</CUSTOMER_ID><NAME_1>Müller</NAME_1><NAME_2>Jörg</NAME_2></HEADER><ENTRIES><ENTRY><ARTICLE>&lt;Test&gt;</ARTICLE><ENTRY_NO>10</ENTRY_NO></ENTRY><ENTRY><ARTICLE>&lt;Test 2&gt;</ARTICLE><ENTRY_NO>20</ENTRY_NO></ENTRY></ENTRIES><FOOTER><TEXT>This is a text.</TEXT></FOOTER></ORDER></EXAMPLE>
+\ No newline at end of file
diff --git a/tests/test_document.cpp b/tests/test_document.cpp
index a799fbf..a49efcd 100644
--- a/tests/test_document.cpp
+++ b/tests/test_document.cpp
@@ -307,6 +307,7 @@ TEST_XML(document_save_bom, "<n/>")
 	CHECK(test_save_narrow(doc, flags, encoding_utf16_le, "\xff\xfe<\x00n\x00 \x00/\x00>\x00", 12));
 	CHECK(test_save_narrow(doc, flags, encoding_utf32_be, "\x00\x00\xfe\xff\x00\x00\x00<\x00\x00\x00n\x00\x00\x00 \x00\x00\x00/\x00\x00\x00>", 24));
 	CHECK(test_save_narrow(doc, flags, encoding_utf32_le, "\xff\xfe\x00\x00<\x00\x00\x00n\x00\x00\x00 \x00\x00\x00/\x00\x00\x00>\x00\x00\x00", 24));
+	CHECK(test_save_narrow(doc, flags, encoding_latin1, "<n />", 5));
 
 	// encodings synonyms
 	CHECK(save_narrow(doc, flags, encoding_utf16) == save_narrow(doc, flags, (is_little_endian() ? encoding_utf16_le : encoding_utf16_be)));
@@ -371,6 +372,15 @@ TEST_XML(document_save_declaration_present_last, "<node/>")
 	CHECK(writer.as_string() == STR("<?xml version=\"1.0\"?>\n<node />\n<?xml encoding=\"utf8\"?>\n"));
 }
 
+TEST_XML(document_save_declaration_latin1, "<node/>")
+{
+	xml_writer_string writer;
+
+	doc.save(writer, STR(""), pugi::format_default, encoding_latin1);
+
+	CHECK(writer.as_narrow() == "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n<node />\n");
+}
+
 struct temp_file
 {
 	char path[512];
@@ -704,18 +714,19 @@ static bool load_file_in_memory(const char* path, char*& data, size_t& size)
 	return true;
 }
 
-TEST(document_contents_preserve)
+struct file_data_t
 {
-	struct file_t
-	{
-		const char* path;
-		xml_encoding encoding;
+    const char* path;
+    xml_encoding encoding;
+
+    char* data;
+    size_t size;
+};
 
-		char* data;
-		size_t size;
-	};
 
-	file_t files[] =
+TEST(document_contents_preserve)
+{
+	file_data_t files[] =
 	{
 		{"tests/data/utftest_utf16_be_clean.xml", encoding_utf16_be, 0, 0},
 		{"tests/data/utftest_utf16_le_clean.xml", encoding_utf16_le, 0, 0},
@@ -751,6 +762,41 @@ TEST(document_contents_preserve)
 	}
 }
 
+TEST(document_contents_preserve_latin1)
+{
+	file_data_t files[] =
+	{
+		{"tests/data/latintest_utf8.xml", encoding_utf8, 0, 0},
+		{"tests/data/latintest_latin1.xml", encoding_latin1, 0, 0}
+	};
+
+	// load files in memory
+	for (unsigned int i = 0; i < sizeof(files) / sizeof(files[0]); ++i)
+	{
+		CHECK(load_file_in_memory(files[i].path, files[i].data, files[i].size));
+	}
+
+	// convert each file to each format and compare bitwise
+	for (unsigned int src = 0; src < sizeof(files) / sizeof(files[0]); ++src)
+	{
+		for (unsigned int dst = 0; dst < sizeof(files) / sizeof(files[0]); ++dst)
+		{
+			// parse into document (preserve comments, declaration and whitespace pcdata)
+			xml_document doc;
+			CHECK(doc.load_buffer(files[src].data, files[src].size, parse_default | parse_ws_pcdata | parse_declaration | parse_comments, files[src].encoding));
+
+			// compare saved document with the original (raw formatting, without extra declaration, write bom if it was in original file)
+			CHECK(test_save_narrow(doc, format_raw | format_no_declaration | format_write_bom, files[dst].encoding, files[dst].data, files[dst].size));
+		}
+	}
+
+	// cleanup
+	for (unsigned int j = 0; j < sizeof(files) / sizeof(files[0]); ++j)
+	{
+		delete[] files[j].data;
+	}
+}
+
 static bool test_parse_fail(const void* buffer, size_t size, xml_encoding encoding = encoding_utf8)
 {
 	// copy buffer to heap (to enable out-of-bounds checks)
@@ -811,7 +857,8 @@ TEST(document_load_buffer_empty)
 		encoding_utf32_le,
 		encoding_utf32_be,
 		encoding_utf32,
-		encoding_wchar
+		encoding_wchar,
+        encoding_latin1
 	};
 
 	char buffer[1];
diff --git a/tests/test_write.cpp b/tests/test_write.cpp
index 094bf59..93f5bd9 100644
--- a/tests/test_write.cpp
+++ b/tests/test_write.cpp
@@ -189,6 +189,8 @@ TEST(write_encodings)
 	{
 		CHECK(v.size() == 10 && v[0] == '<' && v[1] == 0x54 && v[2] == 0xA2 && v[3] == 0x20AC && v[4] == 0xd852 && v[5] == 0xdf62 && v[6] == ' ' && v[7] == '/' && v[8] == '>' && v[9] == '\n');
 	}
+
+    CHECK(test_write_narrow(doc, format_default, encoding_latin1, "<\x54\xA2?? />\n", 9));
 }
 
 #ifdef PUGIXML_WCHAR_MODE
author	arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640>	2011-12-20 09:45:10 +0000
committer	arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640>	2011-12-20 09:45:10 +0000
commit	a0769dfe380ad7e4bb3c47dc6b32099e3a4918be (patch)
tree	41072780c1afaa7a16797ed73aece4cab3eeec28 /tests
parent	5a312a8ea8ab78c9cf8eba7cb299dafb6985800c (diff)