summaryrefslogtreecommitdiff
path: root/tests/test_document.cpp
diff options
context:
space:
mode:
authorArseny Kapoulkine <arseny.kapoulkine@gmail.com>2014-02-11 06:45:27 +0000
committerArseny Kapoulkine <arseny.kapoulkine@gmail.com>2014-02-11 06:45:27 +0000
commit47c15ad949eb6589ee14d208444b4e759a611143 (patch)
tree35822cba8d2d3c6e5384c960ff8ea503bf3cf235 /tests/test_document.cpp
parent5fa25a878aa472530cfa981d374d6e9fe4e12c7c (diff)
Implement document fragment parsing.
Introduce a notable behavior change in default parsing mode: documents without a document element node are now considered invalid. This is technically a breaking change, however the amount of documents it affects is very small, all parsed data still persists, and lack of this check results in very confusing behavior in a number of cases. In order to be able to parse documents without an element node, a fragment parsing flag is introduced. Parsing a buffer in fragment mode treats the buffer as a fragment of a valid XML. As a consequence, top-level PCDATA is added to the tree; additionally, there are no restrictions on the number of nodes -- so documents without a document element are considered valid. Due to the way parsing works internally, load_buffer_inplace occasionally can not preserve the document contents if it's parsed in a fragment mode. While unfortunate, this problem is fundamental; since the use case is relatively obscure, hopefully documenting this shortcoming will be enough. git-svn-id: https://pugixml.googlecode.com/svn/trunk@980 99668b35-9821-0410-8761-19e4c4f06640
Diffstat (limited to 'tests/test_document.cpp')
-rw-r--r--tests/test_document.cpp135
1 files changed, 100 insertions, 35 deletions
diff --git a/tests/test_document.cpp b/tests/test_document.cpp
index 3ac8bf8..e6c7b00 100644
--- a/tests/test_document.cpp
+++ b/tests/test_document.cpp
@@ -249,7 +249,7 @@ TEST(document_load_file_empty)
{
pugi::xml_document doc;
- CHECK(doc.load_file("tests/data/empty.xml"));
+ CHECK(doc.load_file("tests/data/empty.xml").status == status_no_document_element);
CHECK(!doc.first_child());
}
@@ -907,16 +907,52 @@ TEST(document_load_buffer_empty)
xml_encoding encoding = encodings[i];
xml_document doc;
- CHECK(doc.load_buffer(buffer, 0, parse_default, encoding) && !doc.first_child());
- CHECK(doc.load_buffer(0, 0, parse_default, encoding) && !doc.first_child());
+ CHECK(doc.load_buffer(buffer, 0, parse_default, encoding).status == status_no_document_element && !doc.first_child());
+ CHECK(doc.load_buffer(0, 0, parse_default, encoding).status == status_no_document_element && !doc.first_child());
- CHECK(doc.load_buffer_inplace(buffer, 0, parse_default, encoding) && !doc.first_child());
- CHECK(doc.load_buffer_inplace(0, 0, parse_default, encoding) && !doc.first_child());
+ CHECK(doc.load_buffer_inplace(buffer, 0, parse_default, encoding).status == status_no_document_element && !doc.first_child());
+ CHECK(doc.load_buffer_inplace(0, 0, parse_default, encoding).status == status_no_document_element && !doc.first_child());
void* own_buffer = pugi::get_memory_allocation_function()(1);
- CHECK(doc.load_buffer_inplace_own(own_buffer, 0, parse_default, encoding) && !doc.first_child());
- CHECK(doc.load_buffer_inplace_own(0, 0, parse_default, encoding) && !doc.first_child());
+ CHECK(doc.load_buffer_inplace_own(own_buffer, 0, parse_default, encoding).status == status_no_document_element && !doc.first_child());
+ CHECK(doc.load_buffer_inplace_own(0, 0, parse_default, encoding).status == status_no_document_element && !doc.first_child());
+ }
+}
+
+TEST(document_load_buffer_empty_fragment)
+{
+ xml_encoding encodings[] =
+ {
+ encoding_auto,
+ encoding_utf8,
+ encoding_utf16_le,
+ encoding_utf16_be,
+ encoding_utf16,
+ encoding_utf32_le,
+ encoding_utf32_be,
+ encoding_utf32,
+ encoding_wchar,
+ encoding_latin1
+ };
+
+ char buffer[1];
+
+ for (unsigned int i = 0; i < sizeof(encodings) / sizeof(encodings[0]); ++i)
+ {
+ xml_encoding encoding = encodings[i];
+
+ xml_document doc;
+ CHECK(doc.load_buffer(buffer, 0, parse_fragment, encoding) && !doc.first_child());
+ CHECK(doc.load_buffer(0, 0, parse_fragment, encoding) && !doc.first_child());
+
+ CHECK(doc.load_buffer_inplace(buffer, 0, parse_fragment, encoding) && !doc.first_child());
+ CHECK(doc.load_buffer_inplace(0, 0, parse_fragment, encoding) && !doc.first_child());
+
+ void* own_buffer = pugi::get_memory_allocation_function()(1);
+
+ CHECK(doc.load_buffer_inplace_own(own_buffer, 0, parse_fragment, encoding) && !doc.first_child());
+ CHECK(doc.load_buffer_inplace_own(0, 0, parse_fragment, encoding) && !doc.first_child());
}
}
@@ -933,13 +969,27 @@ TEST(document_progressive_truncation)
{
char* truncated_data = buffer + original_size - i;
- memcpy(truncated_data, original_data, i);
+ // default flags
+ {
+ memcpy(truncated_data, original_data, i);
- xml_document doc;
- bool result = doc.load_buffer_inplace(truncated_data, i);
+ xml_document doc;
+ bool result = doc.load_buffer_inplace(truncated_data, i);
+
+ // only eof is parseable
+ CHECK((i >= 3325) ? result : !result);
+ }
+
+ // fragment mode
+ {
+ memcpy(truncated_data, original_data, i);
+
+ xml_document doc;
+ bool result = doc.load_buffer_inplace(truncated_data, i, parse_default | parse_fragment);
- // some truncate locations are parseable - those that come after declaration, declaration + doctype, declaration + doctype + comment and eof
- CHECK(((i - 21) < 3 || (i - 66) < 3 || (i - 95) < 3 || i >= 3325) ? result : !result);
+ // some truncate locations are parseable - those that come after declaration, declaration + doctype, declaration + doctype + comment and eof
+ CHECK(((i - 21) < 3 || (i - 66) < 3 || (i - 95) < 3 || i >= 3325) ? result : !result);
+ }
}
delete[] buffer;
@@ -953,12 +1003,29 @@ TEST(document_load_buffer_short)
xml_document doc;
- CHECK(doc.load_buffer(data, 4));
- CHECK(doc.load_buffer(data + 1, 3));
- CHECK(doc.load_buffer(data + 2, 2));
- CHECK(doc.load_buffer(data + 3, 1));
- CHECK(doc.load_buffer(data + 4, 0));
- CHECK(doc.load_buffer(0, 0));
+ CHECK(doc.load_buffer(data, 4).status == status_no_document_element);
+ CHECK(doc.load_buffer(data + 1, 3).status == status_no_document_element);
+ CHECK(doc.load_buffer(data + 2, 2).status == status_no_document_element);
+ CHECK(doc.load_buffer(data + 3, 1).status == status_no_document_element);
+ CHECK(doc.load_buffer(data + 4, 0).status == status_no_document_element);
+ CHECK(doc.load_buffer(0, 0).status == status_no_document_element);
+
+ delete[] data;
+}
+
+TEST(document_load_buffer_short_fragment)
+{
+ char* data = new char[4];
+ memcpy(data, "abcd", 4);
+
+ xml_document doc;
+
+ CHECK(doc.load_buffer(data, 4, parse_fragment) && test_string_equal(doc.text().get(), STR("abcd")));
+ CHECK(doc.load_buffer(data + 1, 3, parse_fragment) && test_string_equal(doc.text().get(), STR("bcd")));
+ CHECK(doc.load_buffer(data + 2, 2, parse_fragment) && test_string_equal(doc.text().get(), STR("cd")));
+ CHECK(doc.load_buffer(data + 3, 1, parse_fragment) && test_string_equal(doc.text().get(), STR("d")));
+ CHECK(doc.load_buffer(data + 4, 0, parse_fragment) && !doc.first_child());
+ CHECK(doc.load_buffer(0, 0, parse_fragment) && !doc.first_child());
delete[] data;
}
@@ -970,12 +1037,12 @@ TEST(document_load_buffer_inplace_short)
xml_document doc;
- CHECK(doc.load_buffer_inplace(data, 4));
- CHECK(doc.load_buffer_inplace(data + 1, 3));
- CHECK(doc.load_buffer_inplace(data + 2, 2));
- CHECK(doc.load_buffer_inplace(data + 3, 1));
- CHECK(doc.load_buffer_inplace(data + 4, 0));
- CHECK(doc.load_buffer_inplace(0, 0));
+ CHECK(doc.load_buffer_inplace(data, 4).status == status_no_document_element);
+ CHECK(doc.load_buffer_inplace(data + 1, 3).status == status_no_document_element);
+ CHECK(doc.load_buffer_inplace(data + 2, 2).status == status_no_document_element);
+ CHECK(doc.load_buffer_inplace(data + 3, 1).status == status_no_document_element);
+ CHECK(doc.load_buffer_inplace(data + 4, 0).status == status_no_document_element);
+ CHECK(doc.load_buffer_inplace(0, 0).status == status_no_document_element);
delete[] data;
}
@@ -1006,7 +1073,7 @@ TEST_XML_FLAGS(document_element, "<?xml version='1.0'?><node><child/></node><!--
CHECK(doc.document_element() == doc.child(STR("node")));
}
-TEST_XML_FLAGS(document_element_absent, "<!---->", parse_comments)
+TEST_XML_FLAGS(document_element_absent, "<!---->", parse_comments | parse_fragment)
{
CHECK(doc.document_element() == xml_node());
}
@@ -1070,16 +1137,6 @@ TEST_XML(document_reset_copy_self, "<node><child/></node>")
CHECK_NODE(doc, STR(""));
}
-struct document_data_t
-{
- xml_encoding encoding;
-
- const unsigned char* data;
- size_t size;
-};
-
-#include <stdio.h>
-
TEST(document_load_buffer_utf_truncated)
{
const unsigned char utf8[] = {'<', 0xe2, 0x82, 0xac, '/', '>'};
@@ -1088,6 +1145,14 @@ TEST(document_load_buffer_utf_truncated)
const unsigned char utf32_be[] = {0, 0, 0, '<', 0, 0, 0x20, 0xac, 0, 0, 0, '/', 0, 0, 0, '>'};
const unsigned char utf32_le[] = {'<', 0, 0, 0, 0xac, 0x20, 0, 0, '/', 0, 0, 0, '>', 0, 0, 0};
+ struct document_data_t
+ {
+ xml_encoding encoding;
+
+ const unsigned char* data;
+ size_t size;
+ };
+
const document_data_t data[] =
{
{ encoding_utf8, utf8, sizeof(utf8) },