From 47c15ad949eb6589ee14d208444b4e759a611143 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Tue, 11 Feb 2014 06:45:27 +0000 Subject: Implement document fragment parsing. Introduce a notable behavior change in the default parsing mode: documents without a document element node are now considered invalid. This is technically a breaking change; however, the number of documents it affects is very small, all parsed data still persists, and the lack of this check results in very confusing behavior in a number of cases. In order to be able to parse documents without an element node, a fragment parsing flag is introduced. Parsing a buffer in fragment mode treats the buffer as a fragment of a valid XML document. As a consequence, top-level PCDATA is added to the tree; additionally, there are no restrictions on the number of nodes -- so documents without a document element are considered valid. Due to the way parsing works internally, load_buffer_inplace occasionally cannot preserve the document contents when the buffer is parsed in fragment mode. While unfortunate, this problem is fundamental; since the use case is relatively obscure, hopefully documenting this shortcoming will be enough. 
git-svn-id: https://pugixml.googlecode.com/svn/trunk@980 99668b35-9821-0410-8761-19e4c4f06640 --- tests/test_parse_doctype.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'tests/test_parse_doctype.cpp') diff --git a/tests/test_parse_doctype.cpp b/tests/test_parse_doctype.cpp index d7a3726..8976890 100644 --- a/tests/test_parse_doctype.cpp +++ b/tests/test_parse_doctype.cpp @@ -20,7 +20,7 @@ static xml_parse_result load_concat(xml_document& doc, const char_t* a, const ch strcat(buffer, c); #endif - return doc.load(buffer); + return doc.load(buffer, parse_fragment); } static bool test_doctype_wf(const char_t* decl) @@ -31,9 +31,9 @@ static bool test_doctype_wf(const char_t* decl) if (!load_concat(doc, decl) || !doc.first_child().empty()) return false; // pcdata pre/postfix - if (!load_concat(doc, STR("a"), decl) || !doc.first_child().empty()) return false; - if (!load_concat(doc, decl, STR("b")) || !doc.first_child().empty()) return false; - if (!load_concat(doc, STR("a"), decl, STR("b")) || !doc.first_child().empty()) return false; + if (!load_concat(doc, STR("a"), decl) || !test_node(doc, STR("a"), STR(""), format_raw)) return false; + if (!load_concat(doc, decl, STR("b")) || !test_node(doc, STR("b"), STR(""), format_raw)) return false; + if (!load_concat(doc, STR("a"), decl, STR("b")) || !test_node(doc, STR("ab"), STR(""), format_raw)) return false; // node pre/postfix if (!load_concat(doc, STR(""), decl) || !test_node(doc, STR(""), STR(""), format_raw)) return false; @@ -41,7 +41,7 @@ static bool test_doctype_wf(const char_t* decl) if (!load_concat(doc, STR(""), decl, STR("")) || !test_node(doc, STR(""), STR(""), format_raw)) return false; // check load-store contents preservation - CHECK(doc.load(decl, parse_doctype)); + CHECK(doc.load(decl, parse_doctype | parse_fragment)); CHECK_NODE(doc, decl); return true; @@ -281,8 +281,8 @@ TEST(parse_doctype_xmlconf_oasis_1) // not actually a doctype :) xml_document doc; - 
CHECK(doc.load(STR(" "), parse_full) && doc.first_child().type() == node_comment && doc.last_child().type() == node_comment && doc.first_child().next_sibling() == doc.last_child()); - CHECK(doc.load(STR(" &a%b&#c?>"), parse_full) && doc.first_child().type() == node_pi && doc.first_child() == doc.last_child()); + CHECK(doc.load(STR(" "), parse_full | parse_fragment) && doc.first_child().type() == node_comment && doc.last_child().type() == node_comment && doc.first_child().next_sibling() == doc.last_child()); + CHECK(doc.load(STR(" &a%b&#c?>"), parse_full | parse_fragment) && doc.first_child().type() == node_pi && doc.first_child() == doc.last_child()); } TEST(parse_doctype_xmlconf_xmltest_1) @@ -299,7 +299,7 @@ TEST(parse_doctype_xmlconf_xmltest_1) TEST_DOCTYPE_WF(" \"> ]>"); } -TEST_XML_FLAGS(parse_doctype_value, " \"> ]>", parse_minimal | parse_doctype) +TEST_XML_FLAGS(parse_doctype_value, " \"> ]>", parse_fragment | parse_doctype) { xml_node n = doc.first_child(); -- cgit v1.2.3