From 1b87d3dcbf3ce5a6384826740dc65cf4917a1555 Mon Sep 17 00:00:00 2001 From: "arseny.kapoulkine" Date: Fri, 9 Dec 2011 05:24:07 +0000 Subject: Introduced parse_ws_pcdata_single flag: only parses whitespace-only PCDATA if it's the only child of the parent node (middle ground between default flags and parse_ws_pcdata) git-svn-id: http://pugixml.googlecode.com/svn/trunk@825 99668b35-9821-0410-8761-19e4c4f06640 --- tests/test_parse.cpp | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) (limited to 'tests/test_parse.cpp') diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp index d8064b2..2f66db9 100644 --- a/tests/test_parse.cpp +++ b/tests/test_parse.cpp @@ -263,6 +263,85 @@ TEST(parse_ws_pcdata_parse) CHECK_STRING(c2.first_child().value(), STR(" ")); } +static int get_tree_node_count(xml_node n) +{ + int result = 1; + + for (xml_node c = n.first_child(); c; c = c.next_sibling()) + result += get_tree_node_count(c); + + return result; +} + +TEST(parse_ws_pcdata_permutations) +{ + struct test_data_t + { + unsigned int mask; // 1 = default flags, 2 = parse_ws_pcdata, 4 = parse_ws_pcdata_single + const pugi::char_t* source; + const pugi::char_t* result; + int nodes; // negative if parsing should fail + }; + + test_data_t test_data[] = + { + // external pcdata should be discarded (whitespace or not) + {7, STR("ext1"), STR(""), 1}, + {7, STR(" "), STR(""), 1}, + {7, STR("ext1"), STR(""), 2}, + {7, STR("ext1ext2"), STR(""), 2}, + {7, STR(" "), STR(""), 2}, + {7, STR(" "), STR(""), 2}, + {7, STR(" "), STR(""), 2}, + // inner pcdata should be preserved + {7, STR("inner"), STR("inner"), 3}, + {7, STR("inner1inner2"), STR("inner1inner2"), 5}, + {7, STR("inner1deepinner2"), STR("inner1deepinner2"), 6}, + // empty pcdata nodes should never be created + {7, STR("inner1inner2"), STR("inner1inner2"), 5}, + {7, STR("inner2"), STR("inner2"), 4}, + {7, STR("inner1"), STR("inner1"), 4}, + {7, STR(""), STR(""), 3}, + // comments, pi or other nodes should not cause pcdata creation either + {7, STR(""), STR(""), 4}, + // leading/trailing pcdata whitespace should be preserved (note: this will change if parse_ws_pcdata_trim is introduced) + {7, STR("\t \tinner1 deep \t\ninner2\n\t"), STR("\t \tinner1 deep \t\ninner2\n\t"), 6}, + // whitespace-only pcdata preservation depends on the parsing mode + {1, STR("\n\t \n\t \n\t\n\t"), STR(""), 5}, + {2, STR("\n\t \n\t \n\t\n\t"), STR("\n\t \n\t \n\t\n\t"), 13}, + {4, STR("\n\t \n\t \n\t\n\t"), STR(" "), 7}, + // current implementation of parse_ws_pcdata_single has an unfortunate bug; reproduce it here + {4, STR("\t\t\n\n"), STR("\n\n"), 3}, + // error case: terminate PCDATA in the middle + {7, STR("abcdef"), STR("abcde"), -3}, + {7, STR(" "), STR(" "), -3}, + // error case: terminate PCDATA as early as possible + {7, STR(""), STR(""), -2}, + {7, STR("a"), STR(""), -2}, + {7, STR(" "), STR(""), -2}, + }; + + for (size_t i = 0; i < sizeof(test_data) / sizeof(test_data[0]); ++i) + { + const test_data_t& td = test_data[i]; + + for (int flag = 0; flag < 3; ++flag) + { + if (td.mask & (1 << flag)) + { + unsigned int flags[] = {parse_default, parse_default | parse_ws_pcdata, parse_default | parse_ws_pcdata_single}; + + xml_document doc; + CHECK((td.nodes > 0) == doc.load(td.source, flags[flag])); + CHECK_NODE(doc, td.result); + + int nodes = get_tree_node_count(doc); + CHECK((td.nodes < 0 ? -td.nodes : td.nodes) == nodes); + } + } + } +} + TEST(parse_pcdata_no_eol) { xml_document doc; -- cgit v1.2.3