1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
|
#define _CRT_SECURE_NO_WARNINGS
#include "common.hpp"
#include <string.h>
#include <wchar.h>
#include <string>
static xml_parse_result load_concat(xml_document& doc, const char_t* a, const char_t* b = STR(""), const char_t* c = STR(""))
{
char_t buffer[768];
#ifdef PUGIXML_WCHAR_MODE
wcscpy(buffer, a);
wcscat(buffer, b);
wcscat(buffer, c);
#else
strcpy(buffer, a);
strcat(buffer, b);
strcat(buffer, c);
#endif
return doc.load_string(buffer, parse_fragment);
}
static bool test_doctype_wf(const char_t* decl)
{
xml_document doc;
// standalone
if (!load_concat(doc, decl) || !doc.first_child().empty()) return false;
// pcdata pre/postfix
if (!load_concat(doc, STR("a"), decl) || !test_node(doc, STR("a"), STR(""), format_raw)) return false;
if (!load_concat(doc, decl, STR("b")) || !test_node(doc, STR("b"), STR(""), format_raw)) return false;
if (!load_concat(doc, STR("a"), decl, STR("b")) || !test_node(doc, STR("ab"), STR(""), format_raw)) return false;
// node pre/postfix
if (!load_concat(doc, STR("<nodea/>"), decl) || !test_node(doc, STR("<nodea />"), STR(""), format_raw)) return false;
if (!load_concat(doc, decl, STR("<nodeb/>")) || !test_node(doc, STR("<nodeb />"), STR(""), format_raw)) return false;
if (!load_concat(doc, STR("<nodea/>"), decl, STR("<nodeb/>")) || !test_node(doc, STR("<nodea /><nodeb />"), STR(""), format_raw)) return false;
// check load-store contents preservation
CHECK(doc.load_string(decl, parse_doctype | parse_fragment));
CHECK_NODE(doc, decl);
return true;
}
static bool test_doctype_nwf(const char_t* decl)
{
xml_document doc;
// standalone
if (load_concat(doc, decl).status != status_bad_doctype) return false;
// pcdata postfix
if (load_concat(doc, decl, STR("b")).status != status_bad_doctype) return false;
// node postfix
if (load_concat(doc, decl, STR("<nodeb/>")).status != status_bad_doctype) return false;
return true;
}
#define TEST_DOCTYPE_WF(contents) CHECK(test_doctype_wf(STR(contents)))
#define TEST_DOCTYPE_NWF(contents) CHECK(test_doctype_nwf(STR(contents)))
TEST(parse_doctype_skip)
{
TEST_DOCTYPE_WF("<!DOCTYPE doc>");
TEST_DOCTYPE_WF("<!DOCTYPE doc SYSTEM 'foo'>");
TEST_DOCTYPE_WF("<!DOCTYPE doc SYSTEM \"foo\">");
TEST_DOCTYPE_WF("<!DOCTYPE doc PUBLIC \"foo\" 'bar'>");
TEST_DOCTYPE_WF("<!DOCTYPE doc PUBLIC \"foo'\">");
TEST_DOCTYPE_WF("<!DOCTYPE doc SYSTEM 'foo' [<!ELEMENT foo 'ANY'>]>");
}
TEST(parse_doctype_error)
{
TEST_DOCTYPE_NWF("<!DOCTYPE");
TEST_DOCTYPE_NWF("<!DOCTYPE doc");
TEST_DOCTYPE_NWF("<!DOCTYPE doc SYSTEM 'foo");
TEST_DOCTYPE_NWF("<!DOCTYPE doc SYSTEM \"foo");
TEST_DOCTYPE_NWF("<!DOCTYPE doc PUBLIC \"foo\" 'bar");
TEST_DOCTYPE_NWF("<!DOCTYPE doc PUBLIC \"foo'\"");
TEST_DOCTYPE_NWF("<!DOCTYPE doc SYSTEM 'foo' [<!ELEMENT foo 'ANY");
TEST_DOCTYPE_NWF("<!DOCTYPE doc SYSTEM 'foo' [<!ELEMENT foo 'ANY'>");
TEST_DOCTYPE_NWF("<!DOCTYPE doc SYSTEM 'foo' [<!ELEMENT foo 'ANY'>]");
TEST_DOCTYPE_NWF("<!DOCTYPE doc SYSTEM 'foo' [<!ELEMENT foo 'ANY'>] ");
}
// Examples from W3C recommendations
TEST(parse_doctype_w3c_wf)
{
TEST_DOCTYPE_WF("<!DOCTYPE greeting SYSTEM \"hello.dtd\">");
TEST_DOCTYPE_WF("<!DOCTYPE greeting [ <!ELEMENT greeting (#PCDATA)> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE greeting [ <!ATTLIST list type (bullets|ordered|glossary) \"ordered\"> <!ATTLIST form method CDATA #FIXED \"POST\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE greeting [ <!ENTITY % draft 'INCLUDE' > <!ENTITY % final 'IGNORE' > <![%draft;[ <!ELEMENT book (comments*, title, body, supplements?)> ]]> <![%final;[ <!ELEMENT book (title, body, supplements?)> ]]>]>");
TEST_DOCTYPE_WF("<!DOCTYPE greeting [ <!ENTITY open-hatch PUBLIC \"-//Textuality//TEXT Standard open-hatch boilerplate//EN\" \"http://www.textuality.com/boilerplate/OpenHatch.xml\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE greeting [ <!ENTITY EndAttr \"27'\" > ]>");
}
TEST(parse_doctype_w3c_nwf)
{
TEST_DOCTYPE_NWF("<!DOCTYPE greeting SYSTEM \"hello.dtd>");
TEST_DOCTYPE_NWF("<!DOCTYPE greeting SYSTEM");
TEST_DOCTYPE_NWF("<!DOCTYPE greeting [ <!ELEMENT greeting (#PCDATA)> ]");
TEST_DOCTYPE_NWF("<!DOCTYPE greeting [ <!ELEMENT greeting (#PCDATA)>");
TEST_DOCTYPE_NWF("<!DOCTYPE greeting [ <!ELEMENT greeting (#PCDATA");
TEST_DOCTYPE_NWF("<!DOCTYPE greeting [ ");
TEST_DOCTYPE_NWF("<!DOCTYPE greeting [ <!ATTLIST list type (bullets|ordered|glossary) \"ordered\"> ]");
TEST_DOCTYPE_NWF("<!DOCTYPE greeting [ <!ATTLIST list type (bullets|ordered|glossary) \"ordered\">");
TEST_DOCTYPE_NWF("<!DOCTYPE greeting [ <!ATTLIST list type (bullets|ordered|glossary) \"orde");
TEST_DOCTYPE_NWF("<!DOCTYPE greeting [ <!ATTLIST list type (bullets|ordered|glossary) ");
TEST_DOCTYPE_NWF("<!DOCTYPE greeting [ <!ENTITY open-hatch PUBLIC \"-//Textuality//TEXT Standard open-hatch boilerplate//EN\" \"http://www.textuality.com/boilerplate/OpenHatch.x");
}
// Examples from xmlsuite
TEST(parse_doctype_xmlconf_eduni_1)
{
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!NOTATION gif SYSTEM \"file:///usr/X11R6/bin/xv\"> <!ENTITY declared SYSTEM \"xyzzy\" NDATA gif> <!ATTLIST foo bar ENTITY \"7\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!NOTATION gif SYSTEM \"file:///usr/X11R6/bin/xv\"> <!ENTITY declared SYSTEM \"xyzzy\" NDATA gif> <!ATTLIST foo bar ENTITY \"undeclared\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ENTITY % e SYSTEM \"E60.ent\"> %e; ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ENTITY e \"an &unparsed; entity\"> <!NOTATION gif SYSTEM \"file:///usr/X11R6/bin/xv\"> <!ENTITY unparsed SYSTEM \"xyzzy\" NDATA gif> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ATTLIST foo bar CDATA #IMPLIED> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ATTLIST foo xml:lang CDATA #IMPLIED> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ENTITY e SYSTEM \"E38.ent\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo SYSTEM \"E36.dtd\">");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ELEMENT bar (foo|foo)> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!NOTATION one SYSTEM \"file:///usr/bin/awk\"> <!ATTLIST foo bar NOTATION (one|one) #IMPLIED> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ATTLIST foo bar (one|one) #IMPLIED> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ATTLIST foo xml:lang NMTOKEN #IMPLIED> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ENTITY gt \">\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ATTLIST foo bar NMTOKENS #IMPLIED> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ENTITY % pe SYSTEM \"subdir1/E18-pe\"> %pe; %intpe; ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo (PCDATA|foo)*> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo (foo*)> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo (foo*)> <!ENTITY space \"&#32;\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo (foo*)> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo (foo*)> <!ENTITY space \" \"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo EMPTY> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo EMPTY> <!ENTITY empty \"\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <![INCLUDE[<!ATTLIST foo bar CDATA #IMPLIED>]]> <![IGNORE[some junk]]> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ENTITY % pe \"hello\"> <!-- If forward were expanded when ent was declared, we were get an error, but it is bypassed and not expanded until ent is used in the instance --> <!ENTITY ent \"%pe; ! &forward;\"> <!ENTITY forward \"goodbye\"> ]>");
TEST_DOCTYPE_NWF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ENTITY % e \"bar CDATA #IMPLIED>\"> <!ATTLIST foo %e;");
TEST_DOCTYPE_NWF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ENTITY % e \"bar CDATA #IMPLIED>\"> <!ATTLIST foo %e; ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ENTITY ent SYSTEM 'E18-ent'> ]>");
}
TEST(parse_doctype_xmlconf_eduni_2)
{
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ENTITY % pe \"<!ENTITY ent1 'text'>\"> %pe; <!ELEMENT foo ANY> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ENTITY ent SYSTEM \"ent\"> <!ELEMENT foo ANY> <!ATTLIST foo a CDATA \"contains &ent; reference\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ATTLIST foo id ID #IMPLIED> <!ATTLIST foo ref IDREF \"undef\"> <!ATTLIST foo ent ENTITY \"undef\"> <!-- can't test NOTATION attribute, because if it's undeclared then we'll get an error for one of the enumerated values being undeclared. --> <!ENTITY ent SYSTEM \"foo\" NDATA not> <!NOTATION not SYSTEM \"not\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo EMPTY> <!ATTLIST foo a (one|two|three) \"four\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ATTLIST foo a NOTATION (not) \"not2\"> <!NOTATION not SYSTEM \"not\"> <!NOTATION not2 SYSTEM \"not2\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo EMPTY> <!ATTLIST foo a NMTOKENS \"34+\"> ]>");
}
TEST(parse_doctype_xmlconf_eduni_3)
{
TEST_DOCTYPE_WF("<!DOCTYPE animal [ <!ELEMENT animal ANY> <?_\xd9\x9f an only legal per 5th edition extender #x65f in PITarget ?> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!ELEMENT root ANY> <!ELEMENT \xc2\xb7_BadName EMPTY> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE doc [ <!ENTITY e \"<X๜></X๜>\"> ]>");
}
TEST(parse_doctype_xmlconf_eduni_4)
{
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ENTITY a:b \"bogus\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ATTLIST foo xmlns:a CDATA #IMPLIED xmlns:b NMTOKEN #IMPLIED xmlns:c CDATA #IMPLIED> <!ELEMENT bar ANY> <!ATTLIST bar a:attr CDATA #IMPLIED b:attr CDATA #IMPLIED c:attr CDATA #IMPLIED> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ATTLIST foo xmlns:a CDATA #IMPLIED xmlns:b CDATA #IMPLIED xmlns:c CDATA #IMPLIED> <!ELEMENT bar ANY> <!ATTLIST bar a:attr CDATA #IMPLIED b:attr CDATA #IMPLIED c:attr CDATA #IMPLIED> <!ENTITY tilde \"~\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT xmlns:foo EMPTY> ]>");
}
TEST(parse_doctype_xmlconf_eduni_5)
{
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ENTITY e \"\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo (foo*)> <!ENTITY e \"abc…def\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE foo [ <!ELEMENT foo ANY> <!ATTLIST foo bar NMTOKENS #IMPLIED> <!ENTITY val \"abc…def\"> ]>");
}
TEST(parse_doctype_xmlconf_ibm_1)
{
TEST_DOCTYPE_WF("<!DOCTYPE animal SYSTEM \"ibm32i04.dtd\" [ <!ATTLIST animal xml:space (default|preserve) 'preserve'> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!ELEMENT root (a,b)> <!ELEMENT a EMPTY> <!ELEMENT b (#PCDATA|c)* > <!ELEMENT c ANY> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!ELEMENT root (PCDATA|b)* > <!ELEMENT b (#PCDATA) > <!ATTLIST b attr1 CDATA #REQUIRED> <!ATTLIST b attr2 (abc|def) \"abc\"> <!ATTLIST b attr3 CDATA #FIXED \"fixed\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE test [ <!ELEMENT test ANY> <!ELEMENT landscape EMPTY> <!ENTITY parsedentity1 SYSTEM \"ibm56iv01.xml\"> <!ENTITY parsedentity2 SYSTEM \"ibm56iv02.xml\"> <!ATTLIST landscape sun ENTITIES #IMPLIED> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE test [ <!ELEMENT test ANY> <!ELEMENT landscape EMPTY> <!ENTITY image1 SYSTEM \"d:\\testspec\\images\\sunset.gif\" NDATA gif> <!ENTITY image2 SYSTEM \"d:\\testspec\\images\\frontpag.gif\" NDATA gif> <!ATTLIST landscape sun ENTITIES #IMPLIED> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE tokenizer [ <!ELEMENT tokenizer ANY> <!ATTLIST tokenizer UniqueName ID #FIXED \"AC1999\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE test [ <!ELEMENT test ANY> <!ELEMENT blob (#PCDATA)> <!NOTATION base64 SYSTEM \"mimecode\"> <!NOTATION uuencode SYSTEM \"uudecode\"> <!ATTLIST blob content-encoding NOTATION (base64|uuencode|raw|ascii) #REQUIRED> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE test [ <!ELEMENT test ANY> <!ELEMENT a EMPTY> <!ELEMENT nametoken EMPTY> <!ATTLIST nametoken namevalue NMTOKEN \"@#$\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!ELEMENT root (#PCDATA)* > <!ENTITY % pe1 SYSTEM \"ibm68i04.ent\"> %pe1; ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!ELEMENT a EMPTY> <!ATTLIST a attr1 CDATA \"&ge1;\"> <!--* GE reference in attr default before declaration *--> <!ENTITY ge1 \"abcdef\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!ENTITY ge1 \"abcdef\"> <!ELEMENT a EMPTY> <!ATTLIST a attr1 CDATA \"&ge1;\"> <!ENTITY % pe2 \"<!ATTLIST a attr2 CDATA #IMPLIED>\"> %pe3; <!--* PE reference in above doesn't match declaration *--> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!ELEMENT root (#PCDATA)> <!ATTLIST root att CDATA #IMPLIED> <!ENTITY % pe1 '<!ATTLIST root att2 CDATA \"&ge1;\">'> <!ENTITY ge1 \"attdefaultvalue\" > %pe1; <!--* notation JPGformat not declared *--> <!ENTITY ge2 SYSTEM \"image.jpg\" NDATA JPGformat> ]>");
}
TEST(parse_doctype_xmlconf_ibm_2)
{
TEST_DOCTYPE_WF("<!DOCTYPE student [ <!ELEMENT student (#PCDATA)> <!ENTITY gewithElemnetDecl \"<!ELEMENT bogus ANY>\"> <!ATTLIST student att1 CDATA #REQUIRED> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE student [ <!ELEMENT student (#PCDATA)> <!ENTITY gewithlt \"abcd&#x3c;\"> <!ATTLIST student att1 CDATA #REQUIRED> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE student [ <!ELEMENT student (#PCDATA)> <!ENTITY gewithlt \"abcd<\"> <!ATTLIST student att1 CDATA #REQUIRED> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE 5A_name_starts_with_digit [ <!ELEMENT 5A_name_starts_with_digit EMPTY> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE student [ <!ELEMENT student (#PCDATA)> <!ENTITY FullName \"Snow&Man\"> ]>");
TEST_DOCTYPE_NWF("<!DOCTYPE student [ <!ELEMENT student (#PCDATA)> <!ENTITY FullName \"Snow\"Man\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE student [ <!ELEMENT student (#PCDATA)> <!ATTLIST student first CDATA #REQUIRED middle CDATA #IMPLIED last CDATA #IMPLIED > <!ENTITY myfirst \"Snow\"> <!ENTITY mymiddle \"I\"> <!ENTITY mylast \"Man\"> ]>");
TEST_DOCTYPE_NWF("<!DOCTYPE student SYSTEM 'student.DTD [ <!ELEMENT student (#PCDATA)> ]>");
TEST_DOCTYPE_NWF("<!DOCTYPE student [ <!ELEMENT student (#PCDATA)> <!ENTITY info PUBLIC '..\\info.dtd> ]>");
TEST_DOCTYPE_NWF("<!DOCTYPE student [ <!ELEMENT student (#PCDATA)> <!ENTITY info PUBLIC '..\\info'.dtd'> ]>");
TEST_DOCTYPE_NWF("<!DOCTYPE student [ <!ELEMENT student (#PCDATA)> <!ENTITY info PUBLIC \"..\\info.dtd> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE student [ <!ENTITY info PUBLIC \"This is a {test} \" \"student.dtd\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE aniaml [ <!ELEMENT animal ANY> <!ENTITY generalE \"leopard\"> &generalE; <!ENTITY % parameterE \"<!ELEMENT leopard EMPTY>\"> %parameterE; ] animal>");
TEST_DOCTYPE_WF("<!DOCTYPE animal SYSTEM \"ibm28an01.dtd\" [ <!ELEMENT animal (cat|tiger|leopard)+> <!NOTATION animal_class SYSTEM \"ibm29v01.txt\"> <!ELEMENT cat ANY> <!ENTITY forcat \"This is a small cat\"> <!ELEMENT tiger (#PCDATA)> <!ELEMENT small EMPTY> <!ELEMENT big EMPTY> <!ATTLIST tiger color CDATA #REQUIRED> <?sound \"This is a PI\" ?> <!-- This is a comment --> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE animal [ <!ELEMENT animal ANY> <!ENTITY % parameterE \"cat SYSTEM\"> <!NOTATION %parameterE; \"cat.txt\"> ]>");
TEST_DOCTYPE_NWF("<!DOCTYPE animal [ <!ELEMENT animal ANY> <!ENTITY % parameterE \"A music file -->\"> <!-- Parameter reference appears inside a comment in DTD --> <!-- This is %parameterE; ]>");
TEST_DOCTYPE_NWF("<!DOCTYPE animal [ <!ELEMENT animal ANY> <!ENTITY % parameterE \"A music file ?>\"> <?music %parameterE; ]>");
TEST_DOCTYPE_NWF("<!DOCTYPE animal [ <!ELEMENT animal ANY> <!ENTITY % parameterE \"leopard EMPTY>\"> <!ELEMENT %parameterE; ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!ELEMENT root ANY> <!ATTLIST root attr1 CDATA #IMPLIED> <!ATTLIST root attr2 CDATA #IMPLIED> <!ENTITY withlt \"have <lessthan> inside\"> <!ENTITY aIndirect \"&withlt;\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!ELEMENT root (#PCDATA)> <!--* Mising Name S contentspec in elementdecl *--> <!ELEMENT > ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!ELEMENT root (#PCDATA)> <!ELEMENT a ANY> <!ELEMENT b ANY> <!--* extra separator in seq *--> <!ELEMENT aElement ((a|b),,a)? > ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!ELEMENT root (#PCDATA)> <!ELEMENT a ANY> <!--* Missing white space before Name in AttDef *--> <!ATTLIST a attr1 CDATA \"default\"attr2 ID #required> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE test [ <!ELEMENT test ANY> <!ELEMENT one EMPTY> <!ELEMENT two EMPTY> <!NOTATION this SYSTEM \"alpha\"> <!ATTLIST three attr NOTATION (\"this\") #IMPLIED> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!-- DTD for Production 62--> <![ include [ <!ELEMENT tiger EMPTY> <!ELEMENT animal ANY> ]]> <!--Negative test with pattern1 of P62--> <!--include(Case sensitive)--> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <![[INCLUDE[ <!ELEMENT tiger EMPTY> <!ELEMENT animal ANY> ]]> ]>");
TEST_DOCTYPE_NWF("<!DOCTYPE root [ <?[INCLUDE[ <!ELEMENT tiger EMPTY> <!ELEMENT animal ANY> ]]> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <![[ <!ELEMENT tiger EMPTY> <!ELEMENT animal ANY> ]]> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <![INCLUDE <!ELEMENT tiger EMPTY> <!ELEMENT animal ANY> ]]> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <![ <!ELEMENT tiger EMPTY> <!ELEMENT animal ANY> [INCLUDE ]]> ]>");
TEST_DOCTYPE_NWF("<!DOCTYPE root [ <![ INCLUDE [ <!ELEMENT tiger EMPTY> <!ELEMENT animal ANY> ]>");
TEST_DOCTYPE_NWF("<!DOCTYPE root [ <![INCLUDE[ ]> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!ELEMENT root (#PCDATA)> <!--* PE referenced before declared, against WFC: entity declared --> %paaa; <!ENTITY % paaa \"<!ATTLIST root att CDATA #IMPLIED>\"> <!ENTITY aaa \"aString\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!ELEMENT root (#PCDATA)> <!ATTLIST root att CDATA #IMPLIED> <!--* missing space *--> <!ENTITY% paaa \"<!-- comments -->\"> %paaa; ]>");
TEST_DOCTYPE_NWF("<!DOCTYPE root [ <!ELEMENT root (#PCDATA)> <!ATTLIST root att CDATA #IMPLIED> <!--* missing closing bracket *--> <!ENTITY % paaa \"<!-- comments -->\" %paaa; ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root PUBLIC \"-//W3C//DTD//EN\"\"empty.dtd\" [ <!ELEMENT root (#PCDATA)> <!ATTLIST root att CDATA #IMPLIED> ]>");
}
TEST(parse_doctype_xmlconf_ibm_3)
{
TEST_DOCTYPE_WF("<!DOCTYPE animal [ <!ELEMENT animal (cat|tiger|leopard)+> <!ELEMENT cat EMPTY> <!ELEMENT tiger (#PCDATA)> <!ELEMENT leopard ANY> <!ELEMENT small EMPTY> <!ELEMENT big EMPTY> <!ATTLIST tiger color CDATA #REQUIRED> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE book [ <!ELEMENT book ANY> <!-- This test case covers legal character ranges plus discrete legal characters for production 02. --> <?NAME target ?> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE student [ <!ELEMENT student (#PCDATA)> <!ATTLIST student first CDATA #REQUIRED middle CDATA #IMPLIED last CDATA #REQUIRED > <!ENTITY myfirst \"Snow\"> <!ENTITY mymiddle \"I\"> <!ENTITY mylast 'Man &myfirst; and &myfirst; mymiddle;.'> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE student SYSTEM 'student.dtd'[ ]>");
TEST_DOCTYPE_WF("<!DOCTYPE student [ <!ELEMENT student (#PCDATA)> <!ENTITY unref SYSTEM \"\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE student PUBLIC \"\" \"student.dtd\"[ ]>");
TEST_DOCTYPE_WF("<!DOCTYPE student PUBLIC '' 'student.dtd'[ ]>");
TEST_DOCTYPE_WF("<!DOCTYPE student PUBLIC \"The big ' in it\" \"student.dtd\"[ ]>");
TEST_DOCTYPE_WF("<!DOCTYPE student PUBLIC 'The latest version' 'student.dtd'[ ]>");
TEST_DOCTYPE_WF("<!DOCTYPE student PUBLIC \"#x20 #xD #xA abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ -'()+,./:=?;!*#@$_% \" \"student.dtd\"[ ]>");
TEST_DOCTYPE_WF("<!DOCTYPE student [ <!ELEMENT student (#PCDATA)> <!----> ]>");
TEST_DOCTYPE_NWF("<!DOCTYPE student [ <!ELEMENT student (#PCDATA)> <!---> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE student [ <!ELEMENT student (#PCDATA)> <?pi?> ]>");
TEST_DOCTYPE_NWF("<!DOCTYPE student [ <!ELEMENT student (#PCDATA)> <?> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE student [ <!ELEMENT student (#PCDATA)> <?MyInstruct AVOID ? BEFORE > IN PI ?> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE animal SYSTEM \"ibm28v02.dtd\" [ <!NOTATION animal_class SYSTEM \"ibm28v02.txt\"> <!ENTITY forcat \"This is a small cat\"> <!ELEMENT tiger (#PCDATA)> <!ENTITY % make_small \"<!ELEMENT small EMPTY>\"> <!ENTITY % make_leopard_element \"<!ELEMENT leopard ANY>\"> <!ENTITY % make_attlist \"<!ATTLIST tiger color CDATA #REQUIRED>\"> %make_leopard_element; <!ELEMENT cat ANY> %make_small; <!ENTITY % make_big \"<!ELEMENT big EMPTY>\"> %make_big; %make_attlist; <?sound \"This is a PI\" ?> <!-- This is a valid test file for p28 --> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE animal [ <![INCLUDE[ <!ENTITY % rootElement \"<!ELEMENT animal ANY>\"> ]]> %rootElement; <!-- Following is a makupdecl --> <!ENTITY % make_tiger_element \"<!ELEMENT tiger EMPTY>\"> %make_tiger_element; <![IGNORE[ <!ELEMENT animal EMPTY> ]]> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!ELEMENT root (a,b)> <!ELEMENT a EMPTY> <!ELEMENT b (#PCDATA|c)* > <!ELEMENT c ANY> <!ENTITY inContent \"<b>General entity reference in element content</b>\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!ELEMENT a EMPTY> <!ELEMENT b (#PCDATA|c)* > <!ELEMENT c ANY> <!--* PE replace Text have both parentheses *--> <!ENTITY % seq1 \"(a,b,c)\"> <!ELEMENT child1 %seq1; > <!--* Another legal PE replace Text *--> <!ENTITY % seq2 \"a,b\"> <!ELEMENT child2 (%seq2;,c) > ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <![IGNORE[ Everything is ignored within an ignored section, except the sub-section delimiters '<![' and ']]>'. These must be balanced <!ok ]]> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <![IGNORE[ Everything is ignored within an ignored section, except the sub-section delimiters '<![' and ']]>'. These must be balanced <![ <!ELEMENT animal EMPTY> ]]> ]]> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <![IGNORE[ begin Everything is ignored within an ignored section, except the sub-section delimiters '<![' and ']]>'. These must be balanced <![ <!ELEMENT animal EMPTY> ]]> nesting <![ <!ELEMENT tiger (#PCDATA)> ]]> nesting again <![ <!ELEMENT abc ANY> ]]> end ]]> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!DOCTYPE root SYSTEM \"ibm69v01.dtd\" [ <!ELEMENT root (#PCDATA|a)* > <!ENTITY % pe1 \"<!-- comment in PE -->\"> %pe1; ]> ]>");
}
TEST(parse_doctype_xmlconf_oasis_1)
{
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!ELEMENT doc EMPTY> <!ENTITY % ent1 \"\"> <!ENTITY ent2 \"text2\"> <!ENTITY % ent3 \"<!-- <!DOCTYPE <!ELEMENT <? '''"&ent2; %ent1;\"> <!ENTITY % ent4 '\"\"''\"'> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <![INCLUDE[ <!ENTITY % rootel \"<!ELEMENT doc EMPTY>\"> ]]> %rootel; <!ATTLIST doc att CDATA #IMPLIED> <![IGNORE[ <!ELEMENT doc (a)> ]]> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <![INCLUDE[<![INCLUDE[ <![IGNORE[ ignored ]]> <!ELEMENT doc EMPTY> ]]>]]> <![IGNORE[ ignored ]]> <![IGNORE[ <!ELEMENT doc ignored ]]> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <![INCLUDE[ <![ INCLUDE [ <!ELEMENT doc EMPTY> <![IGNORE[asdfasdf]]> ]]>]]> <![INCLUDE[]]> <![INCLUDE[ ]]> <![INCLUDE[ ]]> ]>");
TEST_DOCTYPE_NWF("<!DOCTYPE root [ <!ELEMENT doc EMPTY> <![IGNORE[<![]]> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!ELEMENT doc EMPTY> <![IGNORE[ <![INCLUDE[ <!ELEMENT doc ]]>]]> <![ IGNORE [ ]]> <![IGNORE[]]> <![IGNORE[ ]]> <![IGNORE[ ]]> ]>");
TEST_DOCTYPE_NWF("<!DOCTYPE root [ <!ELEMENT doc EMPTY> <![IGNORE[ <![ starts must balance ]]> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!ELEMENT doc EMPTY> <![IGNORE[ Everything is ignored within an ignored section, except the sub-section delimiters '<![' and ']]>'. These must be balanced, but it is no section keyword is required: <![]]> <![DUNNO[ ]]> <![INCLUDE[ asdfasdfasdf <!OK ]]> ] ]> ]] > ]]> <![IGNORE[ < ![ <! [ <![]]>]]> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE doc [ <!ELEMENT doc EMPTY> <!NOTATION not1 SYSTEM \"a%a&b�<!ELEMENT<!--<?</>?>/\''\"> <!NOTATION not2 SYSTEM 'a b\"\"\"'> <!NOTATION not3 SYSTEM \"\"> <!NOTATION not4 SYSTEM ''> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE doc [ <!ELEMENT doc EMPTY> <!NOTATION not1 PUBLIC \"<\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE doc [ <!ELEMENT doc EMPTY> <!NOTATION not1 PUBLIC \"a b cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ\"> <!NOTATION not2 PUBLIC '0123456789-()+,./:=?;!*#@$_%'> <!NOTATION not3 PUBLIC \"0123456789-()+,.'/:=?;!*#@$_%\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE doc SYSTEM \"p31pass1.dtd\" [<!ELEMENT doc EMPTY>]>");
// not actually a doctype :)
xml_document doc;
CHECK(doc.load_string(STR("<!--a <!DOCTYPE <?- ]]>-<[ CDATA [ \"- -'- -<doc>--> <!---->"), parse_full | parse_fragment) && doc.first_child().type() == node_comment && doc.last_child().type() == node_comment && doc.first_child().next_sibling() == doc.last_child());
CHECK(doc.load_string(STR("<?xmla <!DOCTYPE <[ CDATA [</doc> &a%b&#c?>"), parse_full | parse_fragment) && doc.first_child().type() == node_pi && doc.first_child() == doc.last_child());
}
TEST(parse_doctype_xmlconf_xmltest_1)
{
TEST_DOCTYPE_NWF("<!DOCTYPE root [ <![ INCLUDE [ <!ELEMENT doc (#PCDATA)> ]> ]>");
TEST_DOCTYPE_NWF("<!DOCTYPE root [ <!ELEMENT doc (#PCDATA)> <![ IGNORE [");
TEST_DOCTYPE_NWF("<!DOCTYPE root [ <!ELEMENT doc (#PCDATA)> <![ IGNORE [ ]>");
TEST_DOCTYPE_NWF("<!DOCTYPE root [ <!ELEMENT doc (#PCDATA)> <![ INCLUDE [");
TEST_DOCTYPE_NWF("<!DOCTYPE root [ <!ELEMENT doc (#PCDATA)> <![ INCLUDE [ ]>");
TEST_DOCTYPE_WF("<!DOCTYPE root [ <!ELEMENT doc EMPTY> <!ENTITY % e \"<!--\"> %e; -->");
TEST_DOCTYPE_WF("<!DOCTYPE doc [ <!NOTATION foo PUBLIC \"[\" \"null.ent\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE doc [ <!ELEMENT doc (#PCDATA)> <!ATTLIST doc a CDATA #IMPLIED> <!ENTITY e '\"'> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE doc [ <!ENTITY e \"<foo a='&'></foo>\"> ]>");
TEST_DOCTYPE_WF("<!DOCTYPE doc [ <!ELEMENT doc (#PCDATA)> <!ENTITY e \"<![CDATA[Tim & Michael]]>\"> ]>");
}
TEST_XML_FLAGS(parse_doctype_value, "<!DOCTYPE doc [ <!ELEMENT doc (#PCDATA)> <!ENTITY e \"<![CDATA[Tim & Michael]]>\"> ]>", parse_fragment | parse_doctype)
{
xml_node n = doc.first_child();
CHECK(n.type() == node_doctype);
CHECK_STRING(n.value(), STR("doc [ <!ELEMENT doc (#PCDATA)> <!ENTITY e \"<![CDATA[Tim & Michael]]>\"> ]"));
}
TEST(parse_doctype_error_toplevel)
{
xml_document doc;
CHECK(doc.load_string(STR("<node><!DOCTYPE></node>")).status == status_bad_doctype);
CHECK(doc.load_string(STR("<node><!DOCTYPE></node>"), parse_doctype).status == status_bad_doctype);
}
TEST(parse_doctype_error_ignore)
{
xml_document doc;
CHECK(doc.load_string(STR("<!DOCTYPE root [ <![IGNORE[ ")).status == status_bad_doctype);
CHECK(doc.load_string(STR("<!DOCTYPE root [ <![IGNORE[ "), parse_doctype).status == status_bad_doctype);
CHECK(doc.load_string(STR("<!DOCTYPE root [ <![IGNORE[ <![INCLUDE[")).status == status_bad_doctype);
CHECK(doc.load_string(STR("<!DOCTYPE root [ <![IGNORE[ <![INCLUDE["), parse_doctype).status == status_bad_doctype);
}
TEST(parse_doctype_stackless_group)
{
std::basic_string<char_t> str;
int count = 100000;
str += STR("<!DOCTYPE ");
for (int i = 0; i < count; ++i)
str += STR("<!G ");
for (int j = 0; j < count; ++j)
str += STR(">");
str += STR(">");
xml_document doc;
CHECK(doc.load_string(str.c_str(), parse_fragment));
}
TEST(parse_doctype_stackless_ignore)
{
std::basic_string<char_t> str;
int count = 100000;
str += STR("<!DOCTYPE ");
for (int i = 0; i < count; ++i)
str += STR("<![IGNORE[ ");
for (int j = 0; j < count; ++j)
str += STR("]]>");
str += STR(">");
xml_document doc;
CHECK(doc.load_string(str.c_str(), parse_fragment));
}
|