summaryrefslogtreecommitdiff
path: root/src/pugixml.hpp
blob: 009b10a3682e2d85a7e30dd2b24ba704bfff89a1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
///////////////////////////////////////////////////////////////////////////////
//
// Pug Improved XML Parser - Version 0.2
// --------------------------------------------------------
// Copyright (C) 2006, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
// This work is based on the pugxml parser, which is:
// Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
// Released into the Public Domain. Use at your own risk.
// See pugxml.xml for further information, history, etc.
// Contributions by Neville Franks (readonly@getsoft.com).
//
///////////////////////////////////////////////////////////////////////////////

#ifndef HEADER_PUGIXML_HPP
#define HEADER_PUGIXML_HPP

// Uncomment this to disable STL
// #define PUGIXML_NO_STL

// C library headers that are needed even when the STL is disabled:
// size_t (memory_block_size) and strcmp (xml_node::all_elements_by_name).
// The .h forms are used because this header refers to the unqualified global names.
#include <stddef.h>
#include <string.h>

#ifndef PUGIXML_NO_STL
#	include <istream>
#	include <iterator>
#	include <string>
#endif

/// The PugiXML Parser namespace.
namespace pugi
{
	/// Kinds of node that can appear in a document tree.
	/// See 'xml_node_struct::type'
	enum xml_node_type
	{
		node_null		= 0,	///< An undifferentiated entity.
		node_document	= 1,	///< A document tree's absolute root.
		node_element	= 2,	///< E.g. '<...>'
		node_pcdata		= 3,	///< E.g. '>...<'
		node_cdata		= 4,	///< E.g. '<![CDATA[...]]>'
		node_comment	= 5,	///< E.g. '<!--...-->'
		node_pi			= 6		///< E.g. '<?...?>'
	};

	/// Parser Options

	/// Memory block size in bytes, 32 kb (size of xml_memory_block::data).
	const size_t memory_block_size = 32 * 1024;

	// Every individual option below is a single bit, so options can be combined with '|'.
	const unsigned int parse_minimal			= 0u;		///< No option bits set.
	const unsigned int parse_pi					= 1u << 0;	///< Parse '<?...?>'
	const unsigned int parse_comments			= 1u << 1;	///< Parse '<!--...-->'
	const unsigned int parse_cdata				= 1u << 2;	///< Parse '<![CDATA[...]]>'
	const unsigned int parse_ws_pcdata			= 1u << 3;	///< Skip PCDATA that consists only of whitespaces
	const unsigned int parse_ext_pcdata			= 1u << 4;	///< Skip PCDATA that is outside all tags (i.e. root)
	const unsigned int parse_trim_pcdata		= 1u << 5;	///< Trim '>...<'
	const unsigned int parse_trim_attribute		= 1u << 6;	///< Trim 'foo="..."'.
	const unsigned int parse_escapes_pcdata		= 1u << 7;	///< Parse &lt;, &gt;, &amp;, &quot;, &apos;, &#.. sequences in pcdata
	const unsigned int parse_escapes_attribute 	= 1u << 8;	///< Parse &lt;, &gt;, &amp;, &quot;, &apos;, &#.. sequences in attributes
	const unsigned int parse_wnorm_pcdata		= 1u << 9;	///< Normalize spaces in pcdata
	const unsigned int parse_wnorm_attribute	= 1u << 10;	///< Normalize spaces in attributes
	const unsigned int parse_wconv_attribute	= 1u << 11;	///< Convert space-like characters to spaces in attributes (only if wnorm is not set)
	const unsigned int parse_eol_pcdata			= 1u << 12;	///< Perform EOL handling in pcdata
	const unsigned int parse_eol_attribute		= 1u << 13;	///< Perform EOL handling in attributes
	const unsigned int parse_eol_cdata			= 1u << 14;	///< Perform EOL handling in CDATA sections
	const unsigned int parse_check_end_tags		= 1u << 16;	///< Check start and end tag names and return error if names mismatch
	const unsigned int parse_match_end_tags		= 1u << 17;	///< Try to find corresponding start tag for an end tag

	/// Default mask: the low 24 bits set, except parse_ws_pcdata, parse_trim_attribute, parse_pi and parse_comments.
	const unsigned int parse_default			= 0x00FFFFFFu & ~(parse_ws_pcdata | parse_trim_attribute | parse_pi | parse_comments);

	/// Parse with flags in xml_parser (default argument of the xml_parser::parse overloads).
	const unsigned int parse_noset				= 1u << 31;

	/// Option combination named after the W3C; it is the union of exactly the flags listed here.
	const unsigned int parse_w3c				= parse_pi | parse_comments | parse_cdata | parse_ws_pcdata |
												parse_escapes_pcdata | parse_escapes_attribute |
												parse_wconv_attribute | parse_eol_cdata |
												parse_check_end_tags;

	/// Forward declarations

	// Internal storage structures; only pointers to them are used in this header.
	struct xml_attribute_struct;
	struct xml_node_struct;

	// Iterator classes, declared later in this header.
	class xml_node_iterator;
	class xml_attribute_iterator;

	// Callback interface used by xml_node::traverse(), declared later in this header.
	class xml_tree_walker;

	/// Provides a light-weight, read-only wrapper around an xml_attribute_struct.
	///	Note: xml_attribute does not create any memory for the attribute it wraps; 
	///	it only wraps a pointer to an existing xml_attribute_struct.
	class xml_attribute
	{
		friend class xml_attribute_iterator;
		friend class xml_node;

	private:
		const xml_attribute_struct* _attr; ///< The internal attribute pointer.
	
    	/// Safe bool type: a pointer-to-member type, so the wrapper can be tested in a
    	/// condition without being implicitly convertible to an arithmetic type.
    	typedef const xml_attribute_struct* xml_attribute::*unspecified_bool_type;

		/// Initializing ctor; private, so only the friend classes can wrap a raw pointer.
		explicit xml_attribute(const xml_attribute_struct* attr);

	public:
		/// Default ctor
		xml_attribute();
		
	public:
		/// Comparison operators
		// NOTE(review): presumably these compare the wrapped pointers - confirm against the implementation.
		bool operator==(const xml_attribute& r) const;
		bool operator!=(const xml_attribute& r) const;
		bool operator<(const xml_attribute& r) const;
		bool operator>(const xml_attribute& r) const;
		bool operator<=(const xml_attribute& r) const;
		bool operator>=(const xml_attribute& r) const;
	
    	/// Safe bool conversion; lets an attribute be used directly as a loop or 'if' condition.
    	operator unspecified_bool_type() const;

    	/// Get next attribute if any, else xml_attribute()
    	xml_attribute next_attribute() const;

    	/// Get previous attribute if any, else xml_attribute()
    	xml_attribute previous_attribute() const;

		/// Cast attribute value as int. If not found, return 0.
		/// \return Attribute value as int, or 0.
		int as_int() const;

		/// Cast attribute value as double. If not found, return 0.0.
		/// \return Attribute value as double, or 0.0.
		double as_double() const;
	
		/// Cast attribute value as float. If not found, return 0.0.
		/// \return Attribute value as float, or 0.0.
		float as_float() const;

		/// Cast attribute value as bool. If not found, return false.
		/// \return Attribute value as bool, or false.
		bool as_bool() const;

	public:
		/// Report whether this wrapper is empty.
		/// NOTE(review): presumably true when the internal pointer is null (mirroring
		/// xml_node::empty), although this was previously described as "true if internal
		/// pointer is valid" - confirm against the implementation.
		bool empty() const;

	public:
		/// Access the attribute name.
		const char* name() const;

		/// Access the attribute value.
		const char* value() const;
	};

	/// Provides a light-weight, read-only wrapper around an xml_node_struct.
	/// Every member function is const; the class only navigates and queries the tree.
	/// Functions with a '_w' suffix are the wildcard-matching variants of the
	/// function with the same name without the suffix.
	class xml_node
	{
		friend class xml_node_iterator;
		friend class xml_parser;

	private:
		const xml_node_struct* _root; ///< Pointer to node root.

    	/// Safe bool type: a pointer-to-member type, so the wrapper can be tested in a
    	/// condition without being implicitly convertible to an arithmetic type.
    	typedef const xml_node_struct* xml_node::*unspecified_bool_type;

	private:
		/// Node is tree root.
		bool type_document() const;

	public:
		/// Default constructor.
		///	Node root points to a dummy 'xml_node_struct' structure. Test for this 
		///	with 'empty'.
		xml_node();

		/// Construct, wrapping the given 'xml_node_struct' pointer.
		explicit xml_node(const xml_node_struct* p);

	public:
		/// Iterator type for child nodes.
		typedef xml_node_iterator iterator;

		/// Attribute iterator type.
		typedef xml_attribute_iterator attribute_iterator;

		/// Access the begin iterator for this node's collection of child nodes.
		/// Same as 'children_begin'.
		iterator begin() const;
	
		/// Access the end iterator for this node's collection of child nodes.
		/// Same as 'children_end'.
		iterator end() const;
		
		/// Access the begin iterator for this node's collection of child nodes.
		/// Same as 'begin'.
		iterator children_begin() const;
	
		/// Access the end iterator for this node's collection of child nodes.
		/// Same as 'end'.
		iterator children_end() const;
	
		/// Access the begin iterator for this node's collection of attributes.
		attribute_iterator attributes_begin() const;
	
		/// Access the end iterator for this node's collection of attributes.
		attribute_iterator attributes_end() const;

		/// Access the begin iterator for this node's collection of siblings.
		iterator siblings_begin() const;
	
		/// Access the end iterator for this node's collection of siblings.
		iterator siblings_end() const;
	
	public:
    	/// Safe bool conversion; lets a node be used directly as a loop or 'if' condition.
		operator unspecified_bool_type() const;
	
		/// Comparison operators
		// NOTE(review): presumably these compare the wrapped pointers - confirm against the implementation.
		bool operator==(const xml_node& r) const;
		bool operator!=(const xml_node& r) const;
		bool operator<(const xml_node& r) const;
		bool operator>(const xml_node& r) const;
		bool operator<=(const xml_node& r) const;
		bool operator>=(const xml_node& r) const;

	public:
		/// Node pointer is null, or type is node_null.
		bool empty() const;

	public:
		/// Access node entity type.
		xml_node_type type() const;

		/// Access pointer to node name if any, else empty string.
		const char* name() const;

		/// Access pointer to data if any, else empty string.
		const char* value() const;
	
		/// Access child node at name as xml_node or xml_node(NULL) if bad name.
		xml_node child(const char* name) const;

		/// Access child node at name as xml_node or xml_node(NULL) if bad name.
		/// Enable wildcard matching.
		xml_node child_w(const char* name) const;

		/// Access the attribute having 'name'.
		xml_attribute attribute(const char* name) const;

		/// Access the attribute having 'name'.
		/// Enable wildcard matching.
		xml_attribute attribute_w(const char* name) const;

		/// Access sibling node at name as xml_node or xml_node(NULL) if bad name.
		xml_node sibling(const char* name) const;

		/// Access sibling node at name as xml_node or xml_node(NULL) if bad name.
		/// Enable wildcard matching.
		xml_node sibling_w(const char* name) const;

		/// Access current node's next sibling by position and name.
		xml_node next_sibling(const char* name) const;

		/// Access current node's next sibling by position and name.
		/// Enable wildcard matching.
		xml_node next_sibling_w(const char* name) const;

		/// Access current node's next sibling by position.
		xml_node next_sibling() const;

		/// Access current node's previous sibling by position and name.
		xml_node previous_sibling(const char* name) const;

		/// Access current node's previous sibling by position and name.
		/// Enable wildcard matching.
		xml_node previous_sibling_w(const char* name) const;

		/// Access current node's previous sibling by position.
		xml_node previous_sibling() const;

		/// Access node's parent if any, else xml_node(NULL)
		xml_node parent() const;

		/// Return PCDATA/CDATA that is child of current node. If none, return empty string.
		const char* child_value() const;

		/// Return PCDATA/CDATA that is child of specified child node. If none, return empty string.
		const char* child_value(const char* name) const;

		/// Return PCDATA/CDATA that is child of specified child node. If none, return empty string.
		/// Enable wildcard matching.
		const char* child_value_w(const char* name) const;

	public:
		/// Access node's first attribute if any, else xml_attribute()
		xml_attribute first_attribute() const;

		/// Access node's last attribute if any, else xml_attribute()
        xml_attribute last_attribute() const;

		/// Find all elements having the given name.
		/// Descendants at every depth are visited; each match is written through 'it'.
		/// \param name - Name to compare against each node's name.
		/// \param it - Output iterator that receives the matching xml_node values.
		template <typename OutputIterator> void all_elements_by_name(const char* name, OutputIterator it) const;

		/// Find all elements having the given name.
		/// Enable wildcard matching.
		/// \param name - Wildcard pattern to compare against each node's name.
		/// \param it - Output iterator that receives the matching xml_node values.
		template <typename OutputIterator> void all_elements_by_name_w(const char* name, OutputIterator it) const;

		/// Access node's first child if any, else xml_node()
		xml_node first_child() const;

		/// Access node's last child if any, else xml_node()
        xml_node last_child() const;
		
		/// Find attribute using the predicate
		/// Predicate should take xml_attribute and return bool.
		/// \return First attribute for which the predicate is true, else xml_attribute().
		template <typename Predicate> xml_attribute find_attribute(Predicate pred) const;

		/// Find child using the predicate
		/// Predicate should take xml_node and return bool.
		/// \return First direct child for which the predicate is true, else xml_node().
		template <typename Predicate> xml_node find_child(Predicate pred) const;

		/// Recursively-implemented depth-first find element using the predicate
		/// Predicate should take xml_node and return bool.
		/// \return First descendant for which the predicate is true, else xml_node().
		template <typename Predicate> xml_node find_element(Predicate pred) const;

		/// Recursively-implemented depth-first find the first matching element. 
		/// Use for shallow drill-downs.
		xml_node first_element(const char* name) const;

		/// Recursively-implemented depth-first find the first matching element. 
		/// Use for shallow drill-downs.
		/// Enable wildcard matching.
		xml_node first_element_w(const char* name) const;

		/// Recursively-implemented depth-first find the first matching element 
		/// also having matching PCDATA.
		xml_node first_element_by_value(const char* name, const char* value) const;

		/// Recursively-implemented depth-first find the first matching element 
		/// also having matching PCDATA.
		/// Enable wildcard matching.
		xml_node first_element_by_value_w(const char* name, const char* value) const;

		/// Recursively-implemented depth-first find the first matching element 
		/// also having matching attribute.
		xml_node first_element_by_attribute(const char* name, const char* attr_name, const char* attr_value) const;

		/// Recursively-implemented depth-first find the first matching element 
		/// also having matching attribute.
		/// Enable wildcard matching.
		xml_node first_element_by_attribute_w(const char* name, const char* attr_name, const char* attr_value) const;

		/// Recursively-implemented depth-first find the first element 
		/// having matching attribute.
		xml_node first_element_by_attribute(const char* attr_name, const char* attr_value) const;

		/// Recursively-implemented depth-first find the first element 
		/// having matching attribute.
		/// Enable wildcard matching.
		xml_node first_element_by_attribute_w(const char* attr_name, const char* attr_value) const;

		/// Recursively-implemented depth-first find the first matching entity. 
		/// Use for shallow drill-downs.
		xml_node first_node(xml_node_type type) const;

#ifndef PUGIXML_NO_STL
		/// Compile the absolute node path from root as a text string.
		/// Only available when the STL is enabled, because it returns std::string.
		/// \param delimiter - Delimiter character to insert between element names.
		/// \return path string (e.g. with '/' as delimiter, '/document/.../this').
		std::string path(char delimiter = '/') const;
#endif

		/// Search for a node by path.
		/// \param path - Path string; e.g. './foo/bar' (relative to node), '/foo/bar' (relative 
		/// to root), '../foo/bar' (pop relative position).
		/// \param delimiter - Delimiter character to use in tokenizing path.
		/// \return Matching node, or xml_node() if not found.
		xml_node first_element_by_path(const char* path, char delimiter = '/') const;

		/// Recursively traverse the tree.
		/// \param walker - Receives the callbacks declared in xml_tree_walker.
		// NOTE(review): the meaning of the returned bool is not visible in this header;
		// presumably false when a walker callback aborted the traversal - confirm.
		bool traverse(xml_tree_walker& walker) const;
	};

	/// Child node iterator.
	/// Bidirectional iterator whose value type is 'const xml_node'; the std::iterator
	/// base (which supplies the iterator typedefs) is only used when the STL is enabled.
	class xml_node_iterator
#ifndef PUGIXML_NO_STL
	: public std::iterator<std::bidirectional_iterator_tag, const xml_node>
#endif
	{
		friend class xml_node;

	private:
		// NOTE(review): presumably _wrap is the node the iterator currently refers to and
		// _prev is the node before it (needed to step back from past-the-end) - confirm
		// against the implementation.
		xml_node _prev;
		xml_node _wrap;

		/// Initializing ctor; private, so only xml_node can build an iterator from a raw pointer.
		explicit xml_node_iterator(const xml_node_struct* ref);
	public:
		/// Default ctor
		xml_node_iterator();

		/// Initializing ctor; not explicit, so an xml_node converts implicitly to an iterator.
		xml_node_iterator(const xml_node& node);

		/// Initializing ctor (for past-the-end)
		xml_node_iterator(const xml_node_struct* ref, const xml_node_struct* prev);

		/// Equality comparison
		bool operator==(const xml_node_iterator& rhs) const;
		bool operator!=(const xml_node_iterator& rhs) const;

		/// Access the node the iterator refers to.
		const xml_node& operator*() const;
		const xml_node* operator->() const;

		/// Pre- and post-increment. Note that the prefix form returns a const reference.
		const xml_node_iterator& operator++();
		xml_node_iterator operator++(int);
		
		/// Pre- and post-decrement. Note that the prefix form returns a const reference.
		const xml_node_iterator& operator--();
		xml_node_iterator operator--(int);
	};

	/// Attribute iterator.
	/// Bidirectional iterator whose value type is 'const xml_attribute'; the std::iterator
	/// base (which supplies the iterator typedefs) is only used when the STL is enabled.
	class xml_attribute_iterator
#ifndef PUGIXML_NO_STL
	: public std::iterator<std::bidirectional_iterator_tag, const xml_attribute>
#endif
	{
		friend class xml_node;

	private:
		// NOTE(review): presumably _wrap is the attribute the iterator currently refers to
		// and _prev is the attribute before it (needed to step back from past-the-end) -
		// confirm against the implementation.
		xml_attribute _prev;
		xml_attribute _wrap;

		/// Initializing ctor; private, so only xml_node can build an iterator from a raw pointer.
		explicit xml_attribute_iterator(const xml_attribute_struct* ref);
	public:
		/// Default ctor
		xml_attribute_iterator();

		/// Initializing ctor; not explicit, so an xml_attribute converts implicitly to an iterator.
		xml_attribute_iterator(const xml_attribute& attr);

		/// Initializing ctor (for past-the-end)
		xml_attribute_iterator(const xml_attribute_struct* ref, const xml_attribute_struct* prev);

		/// Equality comparison
		bool operator==(const xml_attribute_iterator& rhs) const;
		bool operator!=(const xml_attribute_iterator& rhs) const;

		/// Access the attribute the iterator refers to.
		const xml_attribute& operator*() const;
		const xml_attribute* operator->() const;

		/// Pre- and post-increment. Note that the prefix form returns a const reference.
		const xml_attribute_iterator& operator++();
		xml_attribute_iterator operator++(int);
		
		/// Pre- and post-decrement. Note that the prefix form returns a const reference.
		const xml_attribute_iterator& operator--();
		xml_attribute_iterator operator--(int);
	};

	/// Abstract tree walker class for xml_node::traverse().
	/// Derive from it and override begin()/end() to receive a callback for each node;
	/// all member functions are virtual and can be overridden.
	class xml_tree_walker
	{
	private:
		int _deep; ///< Current node depth.
	public:
		/// Default ctor
		xml_tree_walker();

		/// Virtual dtor, so that derived walkers can be destroyed through a base pointer.
		virtual ~xml_tree_walker();

	public:
		/// Increment node depth.
		virtual void push();

		/// Decrement node depth
		virtual void pop();

		/// Access node depth
		virtual int depth() const;
	
	public:
		/// Callback when traverse on a node begins.
		/// \return returning false will abort the traversal.
		virtual bool begin(const xml_node&);

		/// Callback when traverse on a node ends.
		/// \return Returning false will abort the traversal.
		virtual bool end(const xml_node&);
	};

	/// Memory block (internal)
	/// A fixed-size buffer of memory_block_size bytes with a link to another block.
	/// xml_parser embeds one block by value, so each parser object is larger than
	/// memory_block_size bytes.
	struct xml_memory_block
	{
		xml_memory_block();

		xml_memory_block* next; ///< Next block in the list. NOTE(review): presumably null for the last block - confirm.
		size_t size; ///< NOTE(review): presumably the number of bytes of 'data' in use - confirm against the implementation.

		char data[memory_block_size]; ///< Raw storage of the block.
	};

	/// Empty tag type used to select the xml_parser constructor / parse() overloads
	/// that gain ownership of the passed string.
	struct transfer_ownership_tag {};

	/// Provides a high-level interface to the XML parser.
	/// The parser holds the document tree; the tree is accessed through document()
	/// or through the conversion to xml_node.
	class xml_parser
	{
	private:
		char*				_buffer; ///< character buffer

		xml_memory_block	_memory; ///< Memory block
		
		xml_node_struct*	_xmldoc; ///< Pointer to current XML document tree root.
		unsigned int		_optmsk; ///< Parser options.
	
		// Copy constructor and copy assignment are declared private, so a parser
		// cannot be copied from outside the class.
		xml_parser(const xml_parser&);
		const xml_parser& operator=(const xml_parser&);

		void free();	///< free memory

	public:
		/// Constructor.
		/// Not explicit, so an unsigned int converts implicitly to xml_parser.
		/// \param optmsk - Options mask.
		xml_parser(unsigned int optmsk = parse_default);

		/// Parse constructor.
		/// \param xmlstr - readwrite string with xml data
		/// \param optmsk - Options mask.
		/// \see parse
		xml_parser(char* xmlstr, unsigned int optmsk = parse_default);

		/// Parse constructor that gains ownership.
		/// \param xmlstr - readwrite string with xml data
		/// \param optmsk - Options mask.
		/// \see parse
		xml_parser(const transfer_ownership_tag&, char* xmlstr, unsigned int optmsk = parse_default);

#ifndef PUGIXML_NO_STL
		/// Parse constructor.
		/// \param stream - stream with xml data
		/// \param optmsk - Options mask.
		/// \see parse
		xml_parser(std::istream& stream, unsigned int optmsk = parse_default);
#endif

		/// Dtor
		~xml_parser();

	public:
		/// Cast as xml_node (same as document).
		operator xml_node() const;

		/// Returns the root wrapped by an xml_node.
		xml_node document() const;

	public:
		/// Get parser options mask.
		unsigned int options() const;

		/// Set parser options mask.
		// NOTE(review): whether the returned value is the previous or the new mask is
		// not visible in this header - confirm against the implementation.
		unsigned int options(unsigned int optmsk);

	public:
#ifndef PUGIXML_NO_STL
		/// Parse the given XML stream
		/// \param stream - stream with xml data
		/// \param optmsk - Options mask; the default, parse_noset, means "parse with flags in xml_parser".
		void parse(std::istream& stream, unsigned int optmsk = parse_noset);
#endif

		/// Parse the given XML string in-situ.
		/// \param xmlstr - readwrite string with xml data
		/// \param optmsk - Options mask; the default, parse_noset, means "parse with flags in xml_parser".
		/// \return last position or NULL
		/// \rem input string is zero-segmented
		char* parse(char* xmlstr, unsigned int optmsk = parse_noset);
		
		/// Parse the given XML string in-situ (gains ownership).
		/// \param xmlstr - readwrite string with xml data
		/// \param optmsk - Options mask; the default, parse_noset, means "parse with flags in xml_parser".
		/// \return last position or NULL
		/// \rem input string is zero-segmented
		char* parse(const transfer_ownership_tag&, char* xmlstr, unsigned int optmsk = parse_noset);
	};

	/// Utility functions for xml
	/// Both functions return standard string types, so they are only declared when the STL is enabled.
	
#ifndef PUGIXML_NO_STL
	/// Convert utf16 to utf8
	/// \param str - wide-character string to convert.
	/// \return The converted text as std::string.
	std::string utf8(const wchar_t* str);
	
	/// Convert utf8 to utf16
	/// \param str - narrow-character string to convert.
	/// \return The converted text as std::wstring.
	std::wstring utf16(const char* str);
#endif
}

/// Inline implementation

namespace pugi
{
	// Implementation details that the inline templates below need to see.
	namespace impl
	{
		// String comparison used by the '_w' (wildcard) functions. The template below
		// treats a zero result as a match, like strcmp.
		// NOTE(review): which argument is the pattern is not visible here; the caller
		// in this header passes the requested name first and the node name second.
		int strcmpwild(const char*, const char*);
	}

	template <typename OutputIterator> void xml_node::all_elements_by_name(const char* name, OutputIterator it) const
	{
		if (empty()) return;
		
		for (xml_node node = first_child(); node; node = node.next_sibling())
		{
			if (!strcmp(name, node.name()))
			{
				*it = node;
				++it;
			}
			
			if (node.first_child()) node.all_elements_by_name(name, it);
		}
	}

	template <typename OutputIterator> void xml_node::all_elements_by_name_w(const char* name, OutputIterator it) const
	{
		if (empty()) return;
		
		for (xml_node node = first_child(); node; node = node.next_sibling())
		{
			if (!impl::strcmpwild(name, node.name()))
			{
				*it = node;
				++it;
			}
			
			if (node.first_child()) node.all_elements_by_name_w(name, it);
		}
	}
	
	/// Find attribute using the predicate.
	/// \param pred - Callable taking an xml_attribute and returning a value usable as bool.
	/// \return First attribute of this node accepted by 'pred', else xml_attribute().
	template <typename Predicate> inline xml_attribute xml_node::find_attribute(Predicate pred) const
	{
		// An empty node has nothing to scan.
		if (empty()) return xml_attribute();

		xml_attribute candidate = first_attribute();

		while (candidate)
		{
			if (pred(candidate)) return candidate;

			candidate = candidate.next_attribute();
		}

		return xml_attribute();
	}

	/// Find child using the predicate.
	/// Only direct children are examined.
	/// \param pred - Callable taking an xml_node and returning a value usable as bool.
	/// \return First child of this node accepted by 'pred', else xml_node().
	template <typename Predicate> inline xml_node xml_node::find_child(Predicate pred) const
	{
		// An empty node has nothing to scan.
		if (empty()) return xml_node();

		xml_node candidate = first_child();

		while (candidate)
		{
			if (pred(candidate)) return candidate;

			candidate = candidate.next_sibling();
		}

		return xml_node();
	}

	/// Recursively-implemented depth-first find element using the predicate.
	/// Each child is tested before its own descendants are searched.
	/// \param pred - Callable taking an xml_node and returning a value usable as bool.
	/// \return First descendant accepted by 'pred', else xml_node().
	template <typename Predicate> inline xml_node xml_node::find_element(Predicate pred) const
	{
		// An empty node has nothing to scan.
		if (empty()) return xml_node();

		xml_node current = first_child();

		while (current)
		{
			if (pred(current)) return current;

			// Descend only when there is a subtree to search.
			if (current.first_child())
			{
				xml_node match = current.find_element(pred);
				if (match) return match;
			}

			current = current.next_sibling();
		}

		return xml_node();
	}
}

#endif