From 7d24b9b5655d584b6dc8b89df7cbd58d2e940a81 Mon Sep 17 00:00:00 2001 From: "arseny.kapoulkine" Date: Mon, 19 Jul 2010 09:57:32 +0000 Subject: Set svn:eol-style to native for all text files git-svn-id: http://pugixml.googlecode.com/svn/trunk@607 99668b35-9821-0410-8761-19e4c4f06640 --- Jamfile.jam | 318 +- contrib/foreach.hpp | 204 +- docs/manual.qbk | 4014 ++++++------- docs/manual.xsl | 234 +- docs/pugixml.css | 1196 ++-- docs/quickstart.qbk | 510 +- docs/quickstart.xsl | 16 +- docs/samples/character.xml | 16 +- docs/samples/custom_memory_management.cpp | 54 +- docs/samples/include.cpp | 128 +- docs/samples/load_error_handling.cpp | 62 +- docs/samples/load_file.cpp | 32 +- docs/samples/load_memory.cpp | 128 +- docs/samples/load_options.cpp | 60 +- docs/samples/load_stream.cpp | 194 +- docs/samples/modify_add.cpp | 64 +- docs/samples/modify_base.cpp | 86 +- docs/samples/modify_remove.cpp | 54 +- docs/samples/save_custom_writer.cpp | 232 +- docs/samples/save_file.cpp | 34 +- docs/samples/save_options.cpp | 96 +- docs/samples/save_stream.cpp | 36 +- docs/samples/save_subtree.cpp | 52 +- docs/samples/transitions.xml | 14 +- docs/samples/traverse_base.cpp | 102 +- docs/samples/traverse_iter.cpp | 54 +- docs/samples/traverse_predicate.cpp | 96 +- docs/samples/traverse_walker.cpp | 70 +- docs/samples/tree.xml | 24 +- docs/samples/weekly-shift_jis.xml | 156 +- docs/samples/weekly-utf-8.xml | 156 +- docs/samples/xgconsole.xml | 24 +- docs/samples/xpath_error.cpp | 86 +- docs/samples/xpath_query.cpp | 72 +- docs/samples/xpath_select.cpp | 54 +- readme.txt | 104 +- scripts/CMakeLists.txt | 10 +- scripts/premake4.lua | 172 +- scripts/pugixml_vs2005.vcproj | 694 +-- scripts/pugixml_vs2005_static.vcproj | 694 +-- scripts/pugixml_vs2008.vcproj | 686 +-- scripts/pugixml_vs2008_static.vcproj | 686 +-- scripts/pugixml_vs2010.vcxproj | 322 +- scripts/pugixml_vs2010_static.vcxproj | 328 +- src/pugixml.cpp | 9206 ++++++++++++++--------------- src/pugixml.hpp | 4756 +++++++-------- 
src/pugixpath.cpp | 7000 +++++++++++----------- tests/allocator.cpp | 188 +- tests/allocator.hpp | 20 +- tests/archive.pl | 120 +- tests/autotest-local.pl | 252 +- tests/autotest-report.pl | 398 +- tests/common.hpp | 16 +- tests/data/multiline.xml | 6 +- tests/gcov-filter.pl | 26 +- tests/helpers.hpp | 194 +- tests/main.cpp | 298 +- tests/test.cpp | 362 +- tests/test.hpp | 302 +- tests/test_deprecated.cpp | 406 +- tests/test_document.cpp | 1420 ++--- tests/test_dom_modify.cpp | 1318 ++--- tests/test_dom_traverse.cpp | 1512 ++--- tests/test_header_guard.cpp | 6 +- tests/test_header_iosfwd_1.cpp | 6 +- tests/test_header_iosfwd_2.cpp | 6 +- tests/test_header_iostream_1.cpp | 6 +- tests/test_header_iostream_2.cpp | 6 +- tests/test_header_string_1.cpp | 6 +- tests/test_header_string_2.cpp | 6 +- tests/test_memory.cpp | 258 +- tests/test_parse.cpp | 1366 ++--- tests/test_parse_doctype.cpp | 550 +- tests/test_unicode.cpp | 274 +- tests/test_write.cpp | 708 +-- tests/test_xpath.cpp | 454 +- tests/test_xpath_api.cpp | 300 +- tests/test_xpath_functions.cpp | 1494 ++--- tests/test_xpath_operators.cpp | 946 +-- tests/test_xpath_parse.cpp | 544 +- tests/test_xpath_paths.cpp | 944 +-- tests/test_xpath_paths_abbrev_w3c.cpp | 434 +- tests/test_xpath_paths_w3c.cpp | 620 +- tests/test_xpath_xalan_1.cpp | 814 +-- tests/test_xpath_xalan_2.cpp | 798 +-- tests/test_xpath_xalan_3.cpp | 638 +- tests/test_xpath_xalan_4.cpp | 596 +- tests/test_xpath_xalan_5.cpp | 586 +- tests/writer_string.cpp | 154 +- tests/writer_string.hpp | 54 +- 90 files changed, 26399 insertions(+), 26399 deletions(-) diff --git a/Jamfile.jam b/Jamfile.jam index ae986cc..63dd298 100644 --- a/Jamfile.jam +++ b/Jamfile.jam @@ -1,159 +1,159 @@ -# Latest jamplus is needed to use this - -# Targets: -# pugixml - build pugixml library -# tests - build pugixml test suite -# run_tests - run pugixml test suite -# coverage - get test suite coverage - -# Options: -# toolset=name - select toolset -# supported toolsets: mingw*, 
msvc* - -# default toolset/configuration -if ( ! $(toolset) ) -{ - if ( $(OS) = SOLARIS ) - { - toolset = suncc ; - } - else if ( $(UNIX) ) - { - local GCCVERSION = [ Subst [ Shell "gcc -dumpversion" ] : $(NEWLINE) ] ; - toolset = "gcc"$(GCCVERSION) ; - } - else - { - toolset = msvc ; - } -} - -if ( ! $(configuration) ) -{ - configuration = "debug" ; -} - -if ( ! $(defines) ) -{ - defines = "PUGIXML_STANDARD" ; -} - -# coverage options -if ( $(toolset:I=^mingw) || $(toolset:I=^gcc) ) -{ - CCFLAGS = -fprofile-arcs -ftest-coverage ; - LDFLAGS = -fprofile-arcs ; - GCOVFLAGS = -n ; -} - -# build folder -BUILD = build ; - -# enable dependency cache -DEPCACHE.standard = $(BUILD)/.depcache ; - -# rules -include "Jamrules.jam" ; - -# split define sets into list -local DEFINESETS = [ Split $(defines) : ':' ] ; - -# split configurations into list -local CONFIGURATIONS = [ Split $(configuration) : ',' ] ; - -for CONFIG in $(CONFIGURATIONS) -{ - for DEFINESET in $(DEFINESETS) - { - local DEFINES = [ Split $(DEFINESET) : ',' ] ; - - # build folder - local CFGBUILD = $(BUILD)/$(toolset)/$(DEFINES:J=_)/$(CONFIG) ; - - # compilation options - local CFGFLAGS = $(CCFLAGS) [ GetCFlags $(CONFIG) : $(DEFINES) ] ; - - # build library - local PUGIXML = $(CFGBUILD)/pugixml.lib ; - Library $(PUGIXML) : src/pugixml.cpp src/pugixpath.cpp : $(CFGFLAGS) ; - Alias pugixml : $(PUGIXML) ; - - # build tests - local TESTS = $(CFGBUILD)/tests.exe ; - Application $(TESTS) : [ Glob tests : *.cpp ] : $(CFGFLAGS) : $(PUGIXML) ; - Alias tests : $(TESTS) ; - - # run tests - Test $(TESTS)_run : $(TESTS) ; - Alias run_tests : $(TESTS)_run ; - - # gather coverage - Coverage $(TESTS)_coverage : $(PUGIXML) ; - Alias coverage : $(TESTS)_coverage ; - - GCOVFLAGS on $(TESTS)_coverage = $(GCOVFLAGS) -o $(CFGBUILD)/src ; # because stupid gcov can't find files via relative paths - - # add special autotest markers to build log - if $(autotest) - { - COVPREFIX on $(TESTS)_coverage = "... 
autotest $(CONFIG) [$(DEFINESET)]" ; - } - - # gather coverage after tests run - Depends $(TESTS)_coverage : $(TESTS)_run ; - } -} - -# documentation -Documentation docs/manual.html : docs/manual.qbk : docs/manual.xsl ; -Documentation docs/quickstart.html : docs/quickstart.qbk : docs/quickstart.xsl ; - -Alias docs : docs/manual.html docs/quickstart.html ; - -# samples -for SAMPLE in [ Glob docs/samples : *.cpp ] -{ - local CONFIG = "debug" ; - local DEFINES = "PUGIXML_STANDARD" ; - - # build folder - local CFGBUILD = $(BUILD)/$(toolset)/$(DEFINES:J=_)/$(CONFIG) ; - - # compilation options - local CFGFLAGS = $(CCFLAGS) [ GetCFlags $(CONFIG) : $(DEFINES) ] ; - CFGFLAGS += -I src ; - - # build and run sample - local EXECUTABLE = $(CFGBUILD)/samples/$(SAMPLE:S=.exe) ; - local PUGIXML = $(CFGBUILD)/pugixml.lib ; - - Application $(EXECUTABLE) : $(SAMPLE) : $(CFGFLAGS) : $(PUGIXML) ; - - RunSampleAction $(EXECUTABLE)_run : $(EXECUTABLE) ; - Depends $(EXECUTABLE)_run : $(EXECUTABLE) ; - - Depends samples : $(EXECUTABLE)_run ; -} - -# release -VERSION = 0.9 ; -RELEASE_FILES = - [ Glob contrib : *.cpp *.hpp ] - [ Glob src : *.cpp *.hpp ] - [ Glob docs : *.html *.css ] - [ Glob docs/samples : *.cpp *.hpp *.xml ] - [ Glob docs/images : *.png ] - [ Glob docs/manual : *.html ] - @("scripts/**":W=:X=svn) - readme.txt - ; - -actions ArchiveAction -{ - perl tests/archive.pl $(<) $(>) -} - -ArchiveAction pugixml-$(VERSION).zip : $(RELEASE_FILES) ; -ArchiveAction pugixml-$(VERSION).tar.gz : $(RELEASE_FILES) ; -Depends release : pugixml-$(VERSION).zip pugixml-$(VERSION).tar.gz : $(RELEASE_FILES) ; -NotFile release ; +# Latest jamplus is needed to use this + +# Targets: +# pugixml - build pugixml library +# tests - build pugixml test suite +# run_tests - run pugixml test suite +# coverage - get test suite coverage + +# Options: +# toolset=name - select toolset +# supported toolsets: mingw*, msvc* + +# default toolset/configuration +if ( ! 
$(toolset) ) +{ + if ( $(OS) = SOLARIS ) + { + toolset = suncc ; + } + else if ( $(UNIX) ) + { + local GCCVERSION = [ Subst [ Shell "gcc -dumpversion" ] : $(NEWLINE) ] ; + toolset = "gcc"$(GCCVERSION) ; + } + else + { + toolset = msvc ; + } +} + +if ( ! $(configuration) ) +{ + configuration = "debug" ; +} + +if ( ! $(defines) ) +{ + defines = "PUGIXML_STANDARD" ; +} + +# coverage options +if ( $(toolset:I=^mingw) || $(toolset:I=^gcc) ) +{ + CCFLAGS = -fprofile-arcs -ftest-coverage ; + LDFLAGS = -fprofile-arcs ; + GCOVFLAGS = -n ; +} + +# build folder +BUILD = build ; + +# enable dependency cache +DEPCACHE.standard = $(BUILD)/.depcache ; + +# rules +include "Jamrules.jam" ; + +# split define sets into list +local DEFINESETS = [ Split $(defines) : ':' ] ; + +# split configurations into list +local CONFIGURATIONS = [ Split $(configuration) : ',' ] ; + +for CONFIG in $(CONFIGURATIONS) +{ + for DEFINESET in $(DEFINESETS) + { + local DEFINES = [ Split $(DEFINESET) : ',' ] ; + + # build folder + local CFGBUILD = $(BUILD)/$(toolset)/$(DEFINES:J=_)/$(CONFIG) ; + + # compilation options + local CFGFLAGS = $(CCFLAGS) [ GetCFlags $(CONFIG) : $(DEFINES) ] ; + + # build library + local PUGIXML = $(CFGBUILD)/pugixml.lib ; + Library $(PUGIXML) : src/pugixml.cpp src/pugixpath.cpp : $(CFGFLAGS) ; + Alias pugixml : $(PUGIXML) ; + + # build tests + local TESTS = $(CFGBUILD)/tests.exe ; + Application $(TESTS) : [ Glob tests : *.cpp ] : $(CFGFLAGS) : $(PUGIXML) ; + Alias tests : $(TESTS) ; + + # run tests + Test $(TESTS)_run : $(TESTS) ; + Alias run_tests : $(TESTS)_run ; + + # gather coverage + Coverage $(TESTS)_coverage : $(PUGIXML) ; + Alias coverage : $(TESTS)_coverage ; + + GCOVFLAGS on $(TESTS)_coverage = $(GCOVFLAGS) -o $(CFGBUILD)/src ; # because stupid gcov can't find files via relative paths + + # add special autotest markers to build log + if $(autotest) + { + COVPREFIX on $(TESTS)_coverage = "... 
autotest $(CONFIG) [$(DEFINESET)]" ; + } + + # gather coverage after tests run + Depends $(TESTS)_coverage : $(TESTS)_run ; + } +} + +# documentation +Documentation docs/manual.html : docs/manual.qbk : docs/manual.xsl ; +Documentation docs/quickstart.html : docs/quickstart.qbk : docs/quickstart.xsl ; + +Alias docs : docs/manual.html docs/quickstart.html ; + +# samples +for SAMPLE in [ Glob docs/samples : *.cpp ] +{ + local CONFIG = "debug" ; + local DEFINES = "PUGIXML_STANDARD" ; + + # build folder + local CFGBUILD = $(BUILD)/$(toolset)/$(DEFINES:J=_)/$(CONFIG) ; + + # compilation options + local CFGFLAGS = $(CCFLAGS) [ GetCFlags $(CONFIG) : $(DEFINES) ] ; + CFGFLAGS += -I src ; + + # build and run sample + local EXECUTABLE = $(CFGBUILD)/samples/$(SAMPLE:S=.exe) ; + local PUGIXML = $(CFGBUILD)/pugixml.lib ; + + Application $(EXECUTABLE) : $(SAMPLE) : $(CFGFLAGS) : $(PUGIXML) ; + + RunSampleAction $(EXECUTABLE)_run : $(EXECUTABLE) ; + Depends $(EXECUTABLE)_run : $(EXECUTABLE) ; + + Depends samples : $(EXECUTABLE)_run ; +} + +# release +VERSION = 0.9 ; +RELEASE_FILES = + [ Glob contrib : *.cpp *.hpp ] + [ Glob src : *.cpp *.hpp ] + [ Glob docs : *.html *.css ] + [ Glob docs/samples : *.cpp *.hpp *.xml ] + [ Glob docs/images : *.png ] + [ Glob docs/manual : *.html ] + @("scripts/**":W=:X=svn) + readme.txt + ; + +actions ArchiveAction +{ + perl tests/archive.pl $(<) $(>) +} + +ArchiveAction pugixml-$(VERSION).zip : $(RELEASE_FILES) ; +ArchiveAction pugixml-$(VERSION).tar.gz : $(RELEASE_FILES) ; +Depends release : pugixml-$(VERSION).zip pugixml-$(VERSION).tar.gz : $(RELEASE_FILES) ; +NotFile release ; diff --git a/contrib/foreach.hpp b/contrib/foreach.hpp index c413f1d..efe6d6d 100644 --- a/contrib/foreach.hpp +++ b/contrib/foreach.hpp @@ -1,102 +1,102 @@ -/* - * Boost.Foreach support for pugixml classes. - * This file is provided to the public domain. 
- * Written by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) - */ - -#ifndef HEADER_PUGIXML_FOREACH_HPP -#define HEADER_PUGIXML_FOREACH_HPP - -#include "pugixml.hpp" - -/* - * These types add support for BOOST_FOREACH macro to xml_node and xml_document classes (child iteration only). - * Example usage: - * BOOST_FOREACH(xml_node n, doc) {} - */ - -namespace boost -{ - template struct range_mutable_iterator; - template struct range_const_iterator; - - template<> struct range_mutable_iterator - { - typedef pugi::xml_node::iterator type; - }; - - template<> struct range_const_iterator - { - typedef pugi::xml_node::iterator type; - }; - - template<> struct range_mutable_iterator - { - typedef pugi::xml_document::iterator type; - }; - - template<> struct range_const_iterator - { - typedef pugi::xml_document::iterator type; - }; -} - -/* - * These types add support for BOOST_FOREACH macro to xml_node and xml_document classes (child/attribute iteration). - * Example usage: - * BOOST_FOREACH(xml_node n, children(doc)) {} - * BOOST_FOREACH(xml_node n, attributes(doc)) {} - */ - -namespace pugi -{ - struct xml_node_children_adapter - { - typedef pugi::xml_node::iterator iterator; - typedef pugi::xml_node::iterator const_iterator; - - xml_node node; - - const_iterator begin() const - { - return node.begin(); - } - - const_iterator end() const - { - return node.end(); - } - }; - - xml_node_children_adapter children(const pugi::xml_node& node) - { - xml_node_children_adapter result = {node}; - return result; - } - - struct xml_node_attribute_adapter - { - typedef pugi::xml_node::attribute_iterator iterator; - typedef pugi::xml_node::attribute_iterator const_iterator; - - xml_node node; - - const_iterator begin() const - { - return node.attributes_begin(); - } - - const_iterator end() const - { - return node.attributes_end(); - } - }; - - xml_node_attribute_adapter attributes(const pugi::xml_node& node) - { - xml_node_attribute_adapter result = {node}; - return result; - } -} 
- -#endif +/* + * Boost.Foreach support for pugixml classes. + * This file is provided to the public domain. + * Written by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + */ + +#ifndef HEADER_PUGIXML_FOREACH_HPP +#define HEADER_PUGIXML_FOREACH_HPP + +#include "pugixml.hpp" + +/* + * These types add support for BOOST_FOREACH macro to xml_node and xml_document classes (child iteration only). + * Example usage: + * BOOST_FOREACH(xml_node n, doc) {} + */ + +namespace boost +{ + template struct range_mutable_iterator; + template struct range_const_iterator; + + template<> struct range_mutable_iterator + { + typedef pugi::xml_node::iterator type; + }; + + template<> struct range_const_iterator + { + typedef pugi::xml_node::iterator type; + }; + + template<> struct range_mutable_iterator + { + typedef pugi::xml_document::iterator type; + }; + + template<> struct range_const_iterator + { + typedef pugi::xml_document::iterator type; + }; +} + +/* + * These types add support for BOOST_FOREACH macro to xml_node and xml_document classes (child/attribute iteration). 
+ * Example usage: + * BOOST_FOREACH(xml_node n, children(doc)) {} + * BOOST_FOREACH(xml_node n, attributes(doc)) {} + */ + +namespace pugi +{ + struct xml_node_children_adapter + { + typedef pugi::xml_node::iterator iterator; + typedef pugi::xml_node::iterator const_iterator; + + xml_node node; + + const_iterator begin() const + { + return node.begin(); + } + + const_iterator end() const + { + return node.end(); + } + }; + + xml_node_children_adapter children(const pugi::xml_node& node) + { + xml_node_children_adapter result = {node}; + return result; + } + + struct xml_node_attribute_adapter + { + typedef pugi::xml_node::attribute_iterator iterator; + typedef pugi::xml_node::attribute_iterator const_iterator; + + xml_node node; + + const_iterator begin() const + { + return node.attributes_begin(); + } + + const_iterator end() const + { + return node.attributes_end(); + } + }; + + xml_node_attribute_adapter attributes(const pugi::xml_node& node) + { + xml_node_attribute_adapter result = {node}; + return result; + } +} + +#endif diff --git a/docs/manual.qbk b/docs/manual.qbk index 17f6de1..a9e5189 100644 --- a/docs/manual.qbk +++ b/docs/manual.qbk @@ -1,2007 +1,2007 @@ -[book pugixml - [quickbook 1.5] - - [version 0.9] - [id manual] - [copyright 2010 Arseny Kapoulkine] - [license Distributed under the MIT License] -] - -[template sbr[]''''''] -[template lbr[]''''''] [/ for empty lines in lists] -[template file[name]''''''[name]''''''] -[template sref[name]''''''] -[template anchor[name]''''''[^[name]]] -[template ftnt[id text]''''''[text]''''''] - -[section:overview Overview] - -[section:introduction Introduction] - -pugixml is a light-weight C++ XML processing library. It consists of a DOM-like interface with rich traversal/modification capabilities, an extremely fast XML parser which constructs the DOM tree from an XML file/buffer, and an XPath 1.0 implementation for complex data-driven tree queries. 
Full Unicode support is also available, with [link manual.dom.unicode two Unicode interface variants] and conversions between different Unicode encodings (which happen automatically during parsing/saving). The library is [link manual.install.portability extremely portable] and easy to integrate and use. pugixml is developed and maintained since 2006 and has many users. All code is distributed under the MIT license, making it completely free to use in both open-source and proprietary applications. - -pugixml enables very fast, convenient and memory-efficient XML document processing. However, since pugixml has a DOM parser, it can't process XML documents that do not fit in memory; also the parser is a non-validating one, so if you need DTD/Schema validation, the library is not for you. - -This is the complete manual for pugixml, which describes all features of the library in detail. If you want to start writing code as quickly as possible, you are advised to [@quickstart.html read the quick start guide first]. - -[note No documentation is perfect, neither is this one. If you encounter a description that is unclear, please file an issue as described in [sref manual.overview.feedback]. Also if you can spare the time for a full proof-reading, including spelling and grammar, that would be great! Please [link email send me an e-mail]; as a token of appreciation, your name will be included into the [link manual.overview.thanks corresponding section] of this documentation.] - -[endsect] [/introduction] - -[section:feedback Feedback] - -If you believe you've found a bug in pugixml (bugs include compilation problems (errors/warnings), crashes, performance degradation and incorrect behavior), please file an issue via [@http://code.google.com/p/pugixml/issues/entry issue submission form]. 
Be sure to include the relevant information so that the bug can be reproduced: the version of pugixml, compiler version and target architecture, the code that uses pugixml and exhibits the bug, etc. - -Feature requests can be reported the same way as bugs, so if you're missing some functionality in pugixml or if the API is rough in some places and you can suggest an improvement, file an issue. However please note that there are many factors when considering API changes (compatibility with previous versions, API redundancy, etc.), so generally features that can be implemented via a small function without pugixml modification are not accepted. However, all rules have exceptions. - -If you have a contribution to pugixml, such as build script for some build system/IDE, or a well-designed set of helper functions, or a binding to some language other than C++, please file an issue. You can include the relevant patches as issue attachments. Your contribution has to be distributed under the terms of a license that's compatible with pugixml license; i.e. GPL/LGPL licensed code is not accepted. - -[#email] -If filing an issue is not possible due to privacy or other concerns, you can contact pugixml author by e-mail directly: [@mailto:arseny.kapoulkine@gmail.com arseny.kapoulkine@gmail.com]. - -[endsect] [/feedback] - -[section:thanks Acknowledgments] - -pugixml could not be developed without the help from many people; some of them are listed in this section. If you've played a part in pugixml development and you can not find yourself on this list, I'm truly sorry; please [link email send me an e-mail] so I can fix this. - -Thanks to *Kristen Wegner* for pugxml parser, which was used as a basis for pugixml. - -Thanks to *Neville Franks* for contributions to pugxml parser. - -Thanks to *Artyom Palvelev* for suggesting a lazy gap contraction approach. - -Thanks to *Vyacheslav Egorov* for documentation proofreading. 
- -[endsect] [/thanks] - -[section:license License] - -The pugixml library is distributed under the MIT license: - -[: -Copyright (c) 2006-2010 Arseny Kapoulkine - -Permission is hereby granted, free of charge, to any person -obtaining a copy of this software and associated documentation -files (the "Software"), to deal in the Software without -restriction, including without limitation the rights to use, -copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the -Software is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. -] - -[endsect] [/license] - -[endsect] [/overview] - -[section:install Installation] - -[section:getting Getting pugixml] - -pugixml is distributed in source form. You can either download a source distribution or checkout the Subversion repository. - -[section:source Source distributions] - -You can download the latest source distribution via one of the following links: - -[pre -[@http://pugixml.googlecode.com/files/pugixml-0.9.zip] -[@http://pugixml.googlecode.com/files/pugixml-0.9.tar.gz] -] - -The distribution contains library source, documentation (the manual you're reading now and the quick start guide) and some code examples. After downloading the distribution, install pugixml by extracting all files from the compressed archive. 
- -If you need an older version, you can download it from the [@http://code.google.com/p/pugixml/downloads/list version archive]. - -[endsect] [/source] - -[section:subversion Subversion repository] - -The Subversion repository is located at [@http://pugixml.googlecode.com/svn/]. There is a Subversion tag "release-{version}" for each version; also there is the "latest" tag, which always points to the latest stable release. - -For example, to checkout the current version, you can use this command: - -[pre svn checkout http://pugixml.googlecode.com/svn/tags/release-0.9 pugixml] - -To checkout the latest version, you can use this command: - -[pre svn checkout http://pugixml.googlecode.com/svn/tags/latest pugixml] - -The repository contains library source, documentation, code examples and full unit test suite. - -Use latest version tag if you want to automatically get new versions via =svn update=. Use other tags if you want to switch to new versions only explicitly (for example, using =svn switch= command). Also please note that Subversion trunk contains the work-in-progress version of the code; while this means that you can get new features and bug fixes from trunk without waiting for a new release, this also means that occasionally the code can be broken in some configurations. - -[endsect] [/subversion] - -[endsect] [/getting] - -[section:building Building pugixml] - -pugixml is distributed in source form without any pre-built binaries; you have to build them yourself. - -The complete pugixml source consists of four files - two source files, [file pugixml.cpp] and [file pugixpath.cpp], and two header files, [file pugixml.hpp] and [file pugiconfig.hpp]. [file pugixml.hpp] is the primary header which you need to include in order to use pugixml classes/functions; [file pugiconfig.hpp] is a supplementary configuration file (see [sref manual.install.building.config]). 
The rest of this guide assumes that [file pugixml.hpp] is either in the current directory or in one of include directories of your projects, so that `#include "pugixml.hpp"` can find the header; however you can also use relative path (i.e. `#include "../libs/pugixml/src/pugixml.hpp"`) or include directory-relative path (i.e. `#include `). - -[note You don't need to compile [file pugixpath.cpp] unless you use XPath.] - -[section:embed Building pugixml as a part of another static library/executable] - -The easiest way to build pugixml is to compile two source files, [file pugixml.cpp] and [file pugixpath.cpp], along with the existing library/executable. This process depends on the method of building your application; for example, if you're using Microsoft Visual Studio[ftnt trademarks All trademarks used are properties of their respective owners.], Apple Xcode, Code::Blocks or any other IDE, just add [file pugixml.cpp] and [file pugixpath.cpp] to one of your projects. - -If you're using Microsoft Visual Studio and the project has precompiled headers turned on, you'll see the following error messages: - -[pre pugixpath.cpp(3477) : fatal error C1010: unexpected end of file while looking for precompiled header. Did you forget to add '#include "stdafx.h"' to your source?] - -The correct way to resolve this is to disable precompiled headers for [file pugixml.cpp] and [file pugixpath.cpp]; you have to set "Create/Use Precompiled Header" option (Properties dialog -> C/C++ -> Precompiled Headers -> Create/Use Precompiled Header) to "Not Using Precompiled Headers". 
You'll have to do it for both [file pugixml.cpp] and [file pugixpath.cpp], for all project configurations/platforms (you can select Configuration "All Configurations" and Platform "All Platforms" before editing the option): - -[table -[[ -[@images/vs2005_pch1.png [$images/vs2005_pch1_thumb.png]] -[$images/next.png] -[@images/vs2005_pch2.png [$images/vs2005_pch2_thumb.png]] -[$images/next.png] -[@images/vs2005_pch3.png [$images/vs2005_pch3_thumb.png]] -[$images/next.png] -[@images/vs2005_pch4.png [$images/vs2005_pch4_thumb.png]] -]] ] - -[endsect] [/embed] - -[section:static Building pugixml as a standalone static library] - -It's possible to compile pugixml as a standalone static library. This process depends on the method of building your application; pugixml distribution comes with project files for several popular IDEs/build systems. There are project files for Apple XCode3, Code::Blocks, Codelite, Microsoft Visual Studio 2005, 2008, 2010, and configuration scripts for CMake and premake4. You're welcome to submit project files/build scripts for other software; see [sref manual.overview.feedback]. - -There are two projects for each version of Microsoft Visual Studio: one for dynamically linked CRT, which has a name like [file pugixml_vs2008.vcproj], and another one for statically linked CRT, which has a name like [file pugixml_vs2008_static.vcproj]. You should select the version that matches the CRT used in your application; the default option for new projects created by Microsoft Visual Studio is dynamically linked CRT, so unless you changed the defaults, you should use the version with dynamic CRT (i.e. [file pugixml_vs2008.vcproj] for Microsoft Visual Studio 2008). - -In addition to adding pugixml project to your workspace, you'll have to make sure that your application links with pugixml library. If you're using Microsoft Visual Studio 2005/2008, you can add a dependency from your application project to pugixml one. 
If you're using Microsoft Visual Studio 2010, you'll have to add a reference to your application project instead. For other IDEs/systems, consult the relevant documentation. - -[table -[[Microsoft Visual Studio 2005/2008][Microsoft Visual Studio 2010]] -[[ -[@images/vs2005_link1.png [$images/vs2005_link1_thumb.png]] -[$images/next.png] -[@images/vs2005_link2.png [$images/vs2005_link2_thumb.png]] -][ -[@images/vs2010_link1.png [$images/vs2010_link1_thumb.png]] -[$images/next.png] -[@images/vs2010_link2.png [$images/vs2010_link2_thumb.png]] -]] ] - -[endsect] [/static] - -[section:shared Building pugixml as a standalone shared library] - -It's possible to compile pugixml as a standalone shared library. The process is usually similar to the static library approach; however, no preconfigured projects/scripts are included into pugixml distribution, so you'll have to do it yourself. Generally, if you're using GCC-based toolchain, the process does not differ from building any other library as DLL (adding -shared to compilation flags should suffice); if you're using MSVC-based toolchain, you'll have to explicitly mark exported symbols with a declspec attribute. You can do it by defining `PUGIXML_API` macro, i.e. via [file pugiconfig.hpp]: - - #ifdef _DLL - #define PUGIXML_API __declspec(dllexport) - #else - #define PUGIXML_API __declspec(dllimport) - #endif - -[endsect] [/shared] - -[section:config Additional configuration options] - -pugixml uses several defines to control the compilation process. There are two ways to define them: either put the needed definitions to [file pugiconfig.hpp] (it has some examples that are commented out) or provide them via compiler command-line. Define consistency is important, i.e. the definitions should match in all source files that include [file pugixml.hpp] (including pugixml sources) throughout the application. 
Adding defines to [file pugiconfig.hpp] lets you guarantee this, unless your macro definition is wrapped in preprocessor `#if`/`#ifdef` directive and this directive is not consistent. [file pugiconfig.hpp] will never contain anything but comments, which means that when upgrading to new version, you can safely leave your modified version intact. - -[anchor PUGIXML_WCHAR_MODE] define toggles between UTF-8 style interface (the in-memory text encoding is assumed to be UTF-8, most functions use `char` as character type) and UTF-16/32 style interface (the in-memory text encoding is assumed to be UTF-16/32, depending on `wchar_t` size, most functions use `wchar_t` as character type). See [sref manual.dom.unicode] for more details. - -[anchor PUGIXML_NO_XPATH] define disables XPath. Both XPath interfaces and XPath implementation are excluded from compilation; you can still compile the file [file pugixpath.cpp] (it will result in an empty translation unit). This option is provided in case you do not need XPath functionality and need to save code space. - -[anchor PUGIXML_NO_STL] define disables use of STL in pugixml. The functions that operate on STL types are no longer present (i.e. load/save via iostream) if this macro is defined. This option is provided in case your target platform does not have a standard-compliant STL implementation. - -[note As of version 0.9, STL is used in XPath implementation; therefore, XPath is also disabled if this macro is defined. This will change in version 1.0.] - -[anchor PUGIXML_NO_EXCEPTIONS] define disables use of exceptions in pugixml. This option is provided in case your target platform does not have exception handling capabilities - -[note As of version 0.9, exceptions are *only* used in XPath implementation; therefore, XPath is also disabled if this macro is defined. This will change in version 1.0.] - -[anchor PUGIXML_API], [anchor PUGIXML_CLASS] and [anchor PUGIXML_FUNCTION] defines let you specify custom attributes (i.e. 
declspec or calling conventions) for pugixml classes and non-member functions. In absence of `PUGIXML_CLASS` or `PUGIXML_FUNCTION` definitions, `PUGIXML_API` definition is used instead. For example, to specify fixed calling convention, you can define `PUGIXML_FUNCTION` to i.e. `__fastcall`. Another example is DLL import/export attributes in MSVC (see [sref manual.install.building.shared]). - -[note In that example `PUGIXML_API` is inconsistent between several source files; this is an exception to the consistency rule.] - -[endsect] [/config] - -[endsect] [/building] - -[section:portability Portability] - -pugixml is written in standard-compliant C++ with some compiler-specific workarounds where appropriate. pugixml is compatible with the upcoming C++0x standard (verified using GCC 4.5). Each version is tested with a unit test suite (with code coverage about 99%) on the following platforms: - -* Microsoft Windows: - * Borland C++ Compiler 5.82 - * Digital Mars C++ Compiler 8.51 - * Intel C++ Compiler 8.0, 9.0 x86/x64, 10.0 x86/x64, 11.0 x86/x64 - * Metrowerks CodeWarrior 8.0 - * Microsoft Visual C++ 6.0, 7.0 (2002), 7.1 (2003), 8.0 (2005) x86/x64, 9.0 (2008) x86/x64, 10.0 (2010) x86/x64 - * MinGW (GCC) 3.4, 4.4, 4.5, 4.6 x64 - -* Linux (GCC 4.4.3 x86/x64) -* FreeBSD (GCC 4.2.1 x86/x64) -* Apple MacOSX (GCC 4.0.1 x86/x64/PowerPC) -* Microsoft Xbox 360 -* Nintendo Wii (Metrowerks CodeWarrior 4.1) -* Sony Playstation Portable (GCC 3.4.2) -* Sony Playstation 3 (GCC 4.1.1, SNC 310.1) - -[endsect] [/portability] - -[endsect] [/install] - -[section:dom Document object model] - -pugixml stores XML data in DOM-like way: the entire XML document (both document structure and element data) is stored in memory as a tree. The tree can be loaded from character stream (file, string, C++ I/O stream), then traversed via special API or XPath expressions. The whole tree is mutable: both node structure and node/attribute data can be changed at any time. 
Finally, the result of document transformations can be saved to a character stream (file, C++ I/O stream or custom transport). - -[section:tree Tree structure] - -The XML document is represented with a tree data structure. The root of the tree is the document itself, which corresponds to C++ type `xml_document`. Document has one or more child nodes, which correspond to C++ type `xml_node`. Nodes have different types; depending on a type, a node can have a collection of child nodes, a collection of attributes, which correspond to C++ type `xml_attribute`, and some additional data (i.e. name). - -[#xml_node_type] -The tree nodes can be of one of the following types (which together form the enumeration `xml_node_type`): - -* Document node ([anchor node_document]) - this is the root of the tree, which consists of several child nodes. This node corresponds to `xml_document` class; note that `xml_document` is a sub-class of `xml_node`, so the entire node interface is also available. However, document node is special in several ways, which will be covered below. There can be only one document node in the tree; document node does not have any XML representation. -[lbr] - -* Element/tag node ([anchor node_element]) - this is the most common type of node, which represents XML elements. Element nodes have a name, a collection of attributes and a collection of child nodes (both of which may be empty). The attribute is a simple name/value pair. The example XML representation of element node is as follows: - - - -[:There are two element nodes here; one has name `"node"`, single attribute `"attr"` and single child `"child"`, another has name `"child"` and does not have any attributes or child nodes.] - -* Plain character data nodes ([anchor node_pcdata]) represent plain text in XML. PCDATA nodes have a value, but do not have name or children/attributes. 
Note that plain character data is not a part of the element node but instead has its own node; for example, an element node can have several child PCDATA nodes. The example XML representation of text node is as follows: - - text1 text2 - -[:Here `"node"` element has three children, two of which are PCDATA nodes with values `"text1"` and `"text2"`.] - -* Character data nodes ([anchor node_cdata]) represent text in XML that is quoted in a special way. CDATA nodes do not differ from PCDATA nodes except in XML representation - the above text example looks like this with CDATA: - - - -[:CDATA nodes make it easy to include non-escaped <, & and > characters in plain text. CDATA value can not contain the character sequence \]\]>, since it is used to determine the end of node contents.] - -* Comment nodes ([anchor node_comment]) represent comments in XML. Comment nodes have a value, but do not have name or children/attributes. The example XML representation of comment node is as follows: - - - -[:Here the comment node has value `"comment text"`. By default comment nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior by adding `parse_comments` flag.] - -* Processing instruction nodes ([anchor node_pi]) represent processing instructions (PI) in XML. PI nodes have a name and an optional value, but do not have children/attributes. The example XML representation of PI node is as follows: - - - -[:Here the name (also called PI target) is `"name"`, and the value is `"value"`. By default PI nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior by adding `parse_pi` flag.] - -* Declaration node ([anchor node_declaration]) represents document declarations in XML. Declaration nodes have a name (`"xml"`) and an optional collection of attributes, but do not have value or children.
There can be only one declaration node in a document; moreover, it should be the topmost node (its parent should be the document). The example XML representation of declaration node is as follows: - - - -[:Here the node has name `"xml"` and a single attribute with name `"version"` and value `"1.0"`. By default declaration nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior by adding `parse_declaration` flag. Also, by default a dummy declaration is output when XML document is saved unless there is already a declaration in the document; you can disable this by adding `format_no_declaration` flag.] - -Finally, here is a complete example of XML document and the corresponding tree representation ([@samples/tree.xml]): - -[table - -[[ -`` - - - - some text - - some more text - - - - - - -`` -][ -[@images/dom_tree.png [$images/dom_tree_thumb.png]] -]]] - - -[endsect] [/tree] - -[section:cpp C++ interface] - -[note All pugixml classes and functions are located in `pugi` namespace; you have to either use explicit name qualification (i.e. `pugi::xml_node`), or to gain access to relevant symbols via `using` directive (i.e. `using pugi::xml_node;` or `using namespace pugi;`). The namespace will be omitted from declarations in this documentation hereafter; all code examples will use fully-qualified names.] - -Despite the fact that there are several node types, there are only three C++ types representing the tree (`xml_document`, `xml_node`, `xml_attribute`); some operations on `xml_node` are only valid for certain node types. They are described below. - -[#xml_document] -`xml_document` is the owner of the entire document structure; it is a non-copyable class. The interface of `xml_document` consists of loading functions (see [sref manual.loading]), saving functions (see [sref manual.saving]) and the interface of `xml_node`, which allows for document inspection and/or modification. 
Note that while `xml_document` is a sub-class of `xml_node`, `xml_node` is not a polymorphic type; the inheritance is only used to simplify usage. - -[#xml_document::ctor] -[#xml_document::dtor] -Default constructor of `xml_document` initializes the document to the tree with only a root node (document node). You can then populate it with data using either tree modification functions or loading functions; all loading functions destroy the previous tree with all occupied memory, which puts existing nodes/attributes from this document to invalid state. Destructor of `xml_document` also destroys the tree, thus the lifetime of the document object should exceed the lifetimes of any node/attribute handles that point to the tree. - -[caution While technically node/attribute handles can be alive when the tree they're referring to is destroyed, calling any member function of these handles results in undefined behavior. Thus it is recommended to make sure that the document is destroyed only after all references to its nodes/attributes are destroyed.] - -[#xml_node] -[#xml_node::type] -`xml_node` is the handle to document node; it can point to any node in the document, including document itself. There is a common interface for nodes of all types; the actual node type can be queried via `xml_node::type()` method. Note that `xml_node` is only a handle to the actual node, not the node itself - you can have several `xml_node` handles pointing to the same underlying object. Destroying `xml_node` handle does not destroy the node and does not remove it from the tree. The size of `xml_node` is equal to that of a pointer, so it is nothing more than a lightweight wrapper around pointer; you can safely pass or return `xml_node` objects by value without additional overhead. - -[#node_null] -There is a special value of `xml_node` type, known as null node or empty node (such nodes have type `node_null`). It does not correspond to any node in any document, and thus resembles null pointer. 
However, all operations are defined on empty nodes; generally the operations don't do anything and return empty nodes/attributes or empty strings as their result (see documentation for specific functions for more detailed information). This is useful for chaining calls; i.e. you can get the grandparent of a node like so: `node.parent().parent()`; if a node is a null node or it does not have a parent, the first `parent()` call returns null node; the second `parent()` call then also returns null node, so you don't have to check for errors twice. - -[#xml_attribute] -`xml_attribute` is the handle to an XML attribute; it has the same semantics as `xml_node`, i.e. there can be several `xml_attribute` handles pointing to the same underlying object, there is a special null attribute value, which propagates to function results. - -[#xml_attribute::ctor] -[#xml_node::ctor] -Both `xml_node` and `xml_attribute` have the default constructor which initializes them to null objects. - -[#xml_attribute::comparison] -[#xml_node::comparison] -`xml_node` and `xml_attribute` try to behave like pointers, that is, they can be compared with other objects of the same type, making it possible to use them as keys of associative containers. All handles to the same underlying object are equal, and any two handles to different underlying objects are not equal. Null handles only compare as equal to themselves. The result of relational comparison can not be reliably determined from the order of nodes in file or other ways. Do not use relational comparison operators except for search optimization (i.e. associative container keys). - -[#xml_attribute::unspecified_bool_type] -[#xml_node::unspecified_bool_type] -[#xml_attribute::empty] -[#xml_node::empty] -Additionally, handles can be implicitly cast to boolean-like objects, so that you can test if the node\/attribute is empty by just doing `if (node) { ... }` or `if (!node) { ... } else { ... }`.
Alternatively you can check if a given `xml_node`/`xml_attribute` handle is null by calling the following methods: - - bool xml_attribute::empty() const; - bool xml_node::empty() const; - -Nodes and attributes do not exist outside of document tree, so you can't create them without adding them to some document. Once underlying node/attribute objects are destroyed, the handles to those objects become invalid. While this means that destruction of the entire tree invalidates all node/attribute handles, it also means that destroying a subtree (by calling `remove_child`) or removing an attribute invalidates the corresponding handles. There is no way to check handle validity; you have to ensure correctness through external mechanisms. - -[endsect] [/cpp] - -[section:unicode Unicode interface] - -There are two choices of interface and internal representation when configuring pugixml: you can either choose the UTF-8 (also called char) interface or UTF-16/32 (also called wchar_t) one. The choice is controlled via `PUGIXML_WCHAR_MODE` define; you can set it via [file pugiconfig.hpp] or via preprocessor options, as discussed in [sref manual.install.building.config]. If this define is set, the wchar_t interface is used; otherwise (by default) the char interface is used. The exact wide character encoding is assumed to be either UTF-16 or UTF-32 and is determined based on size of `wchar_t` type. - -[note If size of `wchar_t` is 2, pugixml assumes UTF-16 encoding instead of UCS-2, which means that some characters are represented as two code points.] - -All tree functions that work with strings work with either C-style null terminated strings or STL strings of the selected character type. 
For example, node name accessors look like this in char mode: - - const char* xml_node::name() const; - bool xml_node::set_name(const char* value); - -and like this in wchar_t mode: - - const wchar_t* xml_node::name() const; - bool xml_node::set_name(const wchar_t* value); - -[#char_t] -[#string_t] -There is a special type, `pugi::char_t`, that is defined as the character type and depends on the library configuration; it will be also used in the documentation hereafter. There is also a type `pugi::string_t`, which is defined as the STL string of the character type; it corresponds to `std::string` in char mode and to `std::wstring` in wchar_t mode. - -In addition to the interface, the internal implementation changes to store XML data as `pugi::char_t`; this means that these two modes have different memory usage characteristics. The conversion to `pugi::char_t` upon document loading and from `pugi::char_t` upon document saving happen automatically, which also carries minor performance penalty. The general advice however is to select the character mode based on usage scenario, i.e. if UTF-8 is inconvenient to process and most of your XML data is localized, wchar_t mode is probably a better choice. - -[#as_utf8] -[#as_wide] -There are cases when you'll have to convert string data between UTF-8 and wchar_t encodings; the following helper functions are provided for such purposes: - - std::string as_utf8(const wchar_t* str); - std::wstring as_wide(const char* str); - -Both functions accept null-terminated string as an argument `str`, and return the converted string. `as_utf8` performs conversion from UTF-16/32 to UTF-8; `as_wide` performs conversion from UTF-8 to UTF-16/32. Invalid UTF sequences are silently discarded upon conversion. `str` has to be a valid string; passing null pointer results in undefined behavior. - -[note Most examples in this documentation assume char interface and therefore will not compile with `PUGIXML_WCHAR_MODE`. 
This is to simplify the documentation; usually the only changes you'll have to make is to pass `wchar_t` string literals, i.e. instead of - -`pugi::xml_node node = doc.child("bookstore").find_child_by_attribute("book", "id", "12345");` - -you'll have to do - -`pugi::xml_node node = doc.child(L"bookstore").find_child_by_attribute(L"book", L"id", L"12345");`] - -[endsect] [/unicode] - -[section:thread Thread-safety guarantees] - -Almost all functions in pugixml have the following thread-safety guarantees: - -* it is safe to call free functions from multiple threads -* it is safe to perform concurrent read-only accesses to the same tree (all constant member functions do not modify the tree) -* it is safe to perform concurrent read/write accesses, if there is only one read or write access to the single tree at a time - -Concurrent modification and traversing of a single tree requires synchronization, for example via reader-writer lock. Modification includes altering document structure and altering individual node/attribute data, i.e. changing names/values. - -The only exception is `set_memory_management_functions`; it modifies global variables and as such is not thread-safe. Its usage policy has more restrictions, see [sref manual.dom.memory.custom]. - -[endsect] [/thread] - -[section:exception Exception guarantees] - -With the exception of XPath, pugixml itself does not throw any exceptions. Additionally, most pugixml functions have a no-throw exception guarantee. - -This is not applicable to functions that operate on STL strings or IOstreams; such functions have either strong guarantee (functions that operate on strings) or basic guarantee (functions that operate on streams). Also functions that call user-defined callbacks (i.e. `xml_node::traverse` or `xml_node::find_node`) do not provide any exception guarantees beyond the ones provided by callback. 
- -XPath functions may throw `xpath_exception` on parsing error; also, XPath implementation uses STL, and thus may throw i.e. `std::bad_alloc` in low memory conditions. Still, XPath functions provide strong exception guarantee. - -[endsect] [/exception] - -[section:memory Memory management] - -pugixml requests the memory needed for document storage in big chunks, and allocates document data inside those chunks. This section discusses replacing functions used for chunk allocation and internal memory management implementation. - -[section:custom Custom memory allocation/deallocation functions] - -[#allocation_function] -[#deallocation_function] -All memory for tree structure/data is allocated via globally specified functions, which default to malloc/free. You can set your own allocation functions with set_memory_management functions. The function interfaces are the same as that of malloc/free: - - typedef void* (*allocation_function)(size_t size); - typedef void (*deallocation_function)(void* ptr); - -[#set_memory_management_functions] -[#get_memory_allocation_function] -[#get_memory_deallocation_function] -You can use the following accessor functions to change or get current memory management functions: - - void set_memory_management_functions(allocation_function allocate, deallocation_function deallocate); - allocation_function get_memory_allocation_function(); - deallocation_function get_memory_deallocation_function(); - -Allocation function is called with the size (in bytes) as an argument and should return a pointer to memory block with alignment that is suitable for pointer storage and size that is greater or equal to the requested one. If the allocation fails, the function has to return null pointer (throwing an exception from allocation function results in undefined behavior). Deallocation function is called with the pointer that was returned by the previous call or with a null pointer; null pointer deallocation should be handled as a no-op. 
If memory management functions are not thread-safe, library thread safety is not guaranteed. - -This is a simple example of custom memory management ([@samples/custom_memory_management.cpp]): - -[import samples/custom_memory_management.cpp] -[code_custom_memory_management_decl] -[code_custom_memory_management_call] - -When setting new memory management functions, care must be taken to make sure that there are no live pugixml objects. Otherwise when the objects are destroyed, the new deallocation function will be called with the memory obtained by the old allocation function, resulting in undefined behavior. - -[note Currently memory for XPath objects is allocated using default operators new/delete; this will change in the next version.] - -[endsect] [/custom] - -[section:internals Document memory management internals] - -Constructing a document object using the default constructor does not result in any allocations; document node is stored inside the `xml_document` object. - -When the document is loaded from file/buffer, unless an inplace loading function is used (see [sref manual.loading.memory]), a complete copy of character stream is made; all names/values of nodes and attributes are allocated in this buffer. This buffer is allocated via a single large allocation and is only freed when document memory is reclaimed (i.e. if the `xml_document` object is destroyed or if another document is loaded in the same object). Also when loading from file or stream, an additional large allocation may be performed if encoding conversion is required; a temporary buffer is allocated, and it is freed before load function returns. - -All additional memory, such as memory for document structure (node/attribute objects) and memory for node/attribute names/values is allocated in pages on the order of 32 kilobytes; actual objects are allocated inside the pages using a memory management scheme optimized for fast allocation/deallocation of many small objects. 
Because of the scheme specifics, the pages are only destroyed if all objects inside them are destroyed; also, generally destroying an object does not mean that subsequent object creation will reuse the same memory. This means that it is possible to devise a usage scheme which will lead to higher memory usage than expected; one example is adding a lot of nodes, and then removing all even numbered ones; not a single page is reclaimed in the process. However this is an example specifically crafted to produce unsatisfying behavior; in all practical usage scenarios the memory consumption is less than that of a general-purpose allocator because allocation meta-data is very small in size. - -[endsect] [/internals] - -[endsect] [/memory] - -[endsect] [/dom] - -[section:loading Loading document] - -pugixml provides several functions for loading XML data from various places - files, C++ iostreams, memory buffers. All functions use an extremely fast non-validating parser. This parser is not fully W3C conformant - it can load any valid XML document, but does not perform some well-formedness checks. While considerable effort is made to reject invalid XML documents, some validation is not performed because of performance reasons. Also some XML transformations (i.e. EOL handling or attribute value normalization) can impact parsing speed and thus can be disabled. However for vast majority of XML documents there is no performance difference between different parsing options. Parsing options also control whether certain XML nodes are parsed; see [sref manual.loading.options] for more information. - -XML data is always converted to internal character format (see [sref manual.dom.unicode]) before parsing. pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it's a strict subset of UTF-16) and handles all encoding conversions automatically.
Unless explicit encoding is specified, loading functions perform automatic encoding detection based on first few characters of XML data, so in almost all cases you do not have to specify document encoding. Encoding conversion is described in more detail in [sref manual.loading.encoding]. - -[section:file Loading document from file] - -[#xml_document::load_file] -The most common source of XML data is files; pugixml provides a separate function for loading XML document from file: - - xml_parse_result xml_document::load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - -This function accepts file path as its first argument, and also two optional arguments, which specify parsing options (see [sref manual.loading.options]) and input data encoding (see [sref manual.loading.encoding]). The path has the target operating system format, so it can be a relative or absolute one, it should have the delimiters of target system, it should have the exact case if target file system is case-sensitive, etc. File path is passed to system file opening function as is. - -`load_file` destroys the existing document tree and then tries to load the new tree from the specified file. The result of the operation is returned in an `xml_parse_result` object; this object contains the operation status, and the related information (i.e. last successfully parsed position in the input file, if parsing fails). See [sref manual.loading.errors] for error handling details. - -[note As of version 0.9, there is no function for loading XML document from wide character path. Unfortunately, there is no portable way to do this; the version 1.0 will provide such function only for platforms with the corresponding functionality. You can use stream-loading functions as a workaround if your STL implementation can open file streams via `wchar_t` paths.] 
- -This is an example of loading XML document from file ([@samples/load_file.cpp]): - -[import samples/load_file.cpp] -[code_load_file] - -[endsect] [/file] - -[section:memory Loading document from memory] - -[#xml_document::load_buffer] -[#xml_document::load_buffer_inplace] -[#xml_document::load_buffer_inplace_own] -Sometimes XML data should be loaded from some other source than file, i.e. HTTP URL; also you may want to load XML data from file using non-standard functions, i.e. to use your virtual file system facilities or to load XML from gzip-compressed files. All these scenarios require loading document from memory. First you should prepare a contiguous memory block with all XML data; then you have to invoke one of buffer loading functions. These functions will handle the necessary encoding conversions, if any, and then will parse the data into the corresponding XML tree. There are several buffer loading functions, which differ in the behavior and thus in performance/memory usage: - - xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - -All functions accept the buffer which is represented by a pointer to XML data, `contents`, and data size in bytes. Also there are two optional arguments, which specify parsing options (see [sref manual.loading.options]) and input data encoding (see [sref manual.loading.encoding]). The buffer does not have to be zero-terminated. - -`load_buffer` function works with immutable buffer - it does not ever modify the buffer. 
Because of this restriction it has to create a private buffer and copy XML data to it before parsing (applying encoding conversions if necessary). This copy operation carries a performance penalty, so inplace functions are provided - `load_buffer_inplace` and `load_buffer_inplace_own` store the document data in the buffer, modifying it in the process. In order for the document to stay valid, you have to make sure that the buffer's lifetime exceeds that of the tree if you're using inplace functions. In addition to that, `load_buffer_inplace` does not assume ownership of the buffer, so you'll have to destroy it yourself; `load_buffer_inplace_own` assumes ownership of the buffer and destroys it once it is not needed. This means that if you're using `load_buffer_inplace_own`, you have to allocate memory with pugixml allocation function (you can get it via [link get_memory_allocation_function]). - -The best way from the performance/memory point of view is to load document using `load_buffer_inplace_own`; this function has maximum control of the buffer with XML data so it is able to avoid redundant copies and reduce peak memory usage while parsing. This is the recommended function if you have to load the document from memory and performance is critical. - -[#xml_document::load_string] -There is also a simple helper function for cases when you want to load the XML document from null-terminated character string: - - xml_parse_result xml_document::load(const char_t* contents, unsigned int options = parse_default); - -It is equivalent to calling `load_buffer` with `size = strlen(contents)`. This function assumes native encoding for input data, so it does not do any encoding conversion. In general, this function is fine for loading small documents from string literals, but has more overhead and less functionality than buffer loading functions. 
- -This is an example of loading XML document from memory using different functions ([@samples/load_memory.cpp]): - -[import samples/load_memory.cpp] -[code_load_memory_decl] -[code_load_memory_buffer] -[code_load_memory_buffer_inplace] -[code_load_memory_buffer_inplace_own] -[code_load_memory_string] - -[endsect] [/memory] - -[section:stream Loading document from C++ IOstreams] - -[#xml_document::load_stream] -For additional interoperability pugixml provides functions for loading document from any object which implements C++ `std::istream` interface. This allows you to load documents from any standard C++ stream (i.e. file stream) or any third-party compliant implementation (i.e. Boost Iostreams). There are two functions, one works with narrow character streams, another handles wide character ones: - - xml_parse_result xml_document::load(std::istream& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - xml_parse_result xml_document::load(std::wistream& stream, unsigned int options = parse_default); - -`load` with `std::istream` argument loads the document from stream from the current read position to the end, treating the stream contents as a byte stream of the specified encoding (with encoding autodetection as necessary). Thus calling `xml_document::load` on an opened `std::ifstream` object is equivalent to calling `xml_document::load_file`. - -`load` with `std::wistream` argument treats the stream contents as a wide character stream (encoding is always `encoding_wchar`). Because of this, using `load` with wide character streams requires careful (usually platform-specific) stream setup (i.e. using the `imbue` function). Generally use of wide streams is discouraged, however it provides you the ability to load documents from non-Unicode encodings, i.e. you can load Shift-JIS encoded data if you set the correct locale.
- -This is a simple example of loading XML document from file using streams ([@samples/load_stream.cpp]); read the sample code for more complex examples involving wide streams and locales: - -[import samples/load_stream.cpp] -[code_load_stream] - -Stream loading requires working seek/tell functions and therefore may fail when used with some stream implementations like gzstream. - -[endsect] [/stream] - -[section:errors Handling parsing errors] - -[#xml_parse_result] -All document loading functions return the parsing result via `xml_parse_result` object. It contains parsing status, the offset of last successfully parsed character from the beginning of the source stream, and the encoding of the source stream: - - struct xml_parse_result - { - xml_parse_status status; - ptrdiff_t offset; - xml_encoding encoding; - - operator bool() const; - const char* description() const; - }; - -[#xml_parse_status] -[#xml_parse_result::status] -Parsing status is represented as the `xml_parse_status` enumeration and can be one of the following: - -* [anchor status_ok] means that no error was encountered during parsing; the source stream represents the valid XML document which was fully parsed and converted to a tree. -[lbr] - -* [anchor status_file_not_found] is only returned by `load_file` function and means that file could not be opened. -* [anchor status_io_error] is returned by `load_file` function and by `load` functions with `std::istream`/`std::wistream` arguments; it means that some I/O error has occurred during reading the file/stream. -* [anchor status_out_of_memory] means that there was not enough memory during some allocation; any allocation failure during parsing results in this error. -* [anchor status_internal_error] means that something went horribly wrong; currently this error does not occur -[lbr] - -* [anchor status_unrecognized_tag] means that parsing stopped due to a tag with either an empty name or a name which starts with incorrect character, such as [^#].
-* [anchor status_bad_pi] means that parsing stopped due to incorrect document declaration/processing instruction -* [anchor status_bad_comment], [anchor status_bad_cdata], [anchor status_bad_doctype] and [anchor status_bad_pcdata] mean that parsing stopped due to the invalid construct of the respective type -* [anchor status_bad_start_element] means that parsing stopped because starting tag either had no closing `>` symbol or contained some incorrect symbol -* [anchor status_bad_attribute] means that parsing stopped because there was an incorrect attribute, such as an attribute without value or with value that is not quoted (note that `` is incorrect in XML) -* [anchor status_bad_end_element] means that parsing stopped because ending tag had incorrect syntax (i.e. extra non-whitespace symbols between tag name and `>`) -* [anchor status_end_element_mismatch] means that parsing stopped because the closing tag did not match the opening one (i.e. ``) or because some tag was not closed at all - -[#xml_parse_result::description] -`description()` member function can be used to convert parsing status to a string; the returned message is always in English, so you'll have to write your own function if you need a localized string. However please note that the exact messages returned by `description()` function may change from version to version, so any complex status handling should be based on `status` value. - -If parsing failed because the source data was not a valid XML, the resulting tree is not destroyed - despite the fact that load function returns error, you can use the part of the tree that was successfully parsed. Obviously, the last element may have an unexpected name/value; for example, if the attribute value does not end with the necessary quotation mark, like in [^` (document declaration) is not considered to be a PI. This flag is *off* by default. 
-[lbr] - -* [anchor parse_comments] determines if comments (nodes with type [link node_comment]) are to be put in DOM tree. If this flag is off, they are not put in the tree, but are still parsed and checked for correctness. This flag is *off* by default. -[lbr] - -* [anchor parse_cdata] determines if CDATA sections (nodes with type [link node_cdata]) are to be put in DOM tree. If this flag is off, they are not put in the tree, but are still parsed and checked for correctness. This flag is *on* by default. -[lbr] - -* [anchor parse_ws_pcdata] determines if PCDATA nodes (nodes with type [link node_pcdata]) that consist only of whitespace characters are to be put in DOM tree. Often whitespace-only data is not significant for the application, and the cost of allocating and storing such nodes (both memory and speed-wise) can be significant. For example, after parsing XML string `<node> <a/> </node>`, `<node>` element will have three children when `parse_ws_pcdata` is set (child with type `node_pcdata` and value `" "`, child with type `node_element` and name `"a"`, and another child with type `node_pcdata` and value `" "`), and only one child when `parse_ws_pcdata` is not set. This flag is *off* by default. - -These flags control the transformation of tree element contents: - -* [anchor parse_escapes] determines if character and entity references are to be expanded during the parsing process. Character references have the form [^&#...;] or [^&#x...;] ([^...] is Unicode numeric representation of character in either decimal ([^&#...;]) or hexadecimal ([^&#x...;]) form), entity references are [^&lt;], [^&gt;], [^&amp;], [^&apos;] and [^&quot;] (note that as pugixml does not handle DTD, the only allowed entities are predefined ones). If character/entity reference can not be expanded, it is left as is, so you can do additional processing later. Reference expansion is performed in attribute values and PCDATA content. This flag is *on* by default. 
-[lbr] - -* [anchor parse_eol] determines if EOL handling (that is, replacing sequences `0x0d 0x0a` by a single `0x0a` character, and replacing all standalone `0x0d` characters by `0x0a`) is to be performed on input data (that is, comments contents, PCDATA/CDATA contents and attribute values). This flag is *on* by default. -[lbr] - -* [anchor parse_wconv_attribute] determines if attribute value normalization should be performed for all attributes. This means, that whitespace characters (new line, tab and space) are replaced with space (`' '`). New line characters are always treated as if `parse_eol` is set, i.e. `\r\n` is converted to single space. This flag is *on* by default. -[lbr] - -* [anchor parse_wnorm_attribute] determines if extended attribute value normalization should be performed for all attributes. This means, that after attribute values are normalized as if `parse_wconv_attribute` was set, leading and trailing space characters are removed, and all sequences of space characters are replaced by a single space character. The value of `parse_wconv_attribute` has no effect if this flag is on. This flag is *off* by default. - -[note `parse_wconv_attribute` option performs transformations that are required by W3C specification for attributes that are declared as [^CDATA]; `parse_wnorm_attribute` performs transformations required for [^NMTOKENS] attributes. In the absence of document type declaration all attributes behave as if they are declared as [^CDATA], thus `parse_wconv_attribute` is the default option.] - -Additionally there are two predefined option masks: - -* [anchor parse_minimal] has all options turned off. This option mask means that pugixml does not add declaration nodes, PI nodes, CDATA sections and comments to the resulting tree and does not perform any conversion for input data, so theoretically it is the fastest mode. However, as discussed above, in practice `parse_default` is usually equally fast. 
-[lbr] - -* [anchor parse_default] is the default set of flags, i.e. it has all options set to their default values. It includes parsing CDATA sections (comments/PIs are not parsed), performing character and entity reference expansion, replacing whitespace characters with spaces in attribute values and performing EOL handling. Note, that PCDATA sections consisting only of whitespace characters are not parsed (by default) for performance reasons. - -This is an example of using different parsing options ([@samples/load_options.cpp]): - -[import samples/load_options.cpp] -[code_load_options] - -[endsect] [/options] - -[section:encoding Encodings] - -[#xml_encoding] -pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it's a strict subset of UTF-16) and handles all encoding conversions. Most loading functions accept the optional parameter `encoding`. This is a value of enumeration type `xml_encoding`, that can have the following values: - -* [anchor encoding_auto] means that pugixml will try to guess the encoding based on source XML data. 
The algorithm is a modified version of the one presented in Appendix F.1 of XML recommendation; it tries to match the first few bytes of input data with the following patterns in strict order: -[lbr] - * If first four bytes match UTF-32 BOM (Byte Order Mark), encoding is assumed to be UTF-32 with the endianness equal to that of BOM; - * If first two bytes match UTF-16 BOM, encoding is assumed to be UTF-16 with the endianness equal to that of BOM; - * If first three bytes match UTF-8 BOM, encoding is assumed to be UTF-8; - * If first four bytes match UTF-32 representation of [^<], encoding is assumed to be UTF-32 with the corresponding endianness; - * If first four bytes match UTF-16 representation of [^`, calling `next_sibling` for a handle that points to `` results in a handle pointing to ``, and calling `previous_sibling` results in handle pointing to ``. If node does not have next/previous sibling (this happens if it is the last/first node in the list, respectively), the functions return null nodes. `first_attribute`, `last_attribute`, `next_attribute` and `previous_attribute` functions behave the same way as corresponding child node functions and allow to iterate through attribute list in the same way. - -[note Because of memory consumption reasons, attributes do not have a link to their parent nodes. Thus there is no `xml_attribute::parent()` function.] - -Calling any of the functions above on the null handle results in a null handle - i.e. `node.first_child().next_sibling()` returns the second child of `node`, and null handle if there is no children at all or if there is only one. 
- -With these functions, you can iterate through all child nodes and display all attributes like this ([@samples/traverse_base.cpp]): - -[code_traverse_base_basic] - -[endsect] [/basic] - -[section:nodedata Getting node data] - -[#xml_node::name][#xml_node::value] -Apart from structural information (parent, child nodes, attributes), nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent. `node_document` nodes do not have name or value, `node_element` and `node_declaration` nodes always have a name but never have a value, `node_pcdata`, `node_cdata` and `node_comment` nodes never have a name but always have a value (it may be empty though), `node_pi` nodes always have a name and a value (again, value may be empty). In order to get node's name or value, you can use the following functions: - - const char_t* xml_node::name() const; - const char_t* xml_node::value() const; - -In case node does not have a name or value or if the node handle is null, both functions return empty strings - they never return null pointers. - -[#xml_node::child_value] -It is common to store data as text contents of some node - i.e. `<node><description>This is a node</description></node>`. In this case, `<description>` node does not have a value, but instead has a child of type `node_pcdata` with value `"This is a node"`. pugixml provides two helper functions to parse such data: - - const char_t* xml_node::child_value() const; - const char_t* xml_node::child_value(const char_t* name) const; - -`child_value()` returns the value of the first child with type `node_pcdata` or `node_cdata`; `child_value(name)` is a simple wrapper for `child(name).child_value()`. For the above example, calling `node.child_value("description")` and `description.child_value()` will both produce string `"This is a node"`. If there is no child with relevant type, or if the handle is null, `child_value` functions return empty string. 
- -There is an example of using some of these functions [link code_traverse_base_data at the end of the next section]. - -[endsect] [/nodedata] - -[section:attrdata Getting attribute data] - -[#xml_attribute::name][#xml_attribute::value] -All attributes have name and value, both of which are strings (value may be empty). There are two corresponding accessors, like for `xml_node`: - - const char_t* xml_attribute::name() const; - const char_t* xml_attribute::value() const; - -In case attribute handle is null, both functions return empty strings - they never return null pointers. - -[#xml_attribute::as_int][#xml_attribute::as_uint][#xml_attribute::as_double][#xml_attribute::as_float][#xml_attribute::as_bool] -In many cases attribute values have types that are not strings - i.e. an attribute may always contain values that should be treated as integers, despite the fact that they are represented as strings in XML. pugixml provides several accessors that convert attribute value to some other type. The accessors are as follows: - - int xml_attribute::as_int() const; - unsigned int xml_attribute::as_uint() const; - double xml_attribute::as_double() const; - float xml_attribute::as_float() const; - bool xml_attribute::as_bool() const; - -`as_int`, `as_uint`, `as_double` and `as_float` convert attribute values to numbers. If attribute handle is null or attribute value is empty, `0` is returned. Otherwise, all leading whitespace characters are truncated, and the remaining string is parsed as a decimal number (`as_int` or `as_uint`) or as a floating point number in either decimal or scientific form (`as_double` or `as_float`). Any extra characters are silently discarded, i.e. `as_int` will return `1` for string `"1abc"`. - -In case the input string contains a number that is out of the target numeric range, the result is undefined. 
- -[caution Number conversion functions depend on current C locale as set with `setlocale`, so may return unexpected results if the locale is different from `"C"`.] - -`as_bool` converts attribute value to boolean as follows: if attribute handle is null or attribute value is empty, `false` is returned. Otherwise, `true` is returned if first character is one of `'1', 't', 'T', 'y', 'Y'`. This means that strings like `"true"` and `"yes"` are recognized as `true`, while strings like `"false"` and `"no"` are recognized as `false`. For more complex matching you'll have to write your own function. - -[note There are no portable 64-bit types in C++, so there is no corresponding conversion function. If your platform has a 64-bit integer, you can easily write a conversion function yourself.] - -[#code_traverse_base_data] -This is an example of using these functions, along with node data retrieval ones ([@samples/traverse_base.cpp]): - -[code_traverse_base_data] - -[endsect] [/attrdata] - -[section:contents Contents-based traversal functions] - -[#xml_node::child][#xml_node::attribute][#xml_node::next_sibling_name][#xml_node::previous_sibling_name] -Since a lot of document traversal consists of finding the node/attribute with the correct name, there are special functions for that purpose: - - xml_node xml_node::child(const char_t* name) const; - xml_attribute xml_node::attribute(const char_t* name) const; - xml_node xml_node::next_sibling(const char_t* name) const; - xml_node xml_node::previous_sibling(const char_t* name) const; - -`child` and `attribute` return the first child/attribute with the specified name; `next_sibling` and `previous_sibling` return the first sibling in the corresponding direction with the specified name. All string comparisons are case-sensitive. In case the node handle is null or there is no node\/attribute with the specified name, null handle is returned. 
- -`child` and `next_sibling` functions can be used together to loop through all child nodes with the desired name like this: - - for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool")) - -[#xml_node::find_child_by_attribute] -Occasionally the needed node is specified not by the unique name but instead by the value of some attribute; for example, it is common to have node collections with each node having a unique id: ` `. There are two functions for finding child nodes based on the attribute values: - - xml_node xml_node::find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const; - xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const; - -The three-argument function returns the first child node with the specified name which has an attribute with the specified name/value; the two-argument function skips the name test for the node, which can be useful for searching in heterogeneous collections. If the node handle is null or if no node is found, null handle is returned. All string comparisons are case-sensitive. - -In all of the above functions, all arguments have to be valid strings; passing null pointers results in undefined behavior. 
- -This is an example of using these functions ([@samples/traverse_base.cpp]): - -[code_traverse_base_contents] - -[endsect] [/contents] - -[section:iterators Traversing node/attribute lists via iterators] - -[#xml_node_iterator][#xml_attribute_iterator][#xml_node::begin][#xml_node::end][#xml_node::attributes_begin][#xml_node::attributes_end] -Child node lists and attribute lists are simply double-linked lists; while you can use `previous_sibling`/`next_sibling` and other such functions for iteration, pugixml additionally provides node and attribute iterators, so that you can treat nodes as containers of other nodes or attributes: - - class xml_node_iterator; - class xml_attribute_iterator; - - typedef xml_node_iterator xml_node::iterator; - iterator xml_node::begin() const; - iterator xml_node::end() const; - - typedef xml_attribute_iterator xml_node::attribute_iterator; - attribute_iterator xml_node::attributes_begin() const; - attribute_iterator xml_node::attributes_end() const; - -`begin` and `attributes_begin` return iterators that point to the first node\/attribute, respectively; `end` and `attributes_end` return past-the-end iterator for node\/attribute list, respectively - this iterator can't be dereferenced, but decrementing it results in an iterator pointing to the last element in the list (except for empty lists, where decrementing past-the-end iterator is not defined). Past-the-end iterator is commonly used as a termination value for iteration loops (see sample below). If you want to get an iterator that points to an existing handle, you can construct the iterator with the handle as a single constructor argument, like so: `xml_node_iterator(node)`. For `xml_attribute_iterator`, you'll have to provide both an attribute and its parent node. - -`begin` and `end` return equal iterators if called on null node; such iterators can't be dereferenced. `attributes_begin` and `attributes_end` behave the same way. 
For correct iterator usage this means that child node\/attribute collections of null nodes appear to be empty. - -Both types of iterators have bidirectional iterator semantics (i.e. they can be incremented and decremented, but efficient random access is not supported) and support all usual iterator operations - comparison, dereference, etc. The iterators are invalidated if the node\/attribute objects they're pointing to are removed from the tree; adding nodes\/attributes does not invalidate any iterators. - -Here is an example of using iterators for document traversal ([@samples/traverse_iter.cpp]): - -[import samples/traverse_iter.cpp] -[code_traverse_iter] - -[caution Node and attribute iterators are somewhere in the middle between const and non-const iterators. While dereference operation yields a non-constant reference to the object, so that you can use it for tree modification operations, modifying this reference by assignment - i.e. passing iterators to a function like `std::sort` - will not give expected results, as assignment modifies local handle that's stored in the iterator.] - -[endsect] [/iterators] - -[section:walker Recursive traversal with xml_tree_walker] - -[#xml_tree_walker] -The methods described above allow traversal of immediate children of some node; if you want to do a deep tree traversal, you'll have to do it via a recursive function or some equivalent method. However, pugixml provides a helper for depth-first traversal of a subtree. 
In order to use it, you have to implement `xml_tree_walker` interface and to call `traverse` function: - - class xml_tree_walker - { - public: - virtual bool begin(xml_node& node); - virtual bool for_each(xml_node& node) = 0; - virtual bool end(xml_node& node); - - int depth() const; - }; - - bool xml_node::traverse(xml_tree_walker& walker); - -[#xml_tree_walker::begin][#xml_tree_walker::for_each][#xml_tree_walker::end][#xml_node::traverse] -The traversal is launched by calling `traverse` function on traversal root and proceeds as follows: - -* First, `begin` function is called with traversal root as its argument. -* Then, `for_each` function is called for all nodes in the traversal subtree in depth first order, excluding the traversal root. Node is passed as an argument. -* Finally, `end` function is called with traversal root as its argument. - -If `begin`, `end` or any of the `for_each` calls return `false`, the traversal is terminated and `false` is returned as the traversal result; otherwise, the traversal results in `true`. Note that you don't have to override `begin` or `end` functions; their default implementations return `true`. - -[#xml_tree_walker::depth] -You can get the node's depth relative to the traversal root at any point by calling `depth` function. It returns `-1` if called from `begin`\/`end`, and returns 0-based depth if called from `for_each` - depth is 0 for all children of the traversal root, 1 for all grandchildren and so on. - -This is an example of traversing tree hierarchy with xml_tree_walker ([@samples/traverse_walker.cpp]): - -[import samples/traverse_walker.cpp] -[code_traverse_walker_impl] -[code_traverse_walker_traverse] - -[endsect] [/walker] - -[section:predicate Searching for nodes/attributes with predicates] - -[#xml_node::find_attribute][#xml_node::find_child][#xml_node::find_node] -While there are existing functions for getting a node/attribute with known contents, they are often not sufficient for simple queries. 
As an alternative to iterating manually through nodes/attributes until the needed one is found, you can make a predicate and call one of `find_` functions: - - template <typename Predicate> xml_attribute xml_node::find_attribute(Predicate pred) const; - template <typename Predicate> xml_node xml_node::find_child(Predicate pred) const; - template <typename Predicate> xml_node xml_node::find_node(Predicate pred) const; - -The predicate should be either a plain function or a function object which accepts one argument of type `xml_attribute` (for `find_attribute`) or `xml_node` (for `find_child` and `find_node`), and returns `bool`. The predicate is never called with null handle as an argument. - -`find_attribute` function iterates through all attributes of the specified node, and returns the first attribute for which predicate returned `true`. If predicate returned `false` for all attributes or if there were no attributes (including the case where the node is null), null attribute is returned. - -`find_child` function iterates through all child nodes of the specified node, and returns the first node for which predicate returned `true`. If predicate returned `false` for all nodes or if there were no child nodes (including the case where the node is null), null node is returned. - -`find_node` function performs a depth-first traversal through the subtree of the specified node (excluding the node itself), and returns the first node for which predicate returned `true`. If predicate returned `false` for all nodes or if subtree was empty, null node is returned. 
- -This is an example of using predicate-based functions ([@samples/traverse_predicate.cpp]): - -[import samples/traverse_predicate.cpp] -[code_traverse_predicate_decl] -[code_traverse_predicate_find] - -[endsect] [/predicate] - -[section:misc Miscellaneous functions] - -[#xml_node::root] -If you need to get the document root of some node, you can use the following function: - - xml_node xml_node::root() const; - -This function returns the node with type `node_document`, which is the root node of the document the node belongs to (unless the node is null, in which case null node is returned). Currently this function has logarithmic complexity, since it simply finds such ancestor of the given node which itself has no parent. - -[#xml_node::path] -[#xml_node::first_element_by_path] -While pugixml supports complex XPath expressions, sometimes a simple path handling facility is needed. There are two functions, for getting node path and for converting path to a node: - - string_t xml_node::path(char_t delimiter = '/') const; - xml_node xml_node::first_element_by_path(const char_t* path, char_t delimiter = '/') const; - -Node paths consist of node names, separated with a delimiter (which is `/` by default); also paths can contain self (`.`) and parent (`..`) pseudo-names, so that this is a valid path: `"../../foo/./bar"`. `path` returns the path to the node from the document root, `first_element_by_path` looks for a node represented by a given path; a path can be an absolute one (absolute paths start with delimiter), in which case the rest of the path is treated as document root relative, or a relative one, in which case it is treated as relative to the given node. For example, in the following document: `<a><b><c/></b></a>`, node `<c/>` has path `"a/b/c"`; calling `first_element_by_path` for document with path `"a/b"` results in node `<b/>`; calling `first_element_by_path` for node `<a/>` with path `"../a/./b/../."` results in node `<a/>`; calling `first_element_by_path` with path `"/a"` results in node `<a/>` for any node. 
- -In case path component is ambiguous (if there are two nodes with given name), the first one is selected; paths are not guaranteed to uniquely identify nodes in a document. If any component of a path is not found, the result of `first_element_by_path` is null node; also `first_element_by_path` returns null node for null nodes, in which case the path does not matter. `path` returns an empty string for null nodes. - -[note `path` function returns the result as STL string, and thus is not available if `PUGIXML_NO_STL` is defined.] - -[#xml_node::offset_debug] -pugixml does not record row/column information for nodes upon parsing for efficiency reasons. However, if the node has not changed in a significant way since parsing (the name/value are not changed, and the node itself is the original one, i.e. it was not deleted from the tree and re-added later), it is possible to get the offset from the beginning of XML buffer: - - ptrdiff_t xml_node::offset_debug() const; - -If the offset is not available (this happens if the node is null, was not originally parsed from a stream, or has changed in a significant way), the function returns -1. Otherwise it returns the offset to node's data from the beginning of XML buffer in `pugi::char_t` units. For more information on parsing offsets, see [link xml_parse_result::offset parsing error handling documentation]. - -[endsect] [/misc] - -[endsect] [/access] - -[section:modify Modifying document data] - -The document in pugixml is fully mutable: you can completely change the document structure and modify the data of nodes/attributes. This section provides documentation for the relevant functions. All functions take care of memory management and structural integrity themselves, so they always result in structurally valid tree - however, it is possible to create an invalid XML tree (for example, by adding two attributes with the same name or by setting attribute/node name to empty/invalid string). 
Tree modification is optimized for performance and for memory consumption, so if you have enough memory you can create documents from scratch with pugixml and later save them to file/stream instead of relying on error-prone manual text writing and without too much overhead. - -All member functions that change node/attribute data or structure are non-constant and thus can not be called on constant handles. However, you can easily convert constant handle to non-constant one by simple assignment: `void foo(const pugi::xml_node& n) { pugi::xml_node nc = n; }`, so const-correctness here mainly provides additional documentation. - -[import samples/modify_base.cpp] - -[section:nodedata Setting node data] - -[#xml_node::set_name][#xml_node::set_value] -As discussed before, nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent. `node_document` nodes do not have name or value, `node_element` and `node_declaration` nodes always have a name but never have a value, `node_pcdata`, `node_cdata` and `node_comment` nodes never have a name but always have a value (it may be empty though), `node_pi` nodes always have a name and a value (again, value may be empty). In order to set node's name or value, you can use the following functions: - - bool xml_node::set_name(const char_t* rhs); - bool xml_node::set_value(const char_t* rhs); - -Both functions try to set the name\/value to the specified string, and return the operation result. The operation fails if the node can not have name or value (for instance, when trying to call `set_name` on a `node_pcdata` node), if the node handle is null, or if there is insufficient memory to handle the request. The provided string is copied into document managed memory and can be destroyed after the function returns (for example, you can safely pass stack-allocated buffers to these functions). 
The name/value content is not verified, so take care to use only valid XML names, or the document may become malformed. - -There is no equivalent of `child_value` function for modifying text children of the node. - -This is an example of setting node name and value ([@samples/modify_base.cpp]): - -[code_modify_base_node] - -[endsect] [/nodedata] - -[section:attrdata Setting attribute data] - -[#xml_attribute::set_name][#xml_attribute::set_value] -All attributes have name and value, both of which are strings (value may be empty). You can set them with the following functions: - - bool xml_attribute::set_name(const char_t* rhs); - bool xml_attribute::set_value(const char_t* rhs); - -Both functions try to set the name\/value to the specified string, and return the operation result. The operation fails if the attribute handle is null, or if there is insufficient memory to handle the request. The provided string is copied into document managed memory and can be destroyed after the function returns (for example, you can safely pass stack-allocated buffers to these functions). The name/value content is not verified, so take care to use only valid XML names, or the document may become malformed. - -In addition to string functions, several functions are provided for handling attributes with numbers and booleans as values: - - bool xml_attribute::set_value(int rhs); - bool xml_attribute::set_value(unsigned int rhs); - bool xml_attribute::set_value(double rhs); - bool xml_attribute::set_value(bool rhs); - -The above functions convert the argument to string and then call the base `set_value` function. Integers are converted to a decimal form, floating-point numbers are converted to either decimal or scientific form, depending on the number magnitude, boolean values are converted to either `"true"` or `"false"`. - -[caution Number conversion functions depend on current C locale as set with `setlocale`, so may generate unexpected results if the locale is different from `"C"`.] 
- -[note There are no portable 64-bit types in C++, so there is no corresponding `set_value` function. If your platform has a 64-bit integer, you can easily write such a function yourself.] - -[#xml_attribute::assign] - -For convenience, all `set_value` functions have the corresponding assignment operators: - - xml_attribute& xml_attribute::operator=(const char_t* rhs); - xml_attribute& xml_attribute::operator=(int rhs); - xml_attribute& xml_attribute::operator=(unsigned int rhs); - xml_attribute& xml_attribute::operator=(double rhs); - xml_attribute& xml_attribute::operator=(bool rhs); - -These operators simply call the right `set_value` function and return the attribute they're called on; the return value of `set_value` is ignored, so errors are not detected. - -This is an example of setting attribute name and value ([@samples/modify_base.cpp]): - -[code_modify_base_attr] - -[endsect] [/attrdata] - -[section:add Adding nodes/attributes] - -[#xml_node::append_attribute][#xml_node::insert_attribute_after][#xml_node::insert_attribute_before][#xml_node::append_child][#xml_node::insert_child_after][#xml_node::insert_child_before] -Nodes and attributes do not exist outside of document tree, so you can't create them without adding them to some document. 
A node or attribute can be created at the end of node/attribute list or before\/after some other node: - - xml_attribute xml_node::append_attribute(const char_t* name); - xml_attribute xml_node::insert_attribute_after(const char_t* name, const xml_attribute& attr); - xml_attribute xml_node::insert_attribute_before(const char_t* name, const xml_attribute& attr); - - xml_node xml_node::append_child(xml_node_type type = node_element); - xml_node xml_node::insert_child_after(xml_node_type type, const xml_node& node); - xml_node xml_node::insert_child_before(xml_node_type type, const xml_node& node); - -`append_attribute` and `append_child` create a new node/attribute at the end of the corresponding list of the node the method is called on; `insert_attribute_after`, `insert_attribute_before`, `insert_child_after` and `insert_child_before` add the node\/attribute before or after specified node\/attribute. - -Attribute functions create an attribute with the specified name; you can specify the empty name and change the name later if you want to. Node functions create the node with the specified type; since node type can't be changed, you have to know the desired type beforehand. Also note that not all types can be added as children; see below for clarification. - -All functions return the handle to newly created object on success, and null handle on failure. 
There are several reasons for failure: - -* Adding fails if the target node is null; -* Only `node_element` nodes can contain attributes, so attribute adding fails if node is not an element; -* Only `node_document` and `node_element` nodes can contain children, so child node adding fails if target node is not an element or a document; -* `node_document` and `node_null` nodes can not be inserted as children, so passing `node_document` or `node_null` value as type results in operation failure; -* `node_declaration` nodes can only be added as children of the document node; attempt to insert declaration node as a child of an element node fails; -* Adding node/attribute results in memory allocation, which may fail; -* Insertion functions fail if the specified node or attribute is not in the target node's children/attribute list. - -Even if the operation fails, the document remains in consistent state, but the requested node/attribute is not added. - -[caution attribute() and child() functions do not add attributes or nodes to the tree, so code like `node.attribute("id") = 123;` will not do anything if `node` does not have an attribute with name `"id"`. Make sure you're operating with existing attributes/nodes by adding them if necessary.] - -This is an example of adding new attributes\/nodes to the document ([@samples/modify_add.cpp]): - -[import samples/modify_add.cpp] -[code_modify_add] - -[endsect] [/add] - -[section:remove Removing nodes/attributes] - -[#xml_node::remove_attribute][#xml_node::remove_child] -If you do not want your document to contain some node or attribute, you can remove it with one of the following functions: - - bool xml_node::remove_attribute(const xml_attribute& a); - bool xml_node::remove_child(const xml_node& n); - -`remove_attribute` removes the attribute from the attribute list of the node, and returns the operation result. 
`remove_child` removes the child node with the entire subtree (including all descendant nodes and attributes) from the document, and returns the operation result. Removing fails if one of the following is true: - -* The node the function is called on is null; -* The attribute\/node to be removed is null; -* The attribute\/node to be removed is not in the node's attribute\/child list. - -Removing the attribute or node invalidates all handles to the same underlying object, and also invalidates all iterators pointing to the same object. Removing node also invalidates all past-the-end iterators to its attribute or child node list. Be careful to ensure that all such handles and iterators either do not exist or are not used after the attribute\/node is removed. - -If you want to remove the attribute or child node by its name, two additional helper functions are available: - - bool xml_node::remove_attribute(const char_t* name); - bool xml_node::remove_child(const char_t* name); - -These functions look for the first attribute or child with the specified name, and then remove it, returning the result. If there is no attribute or child with such name, the function returns `false`; if there are two nodes with the given name, only the first node is deleted. If you want to delete all nodes with the specified name, you can use code like this: `while (node.remove_child("tool")) ;`. - -This is an example of removing attributes\/nodes from the document ([@samples/modify_remove.cpp]): - -[import samples/modify_remove.cpp] -[code_modify_remove] - -[endsect] [/remove] - -[section:clone Cloning nodes/attributes] - -[#xml_node::append_copy][#xml_node::insert_copy_after][#xml_node::insert_copy_before] -With the help of previously described functions, it is possible to create trees with any contents and structure, including cloning the existing data. However since this is an often needed operation, pugixml provides built-in node/attribute cloning facilities. 
Since nodes and attributes do not exist outside of document tree, you can't create a standalone copy - you have to immediately insert it somewhere in the tree. For this, you can use one of the following functions: - - xml_attribute xml_node::append_copy(const xml_attribute& proto); - xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr); - xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr); - xml_node xml_node::append_copy(const xml_node& proto); - xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node); - xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node); - -These functions mirror the structure of `append_child`, `insert_child_before` and related functions - they take the handle to the prototype object, which is to be cloned, insert a new attribute\/node at the appropriate place, and then copy the attribute data or the whole node subtree to the new object. The functions return the handle to the resulting duplicate object, or null handle on failure. - -The attribute is copied along with the name and value; the node is copied along with its type, name and value; additionally attribute list and all children are recursively cloned, resulting in the deep subtree clone. The prototype object can be a part of the same document, or a part of any other document. - -The failure conditions resemble those of `append_child`, `insert_child_before` and related functions, [link xml_node::append_child consult their documentation for more information]. There are additional caveats specific to cloning functions: - -* Cloning null handles results in operation failure; -* Node cloning starts with insertion of the node of the same type as that of the prototype; for this reason, cloning functions can not be directly used to clone entire documents, since `node_document` is not a valid insertion type. The example below provides a workaround. 
-* It is possible to copy a subtree as a child of some node inside this subtree, i.e. `node.append_copy(node.parent().parent());`. This is a valid operation, and it results in a clone of the subtree in the state before cloning started, i.e. no infinite recursion takes place. - -This is an example with one possible implementation of include tags in XML ([@samples/include.cpp]). It illustrates node cloning and usage of other document modification functions: - -[import samples/include.cpp] -[code_include] - -[endsect] [/clone] - -[endsect] [/modify] - -[section:saving Saving document] - -Often after creating a new document or loading the existing one and processing it, it is necessary to save the result back to file. Also it is occasionally useful to output the whole document or a subtree to some stream; use cases include debug printing, serialization via network or other text-oriented medium, etc. pugixml provides several functions to output any subtree of the document to a file, stream or another generic transport interface; these functions allow to customize the output format (see [sref manual.saving.options]), and also perform necessary encoding conversions (see [sref manual.saving.encoding]). This section documents the relevant functionality. - -The node/attribute data is written to the destination properly formatted according to the node type; all special XML symbols, such as < and &, are properly escaped. In order to guard against forgotten node/attribute names, empty node/attribute names are printed as `":anonymous"`. For proper output, make sure all node and attribute names are set to meaningful values. - -[caution Currently the content of CDATA sections is not escaped, so CDATA sections with values that contain `"]]>"` will result in malformed document. This will be fixed in version 1.0.] 
- -[section:file Saving document to a file] - -[#xml_document::save_file] -If you want to save the whole document to a file, you can use the following function: - - bool xml_document::save_file(const char* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - -This function accepts file path as its first argument, and also three optional arguments, which specify indentation and other output options (see [sref manual.saving.options]) and output data encoding (see [sref manual.saving.encoding]). The path has the target operating system format, so it can be a relative or absolute one, it should have the delimiters of target system, it should have the exact case if target file system is case-sensitive, etc. File path is passed to system file opening function as is. - -[#xml_writer_file] -`save_file` opens the target file for writing, outputs the requested header (by default a document declaration is output, unless the document already has one), and then saves the document contents. If the file could not be opened, the function returns `false`. Calling `save_file` is equivalent to creating an `xml_writer_file` object with `FILE*` handle as the only constructor argument and then calling `save`; see [sref manual.saving.writer] for writer interface details. - -[note As of version 0.9, there is no function for saving XML document to wide character paths. Unfortunately, there is no portable way to do this; the version 1.0 will provide such function only for platforms with the corresponding functionality. You can use stream-saving functions as a workaround if your STL implementation can open file streams via wchar_t paths.] 
- -This is a simple example of saving XML document to file ([@samples/save_file.cpp]): - -[import samples/save_file.cpp] -[code_save_file] - -[endsect] [/file] - -[section:stream Saving document to C++ IOstreams] - -[#xml_document::save_stream] -For additional interoperability pugixml provides functions for saving document to any object which implements C++ std::ostream interface. This allows you to save documents to any standard C++ stream (e.g. file stream) or any third-party compliant implementation (e.g. Boost Iostreams). Most notably, this allows for easy debug output, since you can use `std::cout` stream as saving target. There are two functions, one works with narrow character streams, another handles wide character ones: - - void xml_document::save(std::ostream& stream, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - void xml_document::save(std::wostream& stream, const char_t* indent = "\t", unsigned int flags = format_default) const; - -`save` with `std::ostream` argument saves the document to the stream in the same way as `save_file` (i.e. with requested header and with encoding conversions). On the other hand, `save` with `std::wostream` argument saves the document to the wide stream with `encoding_wchar` encoding. Because of this, using `save` with wide character streams requires careful (usually platform-specific) stream setup (e.g. using the `imbue` function). Generally use of wide streams is discouraged, however it provides you with the ability to save documents to non-Unicode encodings, e.g. you can save Shift-JIS encoded data if you set the correct locale. - -[#xml_writer_stream] -Calling `save` with stream target is equivalent to creating an `xml_writer_stream` object with stream as the only constructor argument and then calling `save`; see [sref manual.saving.writer] for writer interface details. 
- -This is a simple example of saving XML document to standard output ([@samples/save_stream.cpp]): - -[import samples/save_stream.cpp] -[code_save_stream] - -[endsect] [/stream] - -[section:writer Saving document via writer interface] - -[#xml_document::save][#xml_writer][#xml_writer::write] -All of the above saving functions are implemented in terms of writer interface. This is a simple interface with a single function, which is called several times during output process with chunks of document data as input: - - class xml_writer - { - public: - virtual void write(const void* data, size_t size) = 0; - }; - - void xml_document::save(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - -In order to output the document via some custom transport, for example sockets, you should create an object which implements `xml_writer` interface and pass it to `save` function. `xml_writer::write` function is called with a buffer as an input, where `data` points to buffer start, and `size` is equal to the buffer size in bytes. `write` implementation must write the buffer to the transport; it can not save the passed buffer pointer, as the buffer contents will change after `write` returns. The buffer contains the chunk of document data in the desired encoding. - -`write` function is called with relatively large blocks (size is usually several kilobytes, except for the first block with BOM, which is output only if `format_write_bom` is set, and last block, which may be small), so there is often no need for additional buffering in the implementation. 
- -This is a simple example of custom writer for saving document data to STL string ([@samples/save_custom_writer.cpp]); read the sample code for more complex examples: - -[import samples/save_custom_writer.cpp] -[code_save_custom_writer] - -[endsect] [/writer] - -[section:subtree Saving a single subtree] - -[#xml_node::print][#xml_node::print_stream] -While the previously described functions saved the whole document to the destination, it is easy to save a single subtree. The following functions are provided: - - void xml_node::print(std::ostream& os, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; - void xml_node::print(std::wostream& os, const char_t* indent = "\t", unsigned int flags = format_default, unsigned int depth = 0) const; - void xml_node::print(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; - -These functions have the same arguments with the same meaning as the corresponding `xml_document::save` functions, and allow you to save the subtree to either a C++ IOstream or to any object that implements `xml_writer` interface. - -Saving a subtree differs from saving the whole document: the process behaves as if `format_write_bom` is off, and `format_no_declaration` is on, even if actual values of the flags are different. This means that BOM is not written to the destination, and document declaration is only written if it is the node itself or is one of node's children. Note that this also holds if you're saving a document; this example ([@samples/save_subtree.cpp]) illustrates the difference: - -[import samples/save_subtree.cpp] -[code_save_subtree] - -[endsect] [/subtree] - -[section:options Output options] - -All saving functions accept the optional parameter `flags`. 
This is a bitmask that customizes the output format; you can select the way the document nodes are printed and select the needed additional information that is output before the document contents. - -[note You should use the usual bitwise arithmetics to manipulate the bitmask: to enable a flag, use `mask | flag`; to disable a flag, use `mask & ~flag`.] - -These flags control the resulting tree contents: - -* [anchor format_indent] determines if all nodes should be indented with the indentation string (this is an additional parameter for all saving functions, and is `"\t"` by default). If this flag is on, before every node the indentation string is output several times, where the amount of indentation depends on the node's depth relative to the output subtree. This flag has no effect if `format_raw` is enabled. This flag is *on* by default. -[lbr] - -* [anchor format_raw] switches between formatted and raw output. If this flag is on, the nodes are not indented in any way, and also no newlines that are not part of document text are printed. Raw mode can be used for serialization where the result is not intended to be read by humans; also it can be useful if the document was parsed with `parse_ws_pcdata` flag, to preserve the original document formatting as much as possible. This flag is *off* by default. - -These flags control the additional output information: - -* [anchor format_no_declaration] allows to disable default node declaration output. By default, if the document is saved via `save` or `save_file` function, and it does not have any document declaration, a default declaration is output before the document contents. Enabling this flag disables this declaration. This flag has no effect in `xml_node::print` functions: they never output the default declaration. This flag is *off* by default. -[lbr] - -* [anchor format_write_bom] allows to enable Byte Order Mark (BOM) output. 
By default, no BOM is output, so in case of non UTF-8 encodings the resulting document's encoding may not be recognized by some parsers and text editors, if they do not implement sophisticated encoding detection. Enabling this flag adds an encoding-specific BOM to the output. This flag has no effect in `xml_node::print` functions: they never output the BOM. This flag is *off* by default. - -Additionally, there is one predefined option mask: - -* [anchor format_default] is the default set of flags, i.e. it has all options set to their default values. It sets formatted output with indentation, without BOM and with default node declaration, if necessary. - -This is an example that shows the outputs of different output options ([@samples/save_options.cpp]): - -[import samples/save_options.cpp] -[code_save_options] - -[endsect] [/options] - -[section:encoding Encodings] - -pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it's a strict subset of UTF-16) and handles all encoding conversions during output. The output encoding is set via the `encoding` parameter of saving functions, which is of type `xml_encoding`. The possible values for the encoding are documented in [sref manual.loading.encoding]; the only flag that has a different meaning is `encoding_auto`. - -While all other flags set the exact encoding, `encoding_auto` is meant for automatic encoding detection. The automatic detection does not make sense for output encoding, since there is usually nothing to infer the actual encoding from, so here `encoding_auto` means UTF-8 encoding, which is the most popular encoding for XML data storage. This is also the default value of output encoding; specify another value if you do not want UTF-8 encoded output. - -Also note that wide stream saving functions do not have `encoding` argument and always assume `encoding_wchar` encoding. 
- -[note The current behavior for Unicode conversion is to skip all invalid UTF sequences during conversion. This behavior should not be relied upon; if your node/attribute names do not contain any valid UTF sequences, they may be output as if they are empty, which will result in malformed XML document.] - -[endsect] [/encoding] - -[endsect] [/saving] - -[section:xpath XPath] - -If the task at hand is to select a subset of document nodes that match some criteria, it is possible to code a function using the existing traversal functionality for any practical criteria. However, often either a data-driven approach is desirable, in case the criteria are not predefined and come from a file, or it is inconvenient to use traversal interfaces and a higher-level DSL is required. There is a standard language for XML processing, XPath, that can be useful for these cases. pugixml implements an almost complete subset of XPath 1.0. Because of differences in document object model and some performance implications, there are minor violations of the official specifications, which can be found in [sref manual.xpath.w3c]. The rest of this section describes the interface for XPath functionality. Please note that if you wish to learn to use XPath language, you have to look for other tutorials or manuals; for example, you can read [@http://www.w3schools.com/xpath/ W3Schools XPath tutorial], [@http://www.tizag.com/xmlTutorial/xpathtutorial.php XPath tutorial at tizag.com], and [@http://www.w3.org/TR/xpath/ the XPath 1.0 specification]. - -[note As of version 0.9, you need both STL and exception support to use XPath; XPath is disabled if either `PUGIXML_NO_STL` or `PUGIXML_NO_EXCEPTIONS` is defined.] - -[section:types XPath types] - -[#xpath_value_type][#xpath_type_number][#xpath_type_string][#xpath_type_boolean][#xpath_type_node_set][#xpath_type_none] -Each XPath expression can have one of the following types: boolean, number, string or node set. 
Boolean type corresponds to `bool` type, number type corresponds to `double` type, string type corresponds to either `std::string` or `std::wstring`, depending on whether [link manual.dom.unicode wide character interface is enabled], and node set corresponds to `xpath_node_set` type. There is an enumeration, `xpath_value_type`, which can take the values `xpath_type_boolean`, `xpath_type_number`, `xpath_type_string` or `xpath_type_node_set`, respectively. - -[#xpath_node][#xpath_node::node][#xpath_node::attribute][#xpath_node::parent] -Because an XPath node can be either a node or an attribute, there is a special type, `xpath_node`, which is a discriminated union of these types. A value of this type contains two node handles, one of `xml_node` type, and another one of `xml_attribute` type; at most one of them can be non-null. The accessors to get these handles are available: - - xml_node xpath_node::node() const; - xml_attribute xpath_node::attribute() const; - -XPath nodes can be null, in which case both accessors return null handles. - -Note that as per XPath specification, each XPath node has a parent, which can be retrieved via this function: - - xml_node xpath_node::parent() const; - -`parent` function returns the node's parent if the XPath node corresponds to `xml_node` handle (equivalent to `node().parent()`), or the node to which the attribute belongs, if the XPath node corresponds to `xml_attribute` handle. For null nodes, `parent` returns null handle. - -[#xpath_node::unspecified_bool_type][#xpath_node::comparison] -Like node and attribute handles, XPath node handles can be implicitly cast to boolean-like object to check if it is a null node, and also can be compared for equality with each other. 
- -[#xpath_node::ctor] -You can also create XPath nodes with one of three constructors: the default constructor, the constructor that takes node argument, and the constructor that takes attribute and node arguments (in which case the attribute must belong to the attribute list of the node). However, usually you don't need to create your own XPath node objects, since they are returned to you via selection functions. - -[#xpath_node_set] -XPath expressions operate not on single nodes, but instead on node sets. A node set is a collection of nodes, which can be optionally ordered in either a forward document order or a reverse one. Document order is defined in XPath specification; an XPath node is before another node in document order if it appears before it in XML representation of the corresponding document. - -[#xpath_node_set::const_iterator][#xpath_node_set::begin][#xpath_node_set::end] -Node sets are represented by `xpath_node_set` object, which has an interface that resembles one of sequential random-access containers. It has an iterator type along with usual begin/past-the-end iterator accessors: - - typedef const xpath_node* xpath_node_set::const_iterator; - const_iterator xpath_node_set::begin() const; - const_iterator xpath_node_set::end() const; - -[#xpath_node_set::index][#xpath_node_set::size][#xpath_node_set::empty] -And it also can be iterated via indices, just like `std::vector`: - - const xpath_node& xpath_node_set::operator[](size_t index) const; - size_t xpath_node_set::size() const; - bool xpath_node_set::empty() const; - -All of the above operations have the same semantics as that of `std::vector`: the iterators are random-access, all of the above operations are constant time, and accessing the element at index that is greater than or equal to the set size results in undefined behavior. You can use both iterator-based and index-based access for iteration, however the iterator-based can be faster. 
- -[#xpath_node_set::type][#xpath_node_set::type_unsorted][#xpath_node_set::type_sorted][#xpath_node_set::type_sorted_reverse][#xpath_node_set::sort] -The order of iteration depends on the order of nodes inside the set; the order can be queried via the following function: - - enum xpath_node_set::type_t {type_unsorted, type_sorted, type_sorted_reverse}; - type_t xpath_node_set::type() const; - -`type` function returns the current order of nodes; `type_sorted` means that the nodes are in forward document order, `type_sorted_reverse` means that the nodes are in reverse document order, and `type_unsorted` means that neither order is guaranteed (nodes can accidentally be in a sorted order even if `type()` returns `type_unsorted`). If you require a specific order of iteration, you can change it via `sort` function: - - void xpath_node_set::sort(bool reverse = false); - -Calling `sort` sorts the nodes in either forward or reverse document order, depending on the argument; after this call `type()` will return `type_sorted` or `type_sorted_reverse`. - -[#xpath_node_set::first] -Often the actual iteration is not needed; instead, only the first element in document order is required. For this, a special accessor is provided: - - xpath_node xpath_node_set::first() const; - -This function returns the first node in forward document order from the set, or null node if the set is empty. Note that while the result of the function does not depend on the order of nodes in the set (i.e. on the result of `type()`), the complexity does - if the set is sorted, the complexity is constant, otherwise it is linear in the number of elements or worse. 
- -[endsect] [/types] - -[section:select Selecting nodes via XPath expression] - -[#xml_node::select_single_node][#xml_node::select_nodes] -If you want to select nodes that match some XPath expression, you can do it with the following functions: - - xpath_node xml_node::select_single_node(const char_t* query) const; - xpath_node_set xml_node::select_nodes(const char_t* query) const; - -`select_nodes` function compiles the expression and then executes it with the node as a context node, and returns the resulting node set. `select_single_node` returns only the first node in document order from the result, and is equivalent to calling `select_nodes(query).first()`. If the XPath expression does not match anything, or the node handle is null, `select_nodes` returns an empty set, and `select_single_node` returns null XPath node. - -Both functions throw `xpath_exception` if the query can not be compiled or if it returns a value with type other than node set; see [sref manual.xpath.errors] for details. - -[#xml_node::select_single_node_precomp][#xml_node::select_nodes_precomp] -While compiling expressions is fast, the compilation time can introduce a significant overhead if the same expression is used many times on small subtrees. If you're doing many similar queries, consider compiling them into query objects (see [sref manual.xpath.query] for further reference). Once you get a compiled query object, you can pass it to select functions instead of an expression string: - - xpath_node xml_node::select_single_node(const xpath_query& query) const; - xpath_node_set xml_node::select_nodes(const xpath_query& query) const; - -Both functions throw `xpath_exception` if the query returns a value with type other than node set. 
- -This is an example of selecting nodes using XPath expressions ([@samples/xpath_select.cpp]): - -[import samples/xpath_select.cpp] -[code_xpath_select] - -[endsect] [/select] - -[section:query Using query objects] - -[#xpath_query] -When you call `select_nodes` with an expression string as an argument, a query object is created behind the scene. A query object represents a compiled XPath expression. Query objects can be needed in the following circumstances: - -* You can precompile expressions to query objects to save compilation time if it becomes an issue; -* You can use query objects to evaluate XPath expressions which result in booleans, numbers or strings; -* You can get the type of expression value via query object. - -Query objects correspond to `xpath_query` type. They are immutable and non-copyable: they are bound to the expression at creation time and can not be cloned. If you want to put query objects in a container, allocate them on heap via `new` operator and store pointers to `xpath_query` in the container. - -[#xpath_query::ctor] -You can create a query object with the constructor that takes XPath expression as an argument: - - explicit xpath_query::xpath_query(const char_t* query); - -[#xpath_query::return_type] -The expression is compiled and the compiled representation is stored in the new query object. If compilation fails, `xpath_exception` is thrown (see [sref manual.xpath.errors] for details). 
After the query is created, you can query the type of the evaluation result using the following function: - - xpath_value_type xpath_query::return_type() const; - -[#xpath_query::evaluate_boolean][#xpath_query::evaluate_number][#xpath_query::evaluate_string][#xpath_query::evaluate_node_set] -You can evaluate the query using one of the following functions: - - bool xpath_query::evaluate_boolean(const xml_node& n) const; - double xpath_query::evaluate_number(const xml_node& n) const; - string_t xpath_query::evaluate_string(const xml_node& n) const; - xpath_node_set xpath_query::evaluate_node_set(const xml_node& n) const; - -All functions take the context node as an argument, compute the expression and return the result, converted to the requested type. By XPath specification, value of any type can be converted to boolean, number or string value, but no type other than node set can be converted to node set. Because of this, `evaluate_boolean`, `evaluate_number` and `evaluate_string` always return a result, but `evaluate_node_set` throws an `xpath_exception` if the return type is not node set. - -[note Calling `node.select_nodes("query")` is equivalent to calling `xpath_query("query").evaluate_node_set(node)`.] - -This is an example of using query objects ([@samples/xpath_query.cpp]): - -[import samples/xpath_query.cpp] -[code_xpath_query] - -[endsect] [/query] - -[section:errors Error handling] - -[#xpath_exception][#xpath_exception::what] -As of version 0.9, all XPath errors result in thrown exceptions. The errors can arise during expression compilation or node set evaluation. In both cases, an `xpath_exception` object is thrown. This is an exception object that implements `std::exception` interface, and thus has a single function `what()`: - - virtual const char* xpath_exception::what() const throw(); - -This function returns the error message. Currently it is impossible to get the exact place where query compilation failed. 
This functionality, along with optional error handling without exceptions, will be available in version 1.0. - -This is an example of XPath error handling ([@samples/xpath_error.cpp]): - -[import samples/xpath_error.cpp] -[code_xpath_error] - -[endsect] [/errors] - -[section:w3c Conformance to W3C specification] - -Because of the differences in document object models, performance considerations and implementation complexity, pugixml does not provide a fully conformant XPath 1.0 implementation. This is the current list of incompatibilities: - -* Consecutive text nodes sharing the same parent are not merged, i.e. in `<node>text1 <![CDATA[data]]> text2</node>` node should have one text node child, but instead has three. -* Since document can't have a document type declaration, `id()` function always returns an empty node set. -* Namespace nodes are not supported (affects namespace:: axis). -* Name tests are performed on QNames in XML document instead of expanded names; for `<foo xmlns:ns1='uri' xmlns:ns2='uri'><ns1:child/><ns2:child/></foo>`, query `foo/ns1:*` will return only the first child, not both of them. Compliant XPath implementations can return both nodes if the user provides appropriate namespace declarations. -* String functions consider a character to be either a single `char` value or a single `wchar_t` value, depending on the library configuration; this means that some string functions are not fully Unicode-aware. This affects `substring()`, `string-length()` and `translate()` functions. -* Variable references are not supported. - -Some of these incompatibilities will be fixed in version 1.0. - -[endsect] [/w3c] - -[endsect] [/xpath] - -[section:changes Changelog] - -[h5 1.07.2010 - version 0.9] - -Major release, featuring extended and improved Unicode support, miscellaneous performance improvements, bug fixes and more. 
- -* Major Unicode improvements: - # Introduced encoding support (automatic/manual encoding detection on load, manual encoding selection on save, conversion from/to UTF8, UTF16 LE/BE, UTF32 LE/BE) - # Introduced wchar_t mode (you can set PUGIXML_WCHAR_MODE define to switch pugixml internal encoding from UTF8 to wchar_t; all functions are switched to their Unicode variants) - # Load/save functions now support wide streams - -* Bug fixes: - # Fixed document corruption on failed parsing bug - # XPath string <-> number conversion improvements (increased precision, fixed crash for huge numbers) - # Improved DOCTYPE parsing: now parser recognizes all well-formed DOCTYPE declarations - # Fixed xml_attribute::as_uint() for large numbers (i.e. 2^32-1) - # Fixed xml_node::first_element_by_path for path components that are prefixes of node names, but are not exactly equal to them. - -* Specification changes: - # parse() API changed to load_buffer/load_buffer_inplace/load_buffer_inplace_own; load_buffer APIs do not require zero-terminated strings. 
- # Renamed as_utf16 to as_wide - # Changed xml_node::offset_debug return type and xml_parse_result::offset type to ptrdiff_t - # Nodes/attributes with empty names are now printed as :anonymous - -* Performance improvements: - # Optimized document parsing and saving - # Changed internal memory management: internal allocator is used for both metadata and name/value data; allocated pages are deleted if all allocations from them are deleted - # Optimized memory consumption: sizeof(xml_node_struct) reduced from 40 bytes to 32 bytes on x86 - # Optimized debug mode parsing/saving by order of magnitude - -* Miscellaneous: - # All STL includes except in pugixml.hpp are replaced with forward declarations - # xml_node::remove_child and xml_node::remove_attribute now return the operation result - -* Compatibility: - # parse() and as_utf16 are left for compatibility (these functions are deprecated and will be removed in version 1.0) - # Wildcard functions, document_order/precompute_document_order functions, all_elements_by_name function and format_write_bom_utf8 flag are deprecated and will be removed in version 1.0 - # xpath_type_t enumeration was renamed to xpath_value_type; xpath_type_t is deprecated and will be removed in version 1.0 - -[h5 8.11.2009 - version 0.5] - -Major bugfix release. 
Changes: - -* XPath bugfixes: - # Fixed translate(), lang() and concat() functions (infinite loops/crashes) - # Fixed compilation of queries with empty literal strings ("") - # Fixed axis tests: they never add empty nodes/attributes to the resulting node set now - # Fixed string-value evaluation for node-set (the result excluded some text descendants) - # Fixed self:: axis (it behaved like ancestor-or-self::) - # Fixed following:: and preceding:: axes (they included descendent and ancestor nodes, respectively) - # Minor fix for namespace-uri() function (namespace declaration scope includes the parent element of namespace declaration attribute) - # Some incorrect queries are no longer parsed now (i.e. foo: *) - # Fixed text()/etc. node test parsing bug (i.e. foo[text()] failed to compile) - # Fixed root step (/) - it now selects empty node set if query is evaluated on empty node - # Fixed string to number conversion ("123 " converted to NaN, "123 .456" converted to 123.456 - now the results are 123 and NaN, respectively) - # Node set copying now preserves sorted type; leads to better performance on some queries - -* Miscellaneous bugfixes: - # Fixed xml_node::offset_debug for PI nodes - # Added empty attribute checks to xml_node::remove_attribute - # Fixed node_pi and node_declaration copying - # Const-correctness fixes - -* Specification changes: - # xpath_node::select_nodes() and related functions now throw exception if expression return type is not node set (instead of assertion) - # xml_node::traverse() now sets depth to -1 for both begin() and end() callbacks (was 0 at begin() and -1 at end()) - # In case of non-raw node printing a newline is output after PCDATA inside nodes if the PCDATA has siblings - # UTF8 -> wchar_t conversion now considers 5-byte UTF8-like sequences as invalid - -* New features: - # Added xpath_node_set::operator[] for index-based iteration - # Added xpath_query::return_type() - # Added getter accessors for memory-management functions - 
-[h5 17.09.2009 - version 0.42] - -Maintenance release. Changes: - -* Bug fixes: - # Fixed deallocation in case of custom allocation functions or if delete[] / free are incompatible - # XPath parser fixed for incorrect queries (i.e. incorrect XPath queries should now always fail to compile) - # Const-correctness fixes for find_child_by_attribute - # Improved compatibility (miscellaneous warning fixes, fixed cstring include dependency for GCC) - # Fixed iterator begin/end and print function to work correctly for empty nodes - -* New features: - # Added PUGIXML_API/PUGIXML_CLASS/PUGIXML_FUNCTION configuration macros to control class/function attributes - # Added xml_attribute::set_value overloads for different types - -[h5 8.02.2009 - version 0.41] - -Maintenance release. Changes: - -* Bug fixes: - # Fixed bug with node printing (occasionally some content was not written to output stream) - -[h5 18.01.2009 - version 0.4] - -Changes: - -* Bug fixes: - # Documentation fix in samples for parse() with manual lifetime control - # Fixed document order sorting in XPath (it caused wrong order of nodes after xpath_node_set::sort and wrong results of some XPath queries) - -* Node printing changes: - # Single quotes are no longer escaped when printing nodes - # Symbols in second half of ASCII table are no longer escaped when printing nodes; because of this, format_utf8 flag is deleted as it's no longer needed and format_write_bom is renamed to format_write_bom_utf8. - # Reworked node printing - now it works via xml_writer interface; implementations for FILE* and std::ostream are available. As a side-effect, xml_document::save_file now works without STL. - -* New features: - # Added unsigned integer support for attributes (xml_attribute::as_uint, xml_attribute::operator=) - # Now document declaration () is parsed as node with type node_declaration when parse_declaration flag is specified (access to encoding/version is performed as if they were attributes, i.e. 
doc.child("xml").attribute("version").as_float()); corresponding flags for node printing were also added - # Added support for custom memory management (see set_memory_management_functions for details) - # Implemented node/attribute copying (see xml_node::insert\_copy_* and xml_node::append_copy for details) - # Added find_child_by_attribute and find_child_by_attribute_w to simplify parsing code in some cases (i.e. COLLADA files) - # Added file offset information querying for debugging purposes (now you're able to determine exact location of any xml_node in parsed file, see xml_node::offset_debug for details) - # Improved error handling for parsing - now load(), load_file() and parse() return xml_parse_result, which contains error code and last parsed offset; this does not break old interface as xml_parse_result can be implicitly casted to bool. - -[h5 31.10.2007 - version 0.34] - -Maintenance release. Changes: - -* Bug fixes: - # Fixed bug with loading from text-mode iostreams - # Fixed leak when transfer_ownership is true and parsing is failing - # Fixed bug in saving (\\r and \\n are now escaped in attribute values) - # Renamed free() to destroy() - some macro conflicts were reported - -* New features: - # Improved compatibility (supported Digital Mars C++, MSVC 6, CodeWarrior 8, PGI C++, Comeau, supported PS3 and XBox360) - # PUGIXML_NO_EXCEPTION flag for platforms without exception handling - -[h5 21.02.2007 - version 0.3] - -Refactored, reworked and improved version. 
Changes: - -* Interface: - # Added XPath - # Added tree modification functions - # Added no STL compilation mode - # Added saving document to file - # Refactored parsing flags - # Removed xml_parser class in favor of xml_document - # Added transfer ownership parsing mode - # Modified the way xml_tree_walker works - # Iterators are now non-constant - -* Implementation: - # Support of several compilers and platforms - # Refactored and sped up parsing core - # Improved standard compliancy - # Added XPath implementation - # Fixed several bugs - -[h5 6.11.2006 - version 0.2] - -First public release. Changes: - -* Bug fixes: - # Fixed child_value() (for empty nodes) - # Fixed xml_parser_impl warning at W4 - -* New features: - # Introduced child_value(name) and child_value_w(name) - # parse_eol_pcdata and parse_eol_attribute flags + parse_minimal optimizations - # Optimizations of strconv_t - -[h5 15.07.2006 - version 0.1] - -First private release for testing purposes - -[endsect] [/changes] - -[section:apiref API Reference] - -This is the reference for all macros, types, enumerations, classes and functions in pugixml. Each symbol is a link that leads to the relevant section of the manual. 
- -Macros: - -* `#define `[link PUGIXML_WCHAR_MODE] -* `#define `[link PUGIXML_NO_XPATH] -* `#define `[link PUGIXML_NO_STL] -* `#define `[link PUGIXML_NO_EXCEPTIONS] -* `#define `[link PUGIXML_API] -* `#define `[link PUGIXML_CLASS] -* `#define `[link PUGIXML_FUNCTION] - -Types: - -* `typedef `/configuration-defined type/` `[link char_t]`;` -* `typedef `/configuration-defined type/` `[link string_t]`;` -* `typedef void* (*`[link allocation_function]`)(size_t size);` -* `typedef void (*`[link deallocation_function]`)(void* ptr);` - -Enumerations: - -* `enum `[link xml_node_type] - * [link node_null] - * [link node_document] - * [link node_element] - * [link node_pcdata] - * [link node_cdata] - * [link node_comment] - * [link node_pi] - * [link node_declaration] - [lbr] - -* `enum `[link xml_parse_status] - * [link status_ok] - * [link status_file_not_found] - * [link status_io_error] - * [link status_out_of_memory] - * [link status_internal_error] - * [link status_unrecognized_tag] - * [link status_bad_pi] - * [link status_bad_comment] - * [link status_bad_cdata] - * [link status_bad_doctype] - * [link status_bad_pcdata] - * [link status_bad_start_element] - * [link status_bad_attribute] - * [link status_bad_end_element] - * [link status_end_element_mismatch] - [lbr] - -* `enum `[link xml_encoding] - * [link encoding_auto] - * [link encoding_utf8] - * [link encoding_utf16_le] - * [link encoding_utf16_be] - * [link encoding_utf16] - * [link encoding_utf32_le] - * [link encoding_utf32_be] - * [link encoding_utf32] - * [link encoding_wchar] - [lbr] - -* `enum `[link xpath_value_type] - * [link xpath_type_none] - * [link xpath_type_node_set] - * [link xpath_type_number] - * [link xpath_type_string] - * [link xpath_type_boolean] - -Constants: - -* Formatting options bit flags: - * [link format_default] - * [link format_indent] - * [link format_no_declaration] - * [link format_raw] - * [link format_write_bom] - [lbr] - -* Parsing options bit flags: - * [link parse_cdata] - 
* [link parse_comments] - * [link parse_declaration] - * [link parse_default] - * [link parse_eol] - * [link parse_escapes] - * [link parse_minimal] - * [link parse_pi] - * [link parse_ws_pcdata] - * [link parse_wconv_attribute] - * [link parse_wnorm_attribute] - -Classes: - -* `class `[link xml_attribute] - * [link xml_attribute::ctor xml_attribute]`();` - [lbr] - - * `bool `[link xml_attribute::empty empty]`() const;` - * `operator `[link xml_attribute::unspecified_bool_type unspecified_bool_type]`() const;` - [lbr] - - * `bool `[link xml_attribute::comparison operator==]`(const xml_attribute& r) const;` - * `bool `[link xml_attribute::comparison operator!=]`(const xml_attribute& r) const;` - * `bool `[link xml_attribute::comparison operator<]`(const xml_attribute& r) const;` - * `bool `[link xml_attribute::comparison operator>]`(const xml_attribute& r) const;` - * `bool `[link xml_attribute::comparison operator<=]`(const xml_attribute& r) const;` - * `bool `[link xml_attribute::comparison operator>=]`(const xml_attribute& r) const;` - [lbr] - - * `xml_attribute `[link xml_attribute::next_attribute next_attribute]`() const;` - * `xml_attribute `[link xml_attribute::previous_attribute previous_attribute]`() const;` - [lbr] - - * `const char_t* `[link xml_attribute::name name]`() const;` - * `const char_t* `[link xml_attribute::value value]`() const;` - [lbr] - - * `int `[link xml_attribute::as_int as_int]`() const;` - * `unsigned int `[link xml_attribute::as_uint as_uint]`() const;` - * `double `[link xml_attribute::as_double as_double]`() const;` - * `float `[link xml_attribute::as_float as_float]`() const;` - * `bool `[link xml_attribute::as_bool as_bool]`() const;` - [lbr] - - * `bool `[link xml_attribute::set_name set_name]`(const char_t* rhs);` - * `bool `[link xml_attribute::set_value set_value]`(const char_t* rhs);` - * `bool `[link xml_attribute::set_value set_value]`(int rhs);` - * `bool `[link xml_attribute::set_value set_value]`(unsigned int rhs);` - * 
`bool `[link xml_attribute::set_value set_value]`(double rhs);` - * `bool `[link xml_attribute::set_value set_value]`(bool rhs);` - [lbr] - - * `xml_attribute& `[link xml_attribute::assign operator=]`(const char_t* rhs);` - * `xml_attribute& `[link xml_attribute::assign operator=]`(int rhs);` - * `xml_attribute& `[link xml_attribute::assign operator=]`(unsigned int rhs);` - * `xml_attribute& `[link xml_attribute::assign operator=]`(double rhs);` - * `xml_attribute& `[link xml_attribute::assign operator=]`(bool rhs);` - [lbr] - -* `class `[link xml_node] - * [link xml_node::ctor xml_node]`();` - [lbr] - - * `bool `[link xml_node::empty empty]`() const;` - * `operator `[link xml_node::unspecified_bool_type unspecified_bool_type]`() const;` - [lbr] - - * `bool `[link xml_node::comparison operator==]`(const xml_node& r) const;` - * `bool `[link xml_node::comparison operator!=]`(const xml_node& r) const;` - * `bool `[link xml_node::comparison operator<]`(const xml_node& r) const;` - * `bool `[link xml_node::comparison operator>]`(const xml_node& r) const;` - * `bool `[link xml_node::comparison operator<=]`(const xml_node& r) const;` - * `bool `[link xml_node::comparison operator>=]`(const xml_node& r) const;` - [lbr] - - * `xml_node_type `[link xml_node::type type]`() const;` - [lbr] - - * `const char_t* `[link xml_node::name name]`() const;` - * `const char_t* `[link xml_node::value value]`() const;` - [lbr] - - * `xml_node `[link xml_node::parent parent]`() const;` - * `xml_node `[link xml_node::first_child first_child]`() const;` - * `xml_node `[link xml_node::last_child last_child]`() const;` - * `xml_node `[link xml_node::next_sibling next_sibling]`() const;` - * `xml_node `[link xml_node::previous_sibling previous_sibling]`() const;` - [lbr] - - * `xml_attribute `[link xml_node::first_attribute first_attribute]`() const;` - * `xml_attribute `[link xml_node::last_attribute last_attribute]`() const;` - [lbr] - - * `xml_node `[link xml_node::child child]`(const 
char_t* name) const;` - * `xml_attribute `[link xml_node::attribute attribute]`(const char_t* name) const;` - * `xml_node `[link xml_node::next_sibling_name next_sibling]`(const char_t* name) const;` - * `xml_node `[link xml_node::previous_sibling_name previous_sibling]`(const char_t* name) const;` - * `xml_node `[link xml_node::find_child_by_attribute find_child_by_attribute]`(const char_t* name, const char_t* attr_name, const char_t* attr_value) const;` - * `xml_node `[link xml_node::find_child_by_attribute find_child_by_attribute]`(const char_t* attr_name, const char_t* attr_value) const;` - [lbr] - - * `const char_t* `[link xml_node::child_value child_value]`() const;` - * `const char_t* `[link xml_node::child_value child_value]`(const char_t* name) const;` - [lbr] - - * `typedef xml_node_iterator `[link xml_node_iterator iterator]`;` - * `iterator `[link xml_node::begin begin]`() const;` - * `iterator `[link xml_node::end end]`() const;` - [lbr] - - * `typedef xml_attribute_iterator `[link xml_attribute_iterator attribute_iterator]`;` - * `attribute_iterator `[link xml_node::attributes_begin attributes_begin]`() const;` - * `attribute_iterator `[link xml_node::attributes_end attributes_end]`() const;` - [lbr] - - * `bool `[link xml_node::traverse traverse]`(xml_tree_walker& walker);` - [lbr] - - * `template xml_attribute `[link xml_node::find_attribute find_attribute]`(Predicate pred) const;` - * `template xml_node `[link xml_node::find_child find_child]`(Predicate pred) const;` - * `template xml_node `[link xml_node::find_node find_node]`(Predicate pred) const;` - [lbr] - - * `string_t `[link xml_node::path path]`(char_t delimiter = '/') const;` - * `xml_node `[link xml_node::first_element_by_path]`(const char_t* path, char_t delimiter = '/') const;` - * `xml_node `[link xml_node::root root]`() const;` - * `ptrdiff_t `[link xml_node::offset_debug offset_debug]`() const;` - [lbr] - - * `bool `[link xml_node::set_name set_name]`(const char_t* rhs);` - * `bool 
`[link xml_node::set_value set_value]`(const char_t* rhs);` - [lbr] - - * `xml_attribute `[link xml_node::append_attribute append_attribute]`(const char_t* name);` - * `xml_attribute `[link xml_node::insert_attribute_after insert_attribute_after]`(const char_t* name, const xml_attribute& attr);` - * `xml_attribute `[link xml_node::insert_attribute_before insert_attribute_before]`(const char_t* name, const xml_attribute& attr);` - [lbr] - - * `xml_node `[link xml_node::append_child append_child]`(xml_node_type type = node_element);` - * `xml_node `[link xml_node::insert_child_after insert_child_after]`(xml_node_type type, const xml_node& node);` - * `xml_node `[link xml_node::insert_child_before insert_child_before]`(xml_node_type type, const xml_node& node);` - [lbr] - - * `xml_attribute `[link xml_node::append_copy append_copy]`(const xml_attribute& proto);` - * `xml_attribute `[link xml_node::insert_copy_after insert_copy_after]`(const xml_attribute& proto, const xml_attribute& attr);` - * `xml_attribute `[link xml_node::insert_copy_before insert_copy_before]`(const xml_attribute& proto, const xml_attribute& attr);` - [lbr] - - * `xml_node `[link xml_node::append_copy append_copy]`(const xml_node& proto);` - * `xml_node `[link xml_node::insert_copy_after insert_copy_after]`(const xml_node& proto, const xml_node& node);` - * `xml_node `[link xml_node::insert_copy_before insert_copy_before]`(const xml_node& proto, const xml_node& node);` - [lbr] - - * `bool `[link xml_node::remove_attribute remove_attribute]`(const xml_attribute& a);` - * `bool `[link xml_node::remove_attribute remove_attribute]`(const char_t* name);` - * `bool `[link xml_node::remove_child remove_child]`(const xml_node& n);` - * `bool `[link xml_node::remove_child remove_child]`(const char_t* name);` - [lbr] - - * `void `[link xml_node::print print]`(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) 
const;` - * `void `[link xml_node::print_stream print]`(std::ostream& os, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;` - * `void `[link xml_node::print_stream print]`(std::wostream& os, const char_t* indent = "\t", unsigned int flags = format_default, unsigned int depth = 0) const;` - [lbr] - - * `xpath_node `[link xml_node::select_single_node select_single_node]`(const char_t* query) const;` - * `xpath_node `[link xml_node::select_single_node_precomp select_single_node]`(const xpath_query& query) const;` - * `xpath_node_set `[link xml_node::select_nodes select_nodes]`(const char_t* query) const;` - * `xpath_node_set `[link xml_node::select_nodes_precomp select_nodes]`(const xpath_query& query) const;` - [lbr] - -* `class `[link xml_document] - * [link xml_document::ctor xml_document]`();` - * `~`[link xml_document::dtor xml_document]`();` - [lbr] - - * `xml_parse_result `[link xml_document::load_stream load]`(std::istream& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` - * `xml_parse_result `[link xml_document::load_stream load]`(std::wistream& stream, unsigned int options = parse_default);` - [lbr] - - * `xml_parse_result `[link xml_document::load_string load]`(const char_t* contents, unsigned int options = parse_default);` - [lbr] - - * `xml_parse_result `[link xml_document::load_file load_file]`(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` - [lbr] - - * `xml_parse_result `[link xml_document::load_buffer load_buffer]`(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` - * `xml_parse_result `[link xml_document::load_buffer_inplace load_buffer_inplace]`(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` - * `xml_parse_result `[link 
xml_document::load_buffer_inplace_own load_buffer_inplace_own]`(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` - [lbr] - - * `bool `[link xml_document::save_file save_file]`(const char* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` - [lbr] - - * `void `[link xml_document::save_stream save]`(std::ostream& stream, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` - * `void `[link xml_document::save_stream save]`(std::wostream& stream, const char_t* indent = "\t", unsigned int flags = format_default) const;` - [lbr] - - * `void `[link xml_document::save save]`(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` - [lbr] - -* `struct `[link xml_parse_result] - * `xml_parse_status `[link xml_parse_result::status status]`;` - * `ptrdiff_t `[link xml_parse_result::offset offset]`;` - * `xml_encoding `[link xml_parse_result::encoding encoding]`;` - [lbr] - - * `operator `[link xml_parse_result::bool bool]`() const;` - * `const char* `[link xml_parse_result::description description]`() const;` - [lbr] - -* `class `[link xml_node_iterator] -* `class `[link xml_attribute_iterator] -[lbr] - -* `class `[link xml_tree_walker] - * `virtual bool `[link xml_tree_walker::begin begin]`(xml_node& node);` - * `virtual bool `[link xml_tree_walker::for_each for_each]`(xml_node& node) = 0;` - * `virtual bool `[link xml_tree_walker::end end]`(xml_node& node);` - [lbr] - - * `int `[link xml_tree_walker::depth depth]`() const;` - [lbr] - -* `class `[link xml_writer] - * `virtual void `[link xml_writer::write write]`(const void* data, size_t size) = 0;` - [lbr] - -* `class `[link xml_writer_file]`: public xml_writer` - * [link xml_writer_file]`(void* file);` - [lbr] - -* `class `[link xml_writer_stream]`: public 
xml_writer` - * [link xml_writer_stream]`(std::ostream& stream);` - * [link xml_writer_stream]`(std::wostream& stream);` - [lbr] - -* `class `[link xpath_query] - * `explicit `[link xpath_query::ctor xpath_query]`(const char_t* query);` - [lbr] - - * `bool `[link xpath_query::evaluate_boolean evaluate_boolean]`(const xml_node& n) const;` - * `double `[link xpath_query::evaluate_number evaluate_number]`(const xml_node& n) const;` - * `string_t `[link xpath_query::evaluate_string evaluate_string]`(const xml_node& n) const;` - * `xpath_node_set `[link xpath_query::evaluate_node_set evaluate_node_set]`(const xml_node& n) const;` - [lbr] - - * `xpath_value_type `[link xpath_query::return_type return_type]`() const;` - [lbr] - -* `class `[link xpath_exception]`: public std::exception` - * `virtual const char* `[link xpath_exception::what what]`() const throw();` - [lbr] - -* `class `[link xpath_node] - * [link xpath_node::ctor xpath_node]`();` - * [link xpath_node::ctor xpath_node]`(const xml_node& node);` - * [link xpath_node::ctor xpath_node]`(const xml_attribute& attribute, const xml_node& parent);` - [lbr] - - * `xml_node `[link xpath_node::node node]`() const;` - * `xml_attribute `[link xpath_node::attribute attribute]`() const;` - * `xml_node `[link xpath_node::parent parent]`() const;` - [lbr] - - * `operator `[link xpath_node::unspecified_bool_type unspecified_bool_type]`() const;` - * `bool `[link xpath_node::comparison operator==]`(const xpath_node& n) const;` - * `bool `[link xpath_node::comparison operator!=]`(const xpath_node& n) const;` - [lbr] - -* `class `[link xpath_node_set] - * `typedef const xpath_node* `[link xpath_node_set::const_iterator const_iterator]`;` - * `const_iterator `[link xpath_node_set::begin begin]`() const;` - * `const_iterator `[link xpath_node_set::end end]`() const;` - [lbr] - - * `const xpath_node& `[link xpath_node_set::index operator\[\]]`(size_t index) const;` - * `size_t `[link xpath_node_set::size size]`() const;` - * `bool 
`[link xpath_node_set::empty empty]`() const;` - [lbr] - - * `xpath_node `[link xpath_node_set::first first]`() const;` - [lbr] - - * `enum type_t {`[link xpath_node_set::type_unsorted type_unsorted], [link xpath_node_set::type_sorted type_sorted], [link xpath_node_set::type_sorted_reverse type_sorted_reverse]`};` - * `type_t `[link xpath_node_set::type type]`() const;` - * `void `[link xpath_node_set::sort sort]`(bool reverse = false);` - -Functions: - -* [link as_utf8] -* [link as_wide] -* [link get_memory_allocation_function] -* [link get_memory_deallocation_function] -* [link set_memory_management_functions] - -[endsect] [/apiref] - -[section:toc Table of Contents] - -toc-placeholder - -[endsect] [/toc] - -[/ vim:et ] +[book pugixml + [quickbook 1.5] + + [version 0.9] + [id manual] + [copyright 2010 Arseny Kapoulkine] + [license Distributed under the MIT License] +] + +[template sbr[]''''''] +[template lbr[]''''''] [/ for empty lines in lists] +[template file[name]''''''[name]''''''] +[template sref[name]''''''] +[template anchor[name]''''''[^[name]]] +[template ftnt[id text]''''''[text]''''''] + +[section:overview Overview] + +[section:introduction Introduction] + +pugixml is a light-weight C++ XML processing library. It consists of a DOM-like interface with rich traversal/modification capabilities, an extremely fast XML parser which constructs the DOM tree from an XML file/buffer, and an XPath 1.0 implementation for complex data-driven tree queries. Full Unicode support is also available, with [link manual.dom.unicode two Unicode interface variants] and conversions between different Unicode encodings (which happen automatically during parsing/saving). The library is [link manual.install.portability extremely portable] and easy to integrate and use. pugixml is developed and maintained since 2006 and has many users. All code is distributed under the MIT license, making it completely free to use in both open-source and proprietary applications. 
+ +pugixml enables very fast, convenient and memory-efficient XML document processing. However, since pugixml has a DOM parser, it can't process XML documents that do not fit in memory; also the parser is a non-validating one, so if you need DTD/Schema validation, the library is not for you. + +This is the complete manual for pugixml, which describes all features of the library in detail. If you want to start writing code as quickly as possible, you are advised to [@quickstart.html read the quick start guide first]. + +[note No documentation is perfect, and neither is this one. If you encounter a description that is unclear, please file an issue as described in [sref manual.overview.feedback]. Also if you can spare the time for a full proof-reading, including spelling and grammar, that would be great! Please [link email send me an e-mail]; as a token of appreciation, your name will be included in the [link manual.overview.thanks corresponding section] of this documentation.] + +[endsect] [/introduction] + +[section:feedback Feedback] + +If you believe you've found a bug in pugixml (bugs include compilation problems (errors/warnings), crashes, performance degradation and incorrect behavior), please file an issue via the [@http://code.google.com/p/pugixml/issues/entry issue submission form]. Be sure to include the relevant information so that the bug can be reproduced: the version of pugixml, compiler version and target architecture, the code that uses pugixml and exhibits the bug, etc. + +Feature requests can be reported the same way as bugs, so if you're missing some functionality in pugixml or if the API is rough in some places and you can suggest an improvement, file an issue. However, please note that there are many factors when considering API changes (compatibility with previous versions, API redundancy, etc.), so generally features that can be implemented via a small function without pugixml modification are not accepted. However, all rules have exceptions.
+ +If you have a contribution to pugixml, such as a build script for some build system/IDE, or a well-designed set of helper functions, or a binding to some language other than C++, please file an issue. You can include the relevant patches as issue attachments. Your contribution has to be distributed under the terms of a license that's compatible with the pugixml license; i.e. GPL/LGPL licensed code is not accepted. + +[#email] +If filing an issue is not possible due to privacy or other concerns, you can contact the pugixml author by e-mail directly: [@mailto:arseny.kapoulkine@gmail.com arseny.kapoulkine@gmail.com]. + +[endsect] [/feedback] + +[section:thanks Acknowledgments] + +pugixml could not have been developed without the help of many people; some of them are listed in this section. If you've played a part in pugixml development and you cannot find yourself on this list, I'm truly sorry; please [link email send me an e-mail] so I can fix this. + +Thanks to *Kristen Wegner* for the pugxml parser, which was used as a basis for pugixml. + +Thanks to *Neville Franks* for contributions to the pugxml parser. + +Thanks to *Artyom Palvelev* for suggesting a lazy gap contraction approach. + +Thanks to *Vyacheslav Egorov* for documentation proofreading. + +[endsect] [/thanks] + +[section:license License] + +The pugixml library is distributed under the MIT license: + +[: +Copyright (c) 2006-2010 Arseny Kapoulkine + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software.
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. +] + +[endsect] [/license] + +[endsect] [/overview] + +[section:install Installation] + +[section:getting Getting pugixml] + +pugixml is distributed in source form. You can either download a source distribution or checkout the Subversion repository. + +[section:source Source distributions] + +You can download the latest source distribution via one of the following links: + +[pre +[@http://pugixml.googlecode.com/files/pugixml-0.9.zip] +[@http://pugixml.googlecode.com/files/pugixml-0.9.tar.gz] +] + +The distribution contains library source, documentation (the manual you're reading now and the quick start guide) and some code examples. After downloading the distribution, install pugixml by extracting all files from the compressed archive. + +If you need an older version, you can download it from the [@http://code.google.com/p/pugixml/downloads/list version archive]. + +[endsect] [/source] + +[section:subversion Subversion repository] + +The Subversion repository is located at [@http://pugixml.googlecode.com/svn/]. There is a Subversion tag "release-{version}" for each version; also there is the "latest" tag, which always points to the latest stable release. 
+ +For example, to checkout the current version, you can use this command: + +[pre svn checkout http://pugixml.googlecode.com/svn/tags/release-0.9 pugixml] + +To checkout the latest version, you can use this command: + +[pre svn checkout http://pugixml.googlecode.com/svn/tags/latest pugixml] + +The repository contains library source, documentation, code examples and the full unit test suite. + +Use the latest version tag if you want to automatically get new versions via =svn update=. Use other tags if you want to switch to new versions only explicitly (for example, using the =svn switch= command). Also please note that the Subversion trunk contains the work-in-progress version of the code; while this means that you can get new features and bug fixes from trunk without waiting for a new release, this also means that occasionally the code can be broken in some configurations. + +[endsect] [/subversion] + +[endsect] [/getting] + +[section:building Building pugixml] + +pugixml is distributed in source form without any pre-built binaries; you have to build them yourself. + +The complete pugixml source consists of four files - two source files, [file pugixml.cpp] and [file pugixpath.cpp], and two header files, [file pugixml.hpp] and [file pugiconfig.hpp]. [file pugixml.hpp] is the primary header which you need to include in order to use pugixml classes/functions; [file pugiconfig.hpp] is a supplementary configuration file (see [sref manual.install.building.config]). The rest of this guide assumes that [file pugixml.hpp] is either in the current directory or in one of the include directories of your project, so that `#include "pugixml.hpp"` can find the header; however you can also use a relative path (i.e. `#include "../libs/pugixml/src/pugixml.hpp"`) or an include directory-relative path (i.e. `#include <xml/thirdparty/pugixml/src/pugixml.hpp>`). + +[note You don't need to compile [file pugixpath.cpp] unless you use XPath.]
+ +[section:embed Building pugixml as a part of another static library/executable] + +The easiest way to build pugixml is to compile two source files, [file pugixml.cpp] and [file pugixpath.cpp], along with the existing library/executable. This process depends on the method of building your application; for example, if you're using Microsoft Visual Studio[ftnt trademarks All trademarks used are properties of their respective owners.], Apple Xcode, Code::Blocks or any other IDE, just add [file pugixml.cpp] and [file pugixpath.cpp] to one of your projects. + +If you're using Microsoft Visual Studio and the project has precompiled headers turned on, you'll see the following error messages: + +[pre pugixpath.cpp(3477) : fatal error C1010: unexpected end of file while looking for precompiled header. Did you forget to add '#include "stdafx.h"' to your source?] + +The correct way to resolve this is to disable precompiled headers for [file pugixml.cpp] and [file pugixpath.cpp]; you have to set "Create/Use Precompiled Header" option (Properties dialog -> C/C++ -> Precompiled Headers -> Create/Use Precompiled Header) to "Not Using Precompiled Headers". You'll have to do it for both [file pugixml.cpp] and [file pugixpath.cpp], for all project configurations/platforms (you can select Configuration "All Configurations" and Platform "All Platforms" before editing the option): + +[table +[[ +[@images/vs2005_pch1.png [$images/vs2005_pch1_thumb.png]] +[$images/next.png] +[@images/vs2005_pch2.png [$images/vs2005_pch2_thumb.png]] +[$images/next.png] +[@images/vs2005_pch3.png [$images/vs2005_pch3_thumb.png]] +[$images/next.png] +[@images/vs2005_pch4.png [$images/vs2005_pch4_thumb.png]] +]] ] + +[endsect] [/embed] + +[section:static Building pugixml as a standalone static library] + +It's possible to compile pugixml as a standalone static library. 
This process depends on the method of building your application; pugixml distribution comes with project files for several popular IDEs/build systems. There are project files for Apple XCode3, Code::Blocks, Codelite, Microsoft Visual Studio 2005, 2008, 2010, and configuration scripts for CMake and premake4. You're welcome to submit project files/build scripts for other software; see [sref manual.overview.feedback]. + +There are two projects for each version of Microsoft Visual Studio: one for dynamically linked CRT, which has a name like [file pugixml_vs2008.vcproj], and another one for statically linked CRT, which has a name like [file pugixml_vs2008_static.vcproj]. You should select the version that matches the CRT used in your application; the default option for new projects created by Microsoft Visual Studio is dynamically linked CRT, so unless you changed the defaults, you should use the version with dynamic CRT (i.e. [file pugixml_vs2008.vcproj] for Microsoft Visual Studio 2008). + +In addition to adding pugixml project to your workspace, you'll have to make sure that your application links with pugixml library. If you're using Microsoft Visual Studio 2005/2008, you can add a dependency from your application project to pugixml one. If you're using Microsoft Visual Studio 2010, you'll have to add a reference to your application project instead. For other IDEs/systems, consult the relevant documentation. + +[table +[[Microsoft Visual Studio 2005/2008][Microsoft Visual Studio 2010]] +[[ +[@images/vs2005_link1.png [$images/vs2005_link1_thumb.png]] +[$images/next.png] +[@images/vs2005_link2.png [$images/vs2005_link2_thumb.png]] +][ +[@images/vs2010_link1.png [$images/vs2010_link1_thumb.png]] +[$images/next.png] +[@images/vs2010_link2.png [$images/vs2010_link2_thumb.png]] +]] ] + +[endsect] [/static] + +[section:shared Building pugixml as a standalone shared library] + +It's possible to compile pugixml as a standalone shared library. 
The process is usually similar to the static library approach; however, no preconfigured projects/scripts are included into pugixml distribution, so you'll have to do it yourself. Generally, if you're using GCC-based toolchain, the process does not differ from building any other library as DLL (adding -shared to compilation flags should suffice); if you're using MSVC-based toolchain, you'll have to explicitly mark exported symbols with a declspec attribute. You can do it by defining `PUGIXML_API` macro, i.e. via [file pugiconfig.hpp]: + + #ifdef _DLL + #define PUGIXML_API __declspec(dllexport) + #else + #define PUGIXML_API __declspec(dllimport) + #endif + +[endsect] [/shared] + +[section:config Additional configuration options] + +pugixml uses several defines to control the compilation process. There are two ways to define them: either put the needed definitions to [file pugiconfig.hpp] (it has some examples that are commented out) or provide them via compiler command-line. Define consistency is important, i.e. the definitions should match in all source files that include [file pugixml.hpp] (including pugixml sources) throughout the application. Adding defines to [file pugiconfig.hpp] lets you guarantee this, unless your macro definition is wrapped in preprocessor `#if`/`#ifdef` directive and this directive is not consistent. [file pugiconfig.hpp] will never contain anything but comments, which means that when upgrading to new version, you can safely leave your modified version intact. + +[anchor PUGIXML_WCHAR_MODE] define toggles between UTF-8 style interface (the in-memory text encoding is assumed to be UTF-8, most functions use `char` as character type) and UTF-16/32 style interface (the in-memory text encoding is assumed to be UTF-16/32, depending on `wchar_t` size, most functions use `wchar_t` as character type). See [sref manual.dom.unicode] for more details. + +[anchor PUGIXML_NO_XPATH] define disables XPath. 
Both XPath interfaces and XPath implementation are excluded from compilation; you can still compile the file [file pugixpath.cpp] (it will result in an empty translation unit). This option is provided in case you do not need XPath functionality and need to save code space. + +[anchor PUGIXML_NO_STL] define disables use of STL in pugixml. The functions that operate on STL types are no longer present (i.e. load/save via iostream) if this macro is defined. This option is provided in case your target platform does not have a standard-compliant STL implementation. + +[note As of version 0.9, STL is used in XPath implementation; therefore, XPath is also disabled if this macro is defined. This will change in version 1.0.] + +[anchor PUGIXML_NO_EXCEPTIONS] define disables use of exceptions in pugixml. This option is provided in case your target platform does not have exception handling capabilities. + +[note As of version 0.9, exceptions are *only* used in XPath implementation; therefore, XPath is also disabled if this macro is defined. This will change in version 1.0.] + +[anchor PUGIXML_API], [anchor PUGIXML_CLASS] and [anchor PUGIXML_FUNCTION] defines let you specify custom attributes (i.e. declspec or calling conventions) for pugixml classes and non-member functions. In absence of `PUGIXML_CLASS` or `PUGIXML_FUNCTION` definitions, `PUGIXML_API` definition is used instead. For example, to specify fixed calling convention, you can define `PUGIXML_FUNCTION` to i.e. `__fastcall`. Another example is DLL import/export attributes in MSVC (see [sref manual.install.building.shared]). + +[note In that example `PUGIXML_API` is inconsistent between several source files; this is an exception to the consistency rule.] + +[endsect] [/config] + +[endsect] [/building] + +[section:portability Portability] + +pugixml is written in standard-compliant C++ with some compiler-specific workarounds where appropriate.
pugixml is compatible with the upcoming C++0x standard (verified using GCC 4.5). Each version is tested with a unit test suite (with code coverage about 99%) on the following platforms: + +* Microsoft Windows: + * Borland C++ Compiler 5.82 + * Digital Mars C++ Compiler 8.51 + * Intel C++ Compiler 8.0, 9.0 x86/x64, 10.0 x86/x64, 11.0 x86/x64 + * Metrowerks CodeWarrior 8.0 + * Microsoft Visual C++ 6.0, 7.0 (2002), 7.1 (2003), 8.0 (2005) x86/x64, 9.0 (2008) x86/x64, 10.0 (2010) x86/x64 + * MinGW (GCC) 3.4, 4.4, 4.5, 4.6 x64 + +* Linux (GCC 4.4.3 x86/x64) +* FreeBSD (GCC 4.2.1 x86/x64) +* Apple MacOSX (GCC 4.0.1 x86/x64/PowerPC) +* Microsoft Xbox 360 +* Nintendo Wii (Metrowerks CodeWarrior 4.1) +* Sony Playstation Portable (GCC 3.4.2) +* Sony Playstation 3 (GCC 4.1.1, SNC 310.1) + +[endsect] [/portability] + +[endsect] [/install] + +[section:dom Document object model] + +pugixml stores XML data in DOM-like way: the entire XML document (both document structure and element data) is stored in memory as a tree. The tree can be loaded from character stream (file, string, C++ I/O stream), then traversed via special API or XPath expressions. The whole tree is mutable: both node structure and node/attribute data can be changed at any time. Finally, the result of document transformations can be saved to a character stream (file, C++ I/O stream or custom transport). + +[section:tree Tree structure] + +The XML document is represented with a tree data structure. The root of the tree is the document itself, which corresponds to C++ type `xml_document`. Document has one or more child nodes, which correspond to C++ type `xml_node`. Nodes have different types; depending on a type, a node can have a collection of child nodes, a collection of attributes, which correspond to C++ type `xml_attribute`, and some additional data (i.e. name). 
+ +[#xml_node_type] +The tree nodes can be of one of the following types (which together form the enumeration `xml_node_type`): + +* Document node ([anchor node_document]) - this is the root of the tree, which consists of several child nodes. This node corresponds to `xml_document` class; note that `xml_document` is a sub-class of `xml_node`, so the entire node interface is also available. However, document node is special in several ways, which will be covered below. There can be only one document node in the tree; document node does not have any XML representation. +[lbr] + +* Element/tag node ([anchor node_element]) - this is the most common type of node, which represents XML elements. Element nodes have a name, a collection of attributes and a collection of child nodes (both of which may be empty). The attribute is a simple name/value pair. The example XML representation of element node is as follows: + + <node attr="value"><child/></node> + +[:There are two element nodes here; one has name `"node"`, single attribute `"attr"` and single child `"child"`, another has name `"child"` and does not have any attributes or child nodes.] + +* Plain character data nodes ([anchor node_pcdata]) represent plain text in XML. PCDATA nodes have a value, but do not have name or children/attributes. Note that plain character data is not a part of the element node but instead has its own node; for example, an element node can have several child PCDATA nodes. The example XML representation of text node is as follows: + + <node> text1 <child/> text2 </node> + +[:Here `"node"` element has three children, two of which are PCDATA nodes with values `"text1"` and `"text2"`.] + +* Character data nodes ([anchor node_cdata]) represent text in XML that is quoted in a special way. CDATA nodes do not differ from PCDATA nodes except in XML representation - the above text example looks like this with CDATA: + + <node> <![CDATA[text1]]> <child/> <![CDATA[text2]]> </node> + +[:CDATA nodes make it easy to include non-escaped <, & and > characters in plain text.
CDATA value can not contain the character sequence \]\]>, since it is used to determine the end of node contents.] + +* Comment nodes ([anchor node_comment]) represent comments in XML. Comment nodes have a value, but do not have name or children/attributes. The example XML representation of comment node is as follows: + + <!-- comment text --> + +[:Here the comment node has value `"comment text"`. By default comment nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior by adding `parse_comments` flag.] + +* Processing instruction nodes ([anchor node_pi]) represent processing instructions (PI) in XML. PI nodes have a name and an optional value, but do not have children/attributes. The example XML representation of PI node is as follows: + + <?name value?> + +[:Here the name (also called PI target) is `"name"`, and the value is `"value"`. By default PI nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior by adding `parse_pi` flag.] + +* Declaration node ([anchor node_declaration]) represents document declarations in XML. Declaration nodes have a name (`"xml"`) and an optional collection of attributes, but do not have value or children. There can be only one declaration node in a document; moreover, it should be the topmost node (its parent should be the document). The example XML representation of declaration node is as follows: + + <?xml version="1.0"?> + +[:Here the node has name `"xml"` and a single attribute with name `"version"` and value `"1.0"`. By default declaration nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior by adding `parse_declaration` flag. Also, by default a dummy declaration is output when XML document is saved unless there is already a declaration in the document; you can disable this by adding `format_no_declaration` flag.]
+ +Finally, here is a complete example of XML document and the corresponding tree representation ([@samples/tree.xml]): + +[table + +[[ +`` +<?xml version="1.0"?> +<mesh name="mesh_root"> + <!-- here is a mesh node --> + some text + <![CDATA[someothertext]]> + some more text + <node attr1="value1" attr2="value2" /> + <node attr1="value2"> + <innernode/> + </node> +</mesh> +<?include somedata?> +`` +][ +[@images/dom_tree.png [$images/dom_tree_thumb.png]] +]]] + + +[endsect] [/tree] + +[section:cpp C++ interface] + +[note All pugixml classes and functions are located in `pugi` namespace; you have to either use explicit name qualification (i.e. `pugi::xml_node`), or to gain access to relevant symbols via `using` directive (i.e. `using pugi::xml_node;` or `using namespace pugi;`). The namespace will be omitted from declarations in this documentation hereafter; all code examples will use fully-qualified names.] + +Despite the fact that there are several node types, there are only three C++ types representing the tree (`xml_document`, `xml_node`, `xml_attribute`); some operations on `xml_node` are only valid for certain node types. They are described below. + +[#xml_document] +`xml_document` is the owner of the entire document structure; it is a non-copyable class. The interface of `xml_document` consists of loading functions (see [sref manual.loading]), saving functions (see [sref manual.saving]) and the interface of `xml_node`, which allows for document inspection and/or modification. Note that while `xml_document` is a sub-class of `xml_node`, `xml_node` is not a polymorphic type; the inheritance is only used to simplify usage. + +[#xml_document::ctor] +[#xml_document::dtor] +Default constructor of `xml_document` initializes the document to the tree with only a root node (document node). You can then populate it with data using either tree modification functions or loading functions; all loading functions destroy the previous tree with all occupied memory, which puts existing nodes/attributes from this document to invalid state.
Destructor of `xml_document` also destroys the tree, thus the lifetime of the document object should exceed the lifetimes of any node/attribute handles that point to the tree. + +[caution While technically node/attribute handles can be alive when the tree they're referring to is destroyed, calling any member function of these handles results in undefined behavior. Thus it is recommended to make sure that the document is destroyed only after all references to its nodes/attributes are destroyed.] + +[#xml_node] +[#xml_node::type] +`xml_node` is the handle to document node; it can point to any node in the document, including document itself. There is a common interface for nodes of all types; the actual node type can be queried via `xml_node::type()` method. Note that `xml_node` is only a handle to the actual node, not the node itself - you can have several `xml_node` handles pointing to the same underlying object. Destroying `xml_node` handle does not destroy the node and does not remove it from the tree. The size of `xml_node` is equal to that of a pointer, so it is nothing more than a lightweight wrapper around pointer; you can safely pass or return `xml_node` objects by value without additional overhead. + +[#node_null] +There is a special value of `xml_node` type, known as null node or empty node (such nodes have type `node_null`). It does not correspond to any node in any document, and thus resembles null pointer. However, all operations are defined on empty nodes; generally the operations don't do anything and return empty nodes/attributes or empty strings as their result (see documentation for specific functions for more detailed information). This is useful for chaining calls; i.e. you can get the grandparent of a node like so: `node.parent().parent()`; if a node is a null node or it does not have a parent, the first `parent()` call returns null node; the second `parent()` call then also returns null node, so you don't have to check for errors twice. 
+ +[#xml_attribute] +`xml_attribute` is the handle to an XML attribute; it has the same semantics as `xml_node`, i.e. there can be several `xml_attribute` handles pointing to the same underlying object, there is a special null attribute value, which propagates to function results. + +[#xml_attribute::ctor] +[#xml_node::ctor] +Both `xml_node` and `xml_attribute` have the default constructor which initializes them to null objects. + +[#xml_attribute::comparison] +[#xml_node::comparison] +`xml_node` and `xml_attribute` try to behave like pointers, that is, they can be compared with other objects of the same type, making it possible to use them as keys of associative containers. All handles to the same underlying object are equal, and any two handles to different underlying objects are not equal. Null handles only compare as equal to themselves. The result of relational comparison can not be reliably determined from the order of nodes in file or other ways. Do not use relational comparison operators except for search optimization (i.e. associative container keys). + +[#xml_attribute::unspecified_bool_type] +[#xml_node::unspecified_bool_type] +[#xml_attribute::empty] +[#xml_node::empty] +Additionally, handles can be implicitly cast to boolean-like objects, so that you can test if the node\/attribute is empty by just doing `if (node) { ... }` or `if (!node) { ... } else { ... }`. Alternatively you can check if a given `xml_node`/`xml_attribute` handle is null by calling the following methods: + + bool xml_attribute::empty() const; + bool xml_node::empty() const; + +Nodes and attributes do not exist outside of document tree, so you can't create them without adding them to some document. Once underlying node/attribute objects are destroyed, the handles to those objects become invalid.
While this means that destruction of the entire tree invalidates all node/attribute handles, it also means that destroying a subtree (by calling `remove_child`) or removing an attribute invalidates the corresponding handles. There is no way to check handle validity; you have to ensure correctness through external mechanisms. + +[endsect] [/cpp] + +[section:unicode Unicode interface] + +There are two choices of interface and internal representation when configuring pugixml: you can either choose the UTF-8 (also called char) interface or UTF-16/32 (also called wchar_t) one. The choice is controlled via `PUGIXML_WCHAR_MODE` define; you can set it via [file pugiconfig.hpp] or via preprocessor options, as discussed in [sref manual.install.building.config]. If this define is set, the wchar_t interface is used; otherwise (by default) the char interface is used. The exact wide character encoding is assumed to be either UTF-16 or UTF-32 and is determined based on size of `wchar_t` type. + +[note If size of `wchar_t` is 2, pugixml assumes UTF-16 encoding instead of UCS-2, which means that some characters are represented as two code units (a surrogate pair).] + +All tree functions that work with strings work with either C-style null terminated strings or STL strings of the selected character type. For example, node name accessors look like this in char mode: + + const char* xml_node::name() const; + bool xml_node::set_name(const char* value); + +and like this in wchar_t mode: + + const wchar_t* xml_node::name() const; + bool xml_node::set_name(const wchar_t* value); + +[#char_t] +[#string_t] +There is a special type, `pugi::char_t`, that is defined as the character type and depends on the library configuration; it will be also used in the documentation hereafter. There is also a type `pugi::string_t`, which is defined as the STL string of the character type; it corresponds to `std::string` in char mode and to `std::wstring` in wchar_t mode.
+ +In addition to the interface, the internal implementation changes to store XML data as `pugi::char_t`; this means that these two modes have different memory usage characteristics. The conversion to `pugi::char_t` upon document loading and from `pugi::char_t` upon document saving happen automatically, which also carries minor performance penalty. The general advice however is to select the character mode based on usage scenario, i.e. if UTF-8 is inconvenient to process and most of your XML data is localized, wchar_t mode is probably a better choice. + +[#as_utf8] +[#as_wide] +There are cases when you'll have to convert string data between UTF-8 and wchar_t encodings; the following helper functions are provided for such purposes: + + std::string as_utf8(const wchar_t* str); + std::wstring as_wide(const char* str); + +Both functions accept null-terminated string as an argument `str`, and return the converted string. `as_utf8` performs conversion from UTF-16/32 to UTF-8; `as_wide` performs conversion from UTF-8 to UTF-16/32. Invalid UTF sequences are silently discarded upon conversion. `str` has to be a valid string; passing null pointer results in undefined behavior. + +[note Most examples in this documentation assume char interface and therefore will not compile with `PUGIXML_WCHAR_MODE`. This is to simplify the documentation; usually the only changes you'll have to make is to pass `wchar_t` string literals, i.e. 
instead of + +`pugi::xml_node node = doc.child("bookstore").find_child_by_attribute("book", "id", "12345");` + +you'll have to do + +`pugi::xml_node node = doc.child(L"bookstore").find_child_by_attribute(L"book", L"id", L"12345");`] + +[endsect] [/unicode] + +[section:thread Thread-safety guarantees] + +Almost all functions in pugixml have the following thread-safety guarantees: + +* it is safe to call free functions from multiple threads +* it is safe to perform concurrent read-only accesses to the same tree (all constant member functions do not modify the tree) +* it is safe to perform concurrent read/write accesses, if there is only one read or write access to the single tree at a time + +Concurrent modification and traversing of a single tree requires synchronization, for example via reader-writer lock. Modification includes altering document structure and altering individual node/attribute data, i.e. changing names/values. + +The only exception is `set_memory_management_functions`; it modifies global variables and as such is not thread-safe. Its usage policy has more restrictions, see [sref manual.dom.memory.custom]. + +[endsect] [/thread] + +[section:exception Exception guarantees] + +With the exception of XPath, pugixml itself does not throw any exceptions. Additionally, most pugixml functions have a no-throw exception guarantee. + +This is not applicable to functions that operate on STL strings or IOstreams; such functions have either strong guarantee (functions that operate on strings) or basic guarantee (functions that operate on streams). Also functions that call user-defined callbacks (i.e. `xml_node::traverse` or `xml_node::find_node`) do not provide any exception guarantees beyond the ones provided by callback. + +XPath functions may throw `xpath_exception` on parsing error; also, XPath implementation uses STL, and thus may throw i.e. `std::bad_alloc` in low memory conditions. Still, XPath functions provide strong exception guarantee. 
+ +[endsect] [/exception] + +[section:memory Memory management] + +pugixml requests the memory needed for document storage in big chunks, and allocates document data inside those chunks. This section discusses replacing functions used for chunk allocation and internal memory management implementation. + +[section:custom Custom memory allocation/deallocation functions] + +[#allocation_function] +[#deallocation_function] +All memory for tree structure/data is allocated via globally specified functions, which default to malloc/free. You can set your own allocation functions with the `set_memory_management_functions` function. The function interfaces are the same as that of malloc/free: + + typedef void* (*allocation_function)(size_t size); + typedef void (*deallocation_function)(void* ptr); + +[#set_memory_management_functions] +[#get_memory_allocation_function] +[#get_memory_deallocation_function] +You can use the following accessor functions to change or get current memory management functions: + + void set_memory_management_functions(allocation_function allocate, deallocation_function deallocate); + allocation_function get_memory_allocation_function(); + deallocation_function get_memory_deallocation_function(); + +Allocation function is called with the size (in bytes) as an argument and should return a pointer to memory block with alignment that is suitable for pointer storage and size that is greater than or equal to the requested one. If the allocation fails, the function has to return null pointer (throwing an exception from allocation function results in undefined behavior). Deallocation function is called with the pointer that was returned by the previous call or with a null pointer; null pointer deallocation should be handled as a no-op. If memory management functions are not thread-safe, library thread safety is not guaranteed.
+ +This is a simple example of custom memory management ([@samples/custom_memory_management.cpp]): + +[import samples/custom_memory_management.cpp] +[code_custom_memory_management_decl] +[code_custom_memory_management_call] + +When setting new memory management functions, care must be taken to make sure that there are no live pugixml objects. Otherwise when the objects are destroyed, the new deallocation function will be called with the memory obtained by the old allocation function, resulting in undefined behavior. + +[note Currently memory for XPath objects is allocated using default operators new/delete; this will change in the next version.] + +[endsect] [/custom] + +[section:internals Document memory management internals] + +Constructing a document object using the default constructor does not result in any allocations; document node is stored inside the `xml_document` object. + +When the document is loaded from file/buffer, unless an inplace loading function is used (see [sref manual.loading.memory]), a complete copy of character stream is made; all names/values of nodes and attributes are allocated in this buffer. This buffer is allocated via a single large allocation and is only freed when document memory is reclaimed (i.e. if the `xml_document` object is destroyed or if another document is loaded in the same object). Also when loading from file or stream, an additional large allocation may be performed if encoding conversion is required; a temporary buffer is allocated, and it is freed before load function returns. + +All additional memory, such as memory for document structure (node/attribute objects) and memory for node/attribute names/values is allocated in pages on the order of 32 kilobytes; actual objects are allocated inside the pages using a memory management scheme optimized for fast allocation/deallocation of many small objects. 
Because of the scheme specifics, the pages are only destroyed if all objects inside them are destroyed; also, generally destroying an object does not mean that subsequent object creation will reuse the same memory. This means that it is possible to devise a usage scheme which will lead to higher memory usage than expected; one example is adding a lot of nodes, and then removing all even-numbered ones; not a single page is reclaimed in the process. However this is an example specifically crafted to produce unsatisfying behavior; in all practical usage scenarios the memory consumption is less than that of a general-purpose allocator because allocation meta-data is very small in size. + +[endsect] [/internals] + +[endsect] [/memory] + +[endsect] [/dom] + +[section:loading Loading document] + +pugixml provides several functions for loading XML data from various places - files, C++ iostreams, memory buffers. All functions use an extremely fast non-validating parser. This parser is not fully W3C conformant - it can load any valid XML document, but does not perform some well-formedness checks. While considerable effort is made to reject invalid XML documents, some validation is not performed because of performance reasons. Also some XML transformations (i.e. EOL handling or attribute value normalization) can impact parsing speed and thus can be disabled. However for vast majority of XML documents there is no performance difference between different parsing options. Parsing options also control whether certain XML nodes are parsed; see [sref manual.loading.options] for more information. + +XML data is always converted to internal character format (see [sref manual.dom.unicode]) before parsing. pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it's a strict subset of UTF-16) and handles all encoding conversions automatically.
Unless explicit encoding is specified, loading functions perform automatic encoding detection based on first few characters of XML data, so in almost all cases you do not have to specify document encoding. Encoding conversion is described in more detail in [sref manual.loading.encoding]. + +[section:file Loading document from file] + +[#xml_document::load_file] +The most common source of XML data is files; pugixml provides a separate function for loading XML document from file: + + xml_parse_result xml_document::load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + +This function accepts file path as its first argument, and also two optional arguments, which specify parsing options (see [sref manual.loading.options]) and input data encoding (see [sref manual.loading.encoding]). The path has the target operating system format, so it can be a relative or absolute one, it should have the delimiters of target system, it should have the exact case if target file system is case-sensitive, etc. File path is passed to system file opening function as is. + +`load_file` destroys the existing document tree and then tries to load the new tree from the specified file. The result of the operation is returned in an `xml_parse_result` object; this object contains the operation status, and the related information (i.e. last successfully parsed position in the input file, if parsing fails). See [sref manual.loading.errors] for error handling details. + +[note As of version 0.9, there is no function for loading XML document from wide character path. Unfortunately, there is no portable way to do this; the version 1.0 will provide such function only for platforms with the corresponding functionality. You can use stream-loading functions as a workaround if your STL implementation can open file streams via `wchar_t` paths.] 
+ +This is an example of loading XML document from file ([@samples/load_file.cpp]): + +[import samples/load_file.cpp] +[code_load_file] + +[endsect] [/file] + +[section:memory Loading document from memory] + +[#xml_document::load_buffer] +[#xml_document::load_buffer_inplace] +[#xml_document::load_buffer_inplace_own] +Sometimes XML data should be loaded from some other source than file, i.e. HTTP URL; also you may want to load XML data from file using non-standard functions, i.e. to use your virtual file system facilities or to load XML from gzip-compressed files. All these scenarios require loading document from memory. First you should prepare a contiguous memory block with all XML data; then you have to invoke one of buffer loading functions. These functions will handle the necessary encoding conversions, if any, and then will parse the data into the corresponding XML tree. There are several buffer loading functions, which differ in the behavior and thus in performance/memory usage: + + xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + +All functions accept the buffer which is represented by a pointer to XML data, `contents`, and data size in bytes. Also there are two optional arguments, which specify parsing options (see [sref manual.loading.options]) and input data encoding (see [sref manual.loading.encoding]). The buffer does not have to be zero-terminated. + +`load_buffer` function works with immutable buffer - it does not ever modify the buffer. 
Because of this restriction it has to create a private buffer and copy XML data to it before parsing (applying encoding conversions if necessary). This copy operation carries a performance penalty, so inplace functions are provided - `load_buffer_inplace` and `load_buffer_inplace_own` store the document data in the buffer, modifying it in the process. In order for the document to stay valid, you have to make sure that the buffer's lifetime exceeds that of the tree if you're using inplace functions. In addition to that, `load_buffer_inplace` does not assume ownership of the buffer, so you'll have to destroy it yourself; `load_buffer_inplace_own` assumes ownership of the buffer and destroys it once it is not needed. This means that if you're using `load_buffer_inplace_own`, you have to allocate memory with pugixml allocation function (you can get it via [link get_memory_allocation_function]). + +The best way from the performance/memory point of view is to load document using `load_buffer_inplace_own`; this function has maximum control of the buffer with XML data so it is able to avoid redundant copies and reduce peak memory usage while parsing. This is the recommended function if you have to load the document from memory and performance is critical. + +[#xml_document::load_string] +There is also a simple helper function for cases when you want to load the XML document from null-terminated character string: + + xml_parse_result xml_document::load(const char_t* contents, unsigned int options = parse_default); + +It is equivalent to calling `load_buffer` with `size = strlen(contents)`. This function assumes native encoding for input data, so it does not do any encoding conversion. In general, this function is fine for loading small documents from string literals, but has more overhead and less functionality than buffer loading functions. 
+ +This is an example of loading XML document from memory using different functions ([@samples/load_memory.cpp]): + +[import samples/load_memory.cpp] +[code_load_memory_decl] +[code_load_memory_buffer] +[code_load_memory_buffer_inplace] +[code_load_memory_buffer_inplace_own] +[code_load_memory_string] + +[endsect] [/memory] + +[section:stream Loading document from C++ IOstreams] + +[#xml_document::load_stream] +For additional interoperability pugixml provides functions for loading document from any object which implements C++ `std::istream` interface. This allows you to load documents from any standard C++ stream (i.e. file stream) or any third-party compliant implementation (i.e. Boost Iostreams). There are two functions, one works with narrow character streams, another handles wide character ones: + + xml_parse_result xml_document::load(std::istream& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + xml_parse_result xml_document::load(std::wistream& stream, unsigned int options = parse_default); + +`load` with `std::istream` argument loads the document from stream from the current read position to the end, treating the stream contents as a byte stream of the specified encoding (with encoding autodetection as necessary). Thus calling `xml_document::load` on an opened `std::ifstream` object is equivalent to calling `xml_document::load_file`. + +`load` with `std::wistream` argument treats the stream contents as a wide character stream (encoding is always `encoding_wchar`). Because of this, using `load` with wide character streams requires careful (usually platform-specific) stream setup (i.e. using the `imbue` function). Generally use of wide streams is discouraged, however it provides you the ability to load documents from non-Unicode encodings, i.e. you can load Shift-JIS encoded data if you set the correct locale.
+ +This is a simple example of loading XML document from file using streams ([@samples/load_stream.cpp]); read the sample code for more complex examples involving wide streams and locales: + +[import samples/load_stream.cpp] +[code_load_stream] + +Stream loading requires working seek/tell functions and therefore may fail when used with some stream implementations like gzstream. + +[endsect] [/stream] + +[section:errors Handling parsing errors] + +[#xml_parse_result] +All document loading functions return the parsing result via `xml_parse_result` object. It contains parsing status, the offset of last successfully parsed character from the beginning of the source stream, and the encoding of the source stream: + + struct xml_parse_result + { + xml_parse_status status; + ptrdiff_t offset; + xml_encoding encoding; + + operator bool() const; + const char* description() const; + }; + +[#xml_parse_status] +[#xml_parse_result::status] +Parsing status is represented as the `xml_parse_status` enumeration and can be one of the following: + +* [anchor status_ok] means that no error was encountered during parsing; the source stream represents the valid XML document which was fully parsed and converted to a tree. +[lbr] + +* [anchor status_file_not_found] is only returned by `load_file` function and means that file could not be opened. +* [anchor status_io_error] is returned by `load_file` function and by `load` functions with `std::istream`/`std::wistream` arguments; it means that some I/O error has occurred during reading the file/stream. +* [anchor status_out_of_memory] means that there was not enough memory during some allocation; any allocation failure during parsing results in this error. +* [anchor status_internal_error] means that something went horribly wrong; currently this error does not occur +[lbr] + +* [anchor status_unrecognized_tag] means that parsing stopped due to a tag with either an empty name or a name which starts with incorrect character, such as [^#].
+* [anchor status_bad_pi] means that parsing stopped due to incorrect document declaration/processing instruction +* [anchor status_bad_comment], [anchor status_bad_cdata], [anchor status_bad_doctype] and [anchor status_bad_pcdata] mean that parsing stopped due to the invalid construct of the respective type +* [anchor status_bad_start_element] means that parsing stopped because starting tag either had no closing `>` symbol or contained some incorrect symbol +* [anchor status_bad_attribute] means that parsing stopped because there was an incorrect attribute, such as an attribute without value or with value that is not quoted (note that `` is incorrect in XML) +* [anchor status_bad_end_element] means that parsing stopped because ending tag had incorrect syntax (i.e. extra non-whitespace symbols between tag name and `>`) +* [anchor status_end_element_mismatch] means that parsing stopped because the closing tag did not match the opening one (i.e. ``) or because some tag was not closed at all + +[#xml_parse_result::description] +`description()` member function can be used to convert parsing status to a string; the returned message is always in English, so you'll have to write your own function if you need a localized string. However please note that the exact messages returned by `description()` function may change from version to version, so any complex status handling should be based on `status` value. + +If parsing failed because the source data was not a valid XML, the resulting tree is not destroyed - despite the fact that load function returns error, you can use the part of the tree that was successfully parsed. Obviously, the last element may have an unexpected name/value; for example, if the attribute value does not end with the necessary quotation mark, like in [^` (document declaration) is not considered to be a PI. This flag is *off* by default. 
+[lbr] + +* [anchor parse_comments] determines if comments (nodes with type [link node_comment]) are to be put in DOM tree. If this flag is off, they are not put in the tree, but are still parsed and checked for correctness. This flag is *off* by default. +[lbr] + +* [anchor parse_cdata] determines if CDATA sections (nodes with type [link node_cdata]) are to be put in DOM tree. If this flag is off, they are not put in the tree, but are still parsed and checked for correctness. This flag is *on* by default. +[lbr] + +* [anchor parse_ws_pcdata] determines if PCDATA nodes (nodes with type [link node_pcdata]) that consist only of whitespace characters are to be put in DOM tree. Often whitespace-only data is not significant for the application, and the cost of allocating and storing such nodes (both memory and speed-wise) can be significant. For example, after parsing XML string ` `, `` element will have three children when `parse_ws_pcdata` is set (child with type `node_pcdata` and value `" "`, child with type `node_element` and name `"a"`, and another child with type `node_pcdata` and value `" "`), and only one child when `parse_ws_pcdata` is not set. This flag is *off* by default. + +These flags control the transformation of tree element contents: + +* [anchor parse_escapes] determines if character and entity references are to be expanded during the parsing process. Character references have the form [^&#...;] or [^&#x...;] ([^...] is Unicode numeric representation of character in either decimal ([^&#...;]) or hexadecimal ([^&#x...;]) form), entity references are [^<], [^>], [^&], [^'] and [^"] (note that as pugixml does not handle DTD, the only allowed entities are predefined ones). If character/entity reference can not be expanded, it is left as is, so you can do additional processing later. Reference expansion is performed in attribute values and PCDATA content. This flag is *on* by default. 
+[lbr] + +* [anchor parse_eol] determines if EOL handling (that is, replacing sequences `0x0d 0x0a` by a single `0x0a` character, and replacing all standalone `0x0d` characters by `0x0a`) is to be performed on input data (that is, comments contents, PCDATA/CDATA contents and attribute values). This flag is *on* by default. +[lbr] + +* [anchor parse_wconv_attribute] determines if attribute value normalization should be performed for all attributes. This means, that whitespace characters (new line, tab and space) are replaced with space (`' '`). New line characters are always treated as if `parse_eol` is set, i.e. `\r\n` is converted to single space. This flag is *on* by default. +[lbr] + +* [anchor parse_wnorm_attribute] determines if extended attribute value normalization should be performed for all attributes. This means, that after attribute values are normalized as if `parse_wconv_attribute` was set, leading and trailing space characters are removed, and all sequences of space characters are replaced by a single space character. The value of `parse_wconv_attribute` has no effect if this flag is on. This flag is *off* by default. + +[note `parse_wconv_attribute` option performs transformations that are required by W3C specification for attributes that are declared as [^CDATA]; `parse_wnorm_attribute` performs transformations required for [^NMTOKENS] attributes. In the absence of document type declaration all attributes behave as if they are declared as [^CDATA], thus `parse_wconv_attribute` is the default option.] + +Additionally there are two predefined option masks: + +* [anchor parse_minimal] has all options turned off. This option mask means that pugixml does not add declaration nodes, PI nodes, CDATA sections and comments to the resulting tree and does not perform any conversion for input data, so theoretically it is the fastest mode. However, as discussed above, in practice `parse_default` is usually equally fast. 
+[lbr] + +* [anchor parse_default] is the default set of flags, i.e. it has all options set to their default values. It includes parsing CDATA sections (comments/PIs are not parsed), performing character and entity reference expansion, replacing whitespace characters with spaces in attribute values and performing EOL handling. Note, that PCDATA sections consisting only of whitespace characters are not parsed (by default) for performance reasons. + +This is an example of using different parsing options ([@samples/load_options.cpp]): + +[import samples/load_options.cpp] +[code_load_options] + +[endsect] [/options] + +[section:encoding Encodings] + +[#xml_encoding] +pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it's a strict subset of UTF-16) and handles all encoding conversions. Most loading functions accept the optional parameter `encoding`. This is a value of enumeration type `xml_encoding`, that can have the following values: + +* [anchor encoding_auto] means that pugixml will try to guess the encoding based on source XML data. 
The algorithm is a modified version of the one presented in Appendix F.1 of XML recommendation; it tries to match the first few bytes of input data with the following patterns in strict order: +[lbr] + * If first four bytes match UTF-32 BOM (Byte Order Mark), encoding is assumed to be UTF-32 with the endianness equal to that of BOM; + * If first two bytes match UTF-16 BOM, encoding is assumed to be UTF-16 with the endianness equal to that of BOM; + * If first three bytes match UTF-8 BOM, encoding is assumed to be UTF-8; + * If first four bytes match UTF-32 representation of [^<], encoding is assumed to be UTF-32 with the corresponding endianness; + * If first four bytes match UTF-16 representation of [^`, calling `next_sibling` for a handle that points to `` results in a handle pointing to ``, and calling `previous_sibling` results in handle pointing to ``. If node does not have next/previous sibling (this happens if it is the last/first node in the list, respectively), the functions return null nodes. `first_attribute`, `last_attribute`, `next_attribute` and `previous_attribute` functions behave the same way as corresponding child node functions and allow you to iterate through the attribute list in the same way. + +[note Because of memory consumption reasons, attributes do not have a link to their parent nodes. Thus there is no `xml_attribute::parent()` function.] + +Calling any of the functions above on the null handle results in a null handle - i.e. `node.first_child().next_sibling()` returns the second child of `node`, and null handle if there are no children at all or if there is only one.
+ +With these functions, you can iterate through all child nodes and display all attributes like this ([@samples/traverse_base.cpp]): + +[code_traverse_base_basic] + +[endsect] [/basic] + +[section:nodedata Getting node data] + +[#xml_node::name][#xml_node::value] +Apart from structural information (parent, child nodes, attributes), nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent. `node_document` nodes do not have name or value, `node_element` and `node_declaration` nodes always have a name but never have a value, `node_pcdata`, `node_cdata` and `node_comment` nodes never have a name but always have a value (it may be empty though), `node_pi` nodes always have a name and a value (again, value may be empty). In order to get node's name or value, you can use the following functions: + + const char_t* xml_node::name() const; + const char_t* xml_node::value() const; + +In case node does not have a name or value or if the node handle is null, both functions return empty strings - they never return null pointers. + +[#xml_node::child_value] +It is common to store data as text contents of some node - i.e. `This is a node`. In this case, `` node does not have a value, but instead has a child of type `node_pcdata` with value `"This is a node"`. pugixml provides two helper functions to parse such data: + + const char_t* xml_node::child_value() const; + const char_t* xml_node::child_value(const char_t* name) const; + +`child_value()` returns the value of the first child with type `node_pcdata` or `node_cdata`; `child_value(name)` is a simple wrapper for `child(name).child_value()`. For the above example, calling `node.child_value("description")` and `description.child_value()` will both produce string `"This is a node"`. If there is no child with relevant type, or if the handle is null, `child_value` functions return empty string. 
+ +There is an example of using some of these functions [link code_traverse_base_data at the end of the next section]. + +[endsect] [/nodedata] + +[section:attrdata Getting attribute data] + +[#xml_attribute::name][#xml_attribute::value] +All attributes have name and value, both of which are strings (value may be empty). There are two corresponding accessors, like for `xml_node`: + + const char_t* xml_attribute::name() const; + const char_t* xml_attribute::value() const; + +In case attribute handle is null, both functions return empty strings - they never return null pointers. + +[#xml_attribute::as_int][#xml_attribute::as_uint][#xml_attribute::as_double][#xml_attribute::as_float][#xml_attribute::as_bool] +In many cases attribute values have types that are not strings - i.e. an attribute may always contain values that should be treated as integers, despite the fact that they are represented as strings in XML. pugixml provides several accessors that convert attribute value to some other type. The accessors are as follows: + + int xml_attribute::as_int() const; + unsigned int xml_attribute::as_uint() const; + double xml_attribute::as_double() const; + float xml_attribute::as_float() const; + bool xml_attribute::as_bool() const; + +`as_int`, `as_uint`, `as_double` and `as_float` convert attribute values to numbers. If attribute handle is null or attribute value is empty, `0` is returned. Otherwise, all leading whitespace characters are truncated, and the remaining string is parsed as a decimal number (`as_int` or `as_uint`) or as a floating point number in either decimal or scientific form (`as_double` or `as_float`). Any extra characters are silently discarded, i.e. `as_int` will return `1` for string `"1abc"`. + +In case the input string contains a number that is out of the target numeric range, the result is undefined. 
+ +[caution Number conversion functions depend on current C locale as set with `setlocale`, so may return unexpected results if the locale is different from `"C"`.] + +`as_bool` converts attribute value to boolean as follows: if attribute handle is null or attribute value is empty, `false` is returned. Otherwise, `true` is returned if first character is one of `'1', 't', 'T', 'y', 'Y'`. This means that strings like `"true"` and `"yes"` are recognized as `true`, while strings like `"false"` and `"no"` are recognized as `false`. For more complex matching you'll have to write your own function. + +[note There are no portable 64-bit types in C++, so there is no corresponding conversion function. If your platform has a 64-bit integer, you can easily write a conversion function yourself.] + +[#code_traverse_base_data] +This is an example of using these functions, along with node data retrieval ones ([@samples/traverse_base.cpp]): + +[code_traverse_base_data] + +[endsect] [/attrdata] + +[section:contents Contents-based traversal functions] + +[#xml_node::child][#xml_node::attribute][#xml_node::next_sibling_name][#xml_node::previous_sibling_name] +Since a lot of document traversal consists of finding the node/attribute with the correct name, there are special functions for that purpose: + + xml_node xml_node::child(const char_t* name) const; + xml_attribute xml_node::attribute(const char_t* name) const; + xml_node xml_node::next_sibling(const char_t* name) const; + xml_node xml_node::previous_sibling(const char_t* name) const; + +`child` and `attribute` return the first child/attribute with the specified name; `next_sibling` and `previous_sibling` return the first sibling in the corresponding direction with the specified name. All string comparisons are case-sensitive. In case the node handle is null or there is no node\/attribute with the specified name, null handle is returned. 
+ +`child` and `next_sibling` functions can be used together to loop through all child nodes with the desired name like this: + + for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool")) + +[#xml_node::find_child_by_attribute] +Occasionally the needed node is specified not by the unique name but instead by the value of some attribute; for example, it is common to have node collections with each node having a unique id: ` `. There are two functions for finding child nodes based on the attribute values: + + xml_node xml_node::find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const; + xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const; + +The three-argument function returns the first child node with the specified name which has an attribute with the specified name/value; the two-argument function skips the name test for the node, which can be useful for searching in heterogeneous collections. If the node handle is null or if no node is found, null handle is returned. All string comparisons are case-sensitive. + +In all of the above functions, all arguments have to be valid strings; passing null pointers results in undefined behavior. 
+ +This is an example of using these functions ([@samples/traverse_base.cpp]): + +[code_traverse_base_contents] + +[endsect] [/contents] + +[section:iterators Traversing node/attribute lists via iterators] + +[#xml_node_iterator][#xml_attribute_iterator][#xml_node::begin][#xml_node::end][#xml_node::attributes_begin][#xml_node::attributes_end] +Child node lists and attribute lists are simply double-linked lists; while you can use `previous_sibling`/`next_sibling` and other such functions for iteration, pugixml additionally provides node and attribute iterators, so that you can treat nodes as containers of other nodes or attributes: + + class xml_node_iterator; + class xml_attribute_iterator; + + typedef xml_node_iterator xml_node::iterator; + iterator xml_node::begin() const; + iterator xml_node::end() const; + + typedef xml_attribute_iterator xml_node::attribute_iterator; + attribute_iterator xml_node::attributes_begin() const; + attribute_iterator xml_node::attributes_end() const; + +`begin` and `attributes_begin` return iterators that point to the first node\/attribute, respectively; `end` and `attributes_end` return past-the-end iterator for node\/attribute list, respectively - this iterator can't be dereferenced, but decrementing it results in an iterator pointing to the last element in the list (except for empty lists, where decrementing past-the-end iterator is not defined). Past-the-end iterator is commonly used as a termination value for iteration loops (see sample below). If you want to get an iterator that points to an existing handle, you can construct the iterator with the handle as a single constructor argument, like so: `xml_node_iterator(node)`. For `xml_attribute_iterator`, you'll have to provide both an attribute and its parent node. + +`begin` and `end` return equal iterators if called on null node; such iterators can't be dereferenced. `attributes_begin` and `attributes_end` behave the same way. 
For correct iterator usage this means that child node\/attribute collections of null nodes appear to be empty. + +Both types of iterators have bidirectional iterator semantics (i.e. they can be incremented and decremented, but efficient random access is not supported) and support all usual iterator operations - comparison, dereference, etc. The iterators are invalidated if the node\/attribute objects they're pointing to are removed from the tree; adding nodes\/attributes does not invalidate any iterators. + +Here is an example of using iterators for document traversal ([@samples/traverse_iter.cpp]): + +[import samples/traverse_iter.cpp] +[code_traverse_iter] + +[caution Node and attribute iterators are somewhere in the middle between const and non-const iterators. While dereference operation yields a non-constant reference to the object, so that you can use it for tree modification operations, modifying this reference by assignment - i.e. passing iterators to a function like `std::sort` - will not give expected results, as assignment modifies local handle that's stored in the iterator.] + +[endsect] [/iterators] + +[section:walker Recursive traversal with xml_tree_walker] + +[#xml_tree_walker] +The methods described above allow traversal of immediate children of some node; if you want to do a deep tree traversal, you'll have to do it via a recursive function or some equivalent method. However, pugixml provides a helper for depth-first traversal of a subtree. 
In order to use it, you have to implement `xml_tree_walker` interface and to call `traverse` function: + + class xml_tree_walker + { + public: + virtual bool begin(xml_node& node); + virtual bool for_each(xml_node& node) = 0; + virtual bool end(xml_node& node); + + int depth() const; + }; + + bool xml_node::traverse(xml_tree_walker& walker); + +[#xml_tree_walker::begin][#xml_tree_walker::for_each][#xml_tree_walker::end][#xml_node::traverse] +The traversal is launched by calling `traverse` function on traversal root and proceeds as follows: + +* First, `begin` function is called with traversal root as its argument. +* Then, `for_each` function is called for all nodes in the traversal subtree in depth first order, excluding the traversal root. Node is passed as an argument. +* Finally, `end` function is called with traversal root as its argument. + +If `begin`, `end` or any of the `for_each` calls return `false`, the traversal is terminated and `false` is returned as the traversal result; otherwise, the traversal results in `true`. Note that you don't have to override `begin` or `end` functions; their default implementations return `true`. + +[#xml_tree_walker::depth] +You can get the node's depth relative to the traversal root at any point by calling `depth` function. It returns `-1` if called from `begin`\/`end`, and returns 0-based depth if called from `for_each` - depth is 0 for all children of the traversal root, 1 for all grandchildren and so on. + +This is an example of traversing tree hierarchy with xml_tree_walker ([@samples/traverse_walker.cpp]): + +[import samples/traverse_walker.cpp] +[code_traverse_walker_impl] +[code_traverse_walker_traverse] + +[endsect] [/walker] + +[section:predicate Searching for nodes/attributes with predicates] + +[#xml_node::find_attribute][#xml_node::find_child][#xml_node::find_node] +While there are existing functions for getting a node/attribute with known contents, they are often not sufficient for simple queries. 
As an alternative to iterating manually through nodes/attributes until the needed one is found, you can make a predicate and call one of `find_` functions: + + template xml_attribute xml_node::find_attribute(Predicate pred) const; + template xml_node xml_node::find_child(Predicate pred) const; + template xml_node xml_node::find_node(Predicate pred) const; + +The predicate should be either a plain function or a function object which accepts one argument of type `xml_attribute` (for `find_attribute`) or `xml_node` (for `find_child` and `find_node`), and returns `bool`. The predicate is never called with null handle as an argument. + +`find_attribute` function iterates through all attributes of the specified node, and returns the first attribute for which predicate returned `true`. If predicate returned `false` for all attributes or if there were no attributes (including the case where the node is null), null attribute is returned. + +`find_child` function iterates through all child nodes of the specified node, and returns the first node for which predicate returned `true`. If predicate returned `false` for all nodes or if there were no child nodes (including the case where the node is null), null node is returned. + +`find_node` function performs a depth-first traversal through the subtree of the specified node (excluding the node itself), and returns the first node for which predicate returned `true`. If predicate returned `false` for all nodes or if subtree was empty, null node is returned. 
+ +This is an example of using predicate-based functions ([@samples/traverse_predicate.cpp]): + +[import samples/traverse_predicate.cpp] +[code_traverse_predicate_decl] +[code_traverse_predicate_find] + +[endsect] [/predicate] + +[section:misc Miscellaneous functions] + +[#xml_node::root] +If you need to get the document root of some node, you can use the following function: + + xml_node xml_node::root() const; + +This function returns the node with type `node_document`, which is the root node of the document the node belongs to (unless the node is null, in which case null node is returned). Currently this function has logarithmic complexity, since it simply finds such ancestor of the given node which itself has no parent. + +[#xml_node::path] +[#xml_node::first_element_by_path] +While pugixml supports complex XPath expressions, sometimes a simple path handling facility is needed. There are two functions, for getting node path and for converting path to a node: + + string_t xml_node::path(char_t delimiter = '/') const; + xml_node xml_node::first_element_by_path(const char_t* path, char_t delimiter = '/') const; + +Node paths consist of node names, separated with a delimiter (which is `/` by default); also paths can contain self (`.`) and parent (`..`) pseudo-names, so that this is a valid path: `"../../foo/./bar"`. `path` returns the path to the node from the document root, `first_element_by_path` looks for a node represented by a given path; a path can be an absolute one (absolute paths start with delimiter), in which case the rest of the path is treated as document root relative, and relative to the given node. For example, in the following document: ``, node `` has path `"a/b/c"`; calling `first_element_by_path` for document with path `"a/b"` results in node ``; calling `first_element_by_path` for node `` with path `"../a/./b/../."` results in node ``; calling `first_element_by_path` with path `"/a"` results in node `` for any node. 
+ +In case path component is ambiguous (if there are two nodes with given name), the first one is selected; paths are not guaranteed to uniquely identify nodes in a document. If any component of a path is not found, the result of `first_element_by_path` is null node; also `first_element_by_path` returns null node for null nodes, in which case the path does not matter. `path` returns an empty string for null nodes. + +[note `path` function returns the result as STL string, and thus is not available if `PUGIXML_NO_STL` is defined.] + +[#xml_node::offset_debug] +pugixml does not record row/column information for nodes upon parsing for efficiency reasons. However, if the node has not changed in a significant way since parsing (the name/value are not changed, and the node itself is the original one, i.e. it was not deleted from the tree and re-added later), it is possible to get the offset from the beginning of XML buffer: + + ptrdiff_t xml_node::offset_debug() const; + +If the offset is not available (this happens if the node is null, was not originally parsed from a stream, or has changed in a significant way), the function returns -1. Otherwise it returns the offset to node's data from the beginning of XML buffer in `pugi::char_t` units. For more information on parsing offsets, see [link xml_parse_result::offset parsing error handling documentation]. + +[endsect] [/misc] + +[endsect] [/access] + +[section:modify Modifying document data] + +The document in pugixml is fully mutable: you can completely change the document structure and modify the data of nodes/attributes. This section provides documentation for the relevant functions. All functions take care of memory management and structural integrity themselves, so they always result in structurally valid tree - however, it is possible to create an invalid XML tree (for example, by adding two attributes with the same name or by setting attribute/node name to empty/invalid string). 
Tree modification is optimized for performance and for memory consumption, so if you have enough memory you can create documents from scratch with pugixml and later save them to file/stream instead of relying on error-prone manual text writing and without too much overhead. + +All member functions that change node/attribute data or structure are non-constant and thus can not be called on constant handles. However, you can easily convert constant handle to non-constant one by simple assignment: `void foo(const pugi::xml_node& n) { pugi::xml_node nc = n; }`, so const-correctness here mainly provides additional documentation. + +[import samples/modify_base.cpp] + +[section:nodedata Setting node data] + +[#xml_node::set_name][#xml_node::set_value] +As discussed before, nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent. `node_document` nodes do not have name or value, `node_element` and `node_declaration` nodes always have a name but never have a value, `node_pcdata`, `node_cdata` and `node_comment` nodes never have a name but always have a value (it may be empty though), `node_pi` nodes always have a name and a value (again, value may be empty). In order to set node's name or value, you can use the following functions: + + bool xml_node::set_name(const char_t* rhs); + bool xml_node::set_value(const char_t* rhs); + +Both functions try to set the name\/value to the specified string, and return the operation result. The operation fails if the node can not have name or value (for instance, when trying to call `set_name` on a `node_pcdata` node), if the node handle is null, or if there is insufficient memory to handle the request. The provided string is copied into document managed memory and can be destroyed after the function returns (for example, you can safely pass stack-allocated buffers to these functions). 
The name/value content is not verified, so take care to use only valid XML names, or the document may become malformed. + +There is no equivalent of `child_value` function for modifying text children of the node. + +This is an example of setting node name and value ([@samples/modify_base.cpp]): + +[code_modify_base_node] + +[endsect] [/nodedata] + +[section:attrdata Setting attribute data] + +[#xml_attribute::set_name][#xml_attribute::set_value] +All attributes have name and value, both of which are strings (value may be empty). You can set them with the following functions: + + bool xml_attribute::set_name(const char_t* rhs); + bool xml_attribute::set_value(const char_t* rhs); + +Both functions try to set the name\/value to the specified string, and return the operation result. The operation fails if the attribute handle is null, or if there is insufficient memory to handle the request. The provided string is copied into document managed memory and can be destroyed after the function returns (for example, you can safely pass stack-allocated buffers to these functions). The name/value content is not verified, so take care to use only valid XML names, or the document may become malformed. + +In addition to string functions, several functions are provided for handling attributes with numbers and booleans as values: + + bool xml_attribute::set_value(int rhs); + bool xml_attribute::set_value(unsigned int rhs); + bool xml_attribute::set_value(double rhs); + bool xml_attribute::set_value(bool rhs); + +The above functions convert the argument to string and then call the base `set_value` function. Integers are converted to a decimal form, floating-point numbers are converted to either decimal or scientific form, depending on the number magnitude, boolean values are converted to either `"true"` or `"false"`. + +[caution Number conversion functions depend on current C locale as set with `setlocale`, so may generate unexpected results if the locale is different from `"C"`.] 
+ +[note There are no portable 64-bit types in C++, so there is no corresponding `set_value` function. If your platform has a 64-bit integer, you can easily write such a function yourself.] + +[#xml_attribute::assign] + +For convenience, all `set_value` functions have the corresponding assignment operators: + + xml_attribute& xml_attribute::operator=(const char_t* rhs); + xml_attribute& xml_attribute::operator=(int rhs); + xml_attribute& xml_attribute::operator=(unsigned int rhs); + xml_attribute& xml_attribute::operator=(double rhs); + xml_attribute& xml_attribute::operator=(bool rhs); + +These operators simply call the right `set_value` function and return the attribute they're called on; the return value of `set_value` is ignored, so errors are not detected. + +This is an example of setting attribute name and value ([@samples/modify_base.cpp]): + +[code_modify_base_attr] + +[endsect] [/attrdata] + +[section:add Adding nodes/attributes] + +[#xml_node::append_attribute][#xml_node::insert_attribute_after][#xml_node::insert_attribute_before][#xml_node::append_child][#xml_node::insert_child_after][#xml_node::insert_child_before] +Nodes and attributes do not exist outside of document tree, so you can't create them without adding them to some document. 
A node or attribute can be created at the end of node/attribute list or before\/after some other node: + + xml_attribute xml_node::append_attribute(const char_t* name); + xml_attribute xml_node::insert_attribute_after(const char_t* name, const xml_attribute& attr); + xml_attribute xml_node::insert_attribute_before(const char_t* name, const xml_attribute& attr); + + xml_node xml_node::append_child(xml_node_type type = node_element); + xml_node xml_node::insert_child_after(xml_node_type type, const xml_node& node); + xml_node xml_node::insert_child_before(xml_node_type type, const xml_node& node); + +`append_attribute` and `append_child` create a new node/attribute at the end of the corresponding list of the node the method is called on; `insert_attribute_after`, `insert_attribute_before`, `insert_child_after` and `insert_child_before` add the node\/attribute before or after specified node\/attribute. + +Attribute functions create an attribute with the specified name; you can specify the empty name and change the name later if you want to. Node functions create the node with the specified type; since node type can't be changed, you have to know the desired type beforehand. Also note that not all types can be added as children; see below for clarification. + +All functions return the handle to newly created object on success, and null handle on failure. 
There are several reasons for failure: + +* Adding fails if the target node is null; +* Only `node_element` nodes can contain attributes, so attribute adding fails if node is not an element; +* Only `node_document` and `node_element` nodes can contain children, so child node adding fails if target node is not an element or a document; +* `node_document` and `node_null` nodes can not be inserted as children, so passing `node_document` or `node_null` value as type results in operation failure; +* `node_declaration` nodes can only be added as children of the document node; attempt to insert declaration node as a child of an element node fails; +* Adding node/attribute results in memory allocation, which may fail; +* Insertion functions fail if the specified node or attribute is not in the target node's children/attribute list. + +Even if the operation fails, the document remains in consistent state, but the requested node/attribute is not added. + +[caution attribute() and child() functions do not add attributes or nodes to the tree, so code like `node.attribute("id") = 123;` will not do anything if `node` does not have an attribute with name `"id"`. Make sure you're operating with existing attributes/nodes by adding them if necessary.] + +This is an example of adding new attributes\/nodes to the document ([@samples/modify_add.cpp]): + +[import samples/modify_add.cpp] +[code_modify_add] + +[endsect] [/add] + +[section:remove Removing nodes/attributes] + +[#xml_node::remove_attribute][#xml_node::remove_child] +If you do not want your document to contain some node or attribute, you can remove it with one of the following functions: + + bool xml_node::remove_attribute(const xml_attribute& a); + bool xml_node::remove_child(const xml_node& n); + +`remove_attribute` removes the attribute from the attribute list of the node, and returns the operation result. 
`remove_child` removes the child node with the entire subtree (including all descendant nodes and attributes) from the document, and returns the operation result. Removing fails if one of the following is true: + +* The node the function is called on is null; +* The attribute\/node to be removed is null; +* The attribute\/node to be removed is not in the node's attribute\/child list. + +Removing the attribute or node invalidates all handles to the same underlying object, and also invalidates all iterators pointing to the same object. Removing node also invalidates all past-the-end iterators to its attribute or child node list. Be careful to ensure that all such handles and iterators either do not exist or are not used after the attribute\/node is removed. + +If you want to remove the attribute or child node by its name, two additional helper functions are available: + + bool xml_node::remove_attribute(const char_t* name); + bool xml_node::remove_child(const char_t* name); + +These functions look for the first attribute or child with the specified name, and then remove it, returning the result. If there is no attribute or child with such name, the function returns `false`; if there are two nodes with the given name, only the first node is deleted. If you want to delete all nodes with the specified name, you can use code like this: `while (node.remove_child("tool")) ;`. + +This is an example of removing attributes\/nodes from the document ([@samples/modify_remove.cpp]): + +[import samples/modify_remove.cpp] +[code_modify_remove] + +[endsect] [/remove] + +[section:clone Cloning nodes/attributes] + +[#xml_node::append_copy][#xml_node::insert_copy_after][#xml_node::insert_copy_before] +With the help of previously described functions, it is possible to create trees with any contents and structure, including cloning the existing data. However since this is an often needed operation, pugixml provides built-in node/attribute cloning facilities. 
Since nodes and attributes do not exist outside of document tree, you can't create a standalone copy - you have to immediately insert it somewhere in the tree. For this, you can use one of the following functions: + + xml_attribute xml_node::append_copy(const xml_attribute& proto); + xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr); + xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr); + xml_node xml_node::append_copy(const xml_node& proto); + xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node); + xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node); + +These functions mirror the structure of `append_child`, `insert_child_before` and related functions - they take the handle to the prototype object, which is to be cloned, insert a new attribute\/node at the appropriate place, and then copy the attribute data or the whole node subtree to the new object. The functions return the handle to the resulting duplicate object, or null handle on failure. + +The attribute is copied along with the name and value; the node is copied along with its type, name and value; additionally attribute list and all children are recursively cloned, resulting in the deep subtree clone. The prototype object can be a part of the same document, or a part of any other document. + +The failure conditions resemble those of `append_child`, `insert_child_before` and related functions, [link xml_node::append_child consult their documentation for more information]. There are additional caveats specific to cloning functions: + +* Cloning null handles results in operation failure; +* Node cloning starts with insertion of the node of the same type as that of the prototype; for this reason, cloning functions can not be directly used to clone entire documents, since `node_document` is not a valid insertion type. The example below provides a workaround. 
+* It is possible to copy a subtree as a child of some node inside this subtree, i.e. `node.append_copy(node.parent().parent());`. This is a valid operation, and it results in a clone of the subtree in the state before cloning started, i.e. no infinite recursion takes place. + +This is an example with one possible implementation of include tags in XML ([@samples/include.cpp]). It illustrates node cloning and usage of other document modification functions: + +[import samples/include.cpp] +[code_include] + +[endsect] [/clone] + +[endsect] [/modify] + +[section:saving Saving document] + +Often after creating a new document or loading the existing one and processing it, it is necessary to save the result back to file. Also it is occasionally useful to output the whole document or a subtree to some stream; use cases include debug printing, serialization via network or other text-oriented medium, etc. pugixml provides several functions to output any subtree of the document to a file, stream or another generic transport interface; these functions allow to customize the output format (see [sref manual.saving.options]), and also perform necessary encoding conversions (see [sref manual.saving.encoding]). This section documents the relevant functionality. + +The node/attribute data is written to the destination properly formatted according to the node type; all special XML symbols, such as < and &, are properly escaped. In order to guard against forgotten node/attribute names, empty node/attribute names are printed as `":anonymous"`. For proper output, make sure all node and attribute names are set to meaningful values. + +[caution Currently the content of CDATA sections is not escaped, so CDATA sections with values that contain `"]]>"` will result in malformed document. This will be fixed in version 1.0.] 
+ +[section:file Saving document to a file] + +[#xml_document::save_file] +If you want to save the whole document to a file, you can use the following function: + + bool xml_document::save_file(const char* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; + +This function accepts file path as its first argument, and also three optional arguments, which specify indentation and other output options (see [sref manual.saving.options]) and output data encoding (see [sref manual.saving.encoding]). The path has the target operating system format, so it can be a relative or absolute one, it should have the delimiters of target system, it should have the exact case if target file system is case-sensitive, etc. File path is passed to system file opening function as is. + +[#xml_writer_file] +`save_file` opens the target file for writing, outputs the requested header (by default a document declaration is output, unless the document already has one), and then saves the document contents. If the file could not be opened, the function returns `false`. Calling `save_file` is equivalent to creating an `xml_writer_file` object with `FILE*` handle as the only constructor argument and then calling `save`; see [sref manual.saving.writer] for writer interface details. + +[note As of version 0.9, there is no function for saving XML document to wide character paths. Unfortunately, there is no portable way to do this; the version 1.0 will provide such function only for platforms with the corresponding functionality. You can use stream-saving functions as a workaround if your STL implementation can open file streams via wchar_t paths.] 
+ +This is a simple example of saving XML document to file ([@samples/save_file.cpp]): + +[import samples/save_file.cpp] +[code_save_file] + +[endsect] [/file] + +[section:stream Saving document to C++ IOstreams] + +[#xml_document::save_stream] +For additional interoperability pugixml provides functions for saving document to any object which implements C++ std::ostream interface. This allows you to save documents to any standard C++ stream (e.g. file stream) or any third-party compliant implementation (e.g. Boost Iostreams). Most notably, this allows for easy debug output, since you can use `std::cout` stream as saving target. There are two functions, one works with narrow character streams, another handles wide character ones: + + void xml_document::save(std::ostream& stream, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; + void xml_document::save(std::wostream& stream, const char_t* indent = "\t", unsigned int flags = format_default) const; + +`save` with `std::ostream` argument saves the document to the stream in the same way as `save_file` (i.e. with requested header and with encoding conversions). On the other hand, `save` with `std::wostream` argument saves the document to the wide stream with `encoding_wchar` encoding. Because of this, using `save` with wide character streams requires careful (usually platform-specific) stream setup (e.g. using the `imbue` function). Generally use of wide streams is discouraged, however it provides you with the ability to save documents to non-Unicode encodings, e.g. you can save Shift-JIS encoded data if you set the correct locale. + +[#xml_writer_stream] +Calling `save` with stream target is equivalent to creating an `xml_writer_stream` object with stream as the only constructor argument and then calling `save`; see [sref manual.saving.writer] for writer interface details. 
+ +This is a simple example of saving XML document to standard output ([@samples/save_stream.cpp]): + +[import samples/save_stream.cpp] +[code_save_stream] + +[endsect] [/stream] + +[section:writer Saving document via writer interface] + +[#xml_document::save][#xml_writer][#xml_writer::write] +All of the above saving functions are implemented in terms of writer interface. This is a simple interface with a single function, which is called several times during output process with chunks of document data as input: + + class xml_writer + { + public: + virtual void write(const void* data, size_t size) = 0; + }; + + void xml_document::save(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; + +In order to output the document via some custom transport, for example sockets, you should create an object which implements `xml_writer` interface and pass it to `save` function. `xml_writer::write` function is called with a buffer as an input, where `data` points to buffer start, and `size` is equal to the buffer size in bytes. `write` implementation must write the buffer to the transport; it can not save the passed buffer pointer, as the buffer contents will change after `write` returns. The buffer contains the chunk of document data in the desired encoding. + +`write` function is called with relatively large blocks (size is usually several kilobytes, except for the first block with BOM, which is output only if `format_write_bom` is set, and last block, which may be small), so there is often no need for additional buffering in the implementation. 
+ +This is a simple example of custom writer for saving document data to STL string ([@samples/save_custom_writer.cpp]); read the sample code for more complex examples: + +[import samples/save_custom_writer.cpp] +[code_save_custom_writer] + +[endsect] [/writer] + +[section:subtree Saving a single subtree] + +[#xml_node::print][#xml_node::print_stream] +While the previously described functions saved the whole document to the destination, it is easy to save a single subtree. The following functions are provided: + + void xml_node::print(std::ostream& os, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; + void xml_node::print(std::wostream& os, const char_t* indent = "\t", unsigned int flags = format_default, unsigned int depth = 0) const; + void xml_node::print(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; + +These functions have the same arguments with the same meaning as the corresponding `xml_document::save` functions, and allow you to save the subtree to either a C++ IOstream or to any object that implements `xml_writer` interface. + +Saving a subtree differs from saving the whole document: the process behaves as if `format_write_bom` is off, and `format_no_declaration` is on, even if actual values of the flags are different. This means that BOM is not written to the destination, and document declaration is only written if it is the node itself or is one of node's children. Note that this also holds if you're saving a document; this example ([@samples/save_subtree.cpp]) illustrates the difference: + +[import samples/save_subtree.cpp] +[code_save_subtree] + +[endsect] [/subtree] + +[section:options Output options] + +All saving functions accept the optional parameter `flags`. 
This is a bitmask that customizes the output format; you can select the way the document nodes are printed and select the needed additional information that is output before the document contents. + +[note You should use the usual bitwise arithmetics to manipulate the bitmask: to enable a flag, use `mask | flag`; to disable a flag, use `mask & ~flag`.] + +These flags control the resulting tree contents: + +* [anchor format_indent] determines if all nodes should be indented with the indentation string (this is an additional parameter for all saving functions, and is `"\t"` by default). If this flag is on, before every node the indentation string is output several times, where the amount of indentation depends on the node's depth relative to the output subtree. This flag has no effect if `format_raw` is enabled. This flag is *on* by default. +[lbr] + +* [anchor format_raw] switches between formatted and raw output. If this flag is on, the nodes are not indented in any way, and also no newlines that are not part of document text are printed. Raw mode can be used for serialization where the result is not intended to be read by humans; also it can be useful if the document was parsed with `parse_ws_pcdata` flag, to preserve the original document formatting as much as possible. This flag is *off* by default. + +These flags control the additional output information: + +* [anchor format_no_declaration] allows to disable default node declaration output. By default, if the document is saved via `save` or `save_file` function, and it does not have any document declaration, a default declaration is output before the document contents. Enabling this flag disables this declaration. This flag has no effect in `xml_node::print` functions: they never output the default declaration. This flag is *off* by default. +[lbr] + +* [anchor format_write_bom] allows to enable Byte Order Mark (BOM) output. 
By default, no BOM is output, so in case of non UTF-8 encodings the resulting document's encoding may not be recognized by some parsers and text editors, if they do not implement sophisticated encoding detection. Enabling this flag adds an encoding-specific BOM to the output. This flag has no effect in `xml_node::print` functions: they never output the BOM. This flag is *off* by default. + +Additionally, there is one predefined option mask: + +* [anchor format_default] is the default set of flags, i.e. it has all options set to their default values. It sets formatted output with indentation, without BOM and with default node declaration, if necessary. + +This is an example that shows the outputs of different output options ([@samples/save_options.cpp]): + +[import samples/save_options.cpp] +[code_save_options] + +[endsect] [/options] + +[section:encoding Encodings] + +pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it's a strict subset of UTF-16) and handles all encoding conversions during output. The output encoding is set via the `encoding` parameter of saving functions, which is of type `xml_encoding`. The possible values for the encoding are documented in [sref manual.loading.encoding]; the only flag that has a different meaning is `encoding_auto`. + +While all other flags set the exact encoding, `encoding_auto` is meant for automatic encoding detection. The automatic detection does not make sense for output encoding, since there is usually nothing to infer the actual encoding from, so here `encoding_auto` means UTF-8 encoding, which is the most popular encoding for XML data storage. This is also the default value of output encoding; specify another value if you do not want UTF-8 encoded output. + +Also note that wide stream saving functions do not have `encoding` argument and always assume `encoding_wchar` encoding. 
+ +[note The current behavior for Unicode conversion is to skip all invalid UTF sequences during conversion. This behavior should not be relied upon; if your node/attribute names do not contain any valid UTF sequences, they may be output as if they are empty, which will result in malformed XML document.] + +[endsect] [/encoding] + +[endsect] [/saving] + +[section:xpath XPath] + +If the task at hand is to select a subset of document nodes that match some criteria, it is possible to code a function using the existing traversal functionality for any practical criteria. However, often either a data-driven approach is desirable, in case the criteria are not predefined and come from a file, or it is inconvenient to use traversal interfaces and a higher-level DSL is required. There is a standard language for XML processing, XPath, that can be useful for these cases. pugixml implements an almost complete subset of XPath 1.0. Because of differences in document object model and some performance implications, there are minor violations of the official specifications, which can be found in [sref manual.xpath.w3c]. The rest of this section describes the interface for XPath functionality. Please note that if you wish to learn to use XPath language, you have to look for other tutorials or manuals; for example, you can read [@http://www.w3schools.com/xpath/ W3Schools XPath tutorial], [@http://www.tizag.com/xmlTutorial/xpathtutorial.php XPath tutorial at tizag.com], and [@http://www.w3.org/TR/xpath/ the XPath 1.0 specification]. + +[note As of version 0.9, you need both STL and exception support to use XPath; XPath is disabled if either `PUGIXML_NO_STL` or `PUGIXML_NO_EXCEPTIONS` is defined.] + +[section:types XPath types] + +[#xpath_value_type][#xpath_type_number][#xpath_type_string][#xpath_type_boolean][#xpath_type_node_set][#xpath_type_none] +Each XPath expression can have one of the following types: boolean, number, string or node set. 
Boolean type corresponds to `bool` type, number type corresponds to `double` type, string type corresponds to either `std::string` or `std::wstring`, depending on whether [link manual.dom.unicode wide character interface is enabled], and node set corresponds to `xpath_node_set` type. There is an enumeration, `xpath_value_type`, which can take the values `xpath_type_boolean`, `xpath_type_number`, `xpath_type_string` or `xpath_type_node_set`, accordingly. + +[#xpath_node][#xpath_node::node][#xpath_node::attribute][#xpath_node::parent] +Because an XPath node can be either a node or an attribute, there is a special type, `xpath_node`, which is a discriminated union of these types. A value of this type contains two node handles, one of `xml_node` type, and another one of `xml_attribute` type; at most one of them can be non-null. The accessors to get these handles are available: + + xml_node xpath_node::node() const; + xml_attribute xpath_node::attribute() const; + +XPath nodes can be null, in which case both accessors return null handles. + +Note that as per XPath specification, each XPath node has a parent, which can be retrieved via this function: + + xml_node xpath_node::parent() const; + +`parent` function returns the node's parent if the XPath node corresponds to `xml_node` handle (equivalent to `node().parent()`), or the node to which the attribute belongs to, if the XPath node corresponds to `xml_attribute` handle. For null nodes, `parent` returns null handle. + +[#xpath_node::unspecified_bool_type][#xpath_node::comparison] +Like node and attribute handles, XPath node handles can be implicitly cast to boolean-like object to check if it is a null node, and also can be compared for equality with each other. 
+ +[#xpath_node::ctor] +You can also create XPath nodes with one of three constructors: the default constructor, the constructor that takes node argument, and the constructor that takes attribute and node arguments (in which case the attribute must belong to the attribute list of the node). However, usually you don't need to create your own XPath node objects, since they are returned to you via selection functions. + +[#xpath_node_set] +XPath expressions operate not on single nodes, but instead on node sets. A node set is a collection of nodes, which can be optionally ordered in either a forward document order or a reverse one. Document order is defined in XPath specification; an XPath node is before another node in document order if it appears before it in XML representation of the corresponding document. + +[#xpath_node_set::const_iterator][#xpath_node_set::begin][#xpath_node_set::end] +Node sets are represented by `xpath_node_set` object, which has an interface that resembles one of sequential random-access containers. It has an iterator type along with usual begin/past-the-end iterator accessors: + + typedef const xpath_node* xpath_node_set::const_iterator; + const_iterator xpath_node_set::begin() const; + const_iterator xpath_node_set::end() const; + +[#xpath_node_set::index][#xpath_node_set::size][#xpath_node_set::empty] +And it also can be iterated via indices, just like `std::vector`: + + const xpath_node& xpath_node_set::operator[](size_t index) const; + size_t xpath_node_set::size() const; + bool xpath_node_set::empty() const; + +All of the above operations have the same semantics as that of `std::vector`: the iterators are random-access, all of the above operations are constant time, and accessing the element at index that is greater than or equal to the set size results in undefined behavior. You can use both iterator-based and index-based access for iteration, however the iterator-based can be faster. 
+ +[#xpath_node_set::type][#xpath_node_set::type_unsorted][#xpath_node_set::type_sorted][#xpath_node_set::type_sorted_reverse][#xpath_node_set::sort] +The order of iteration depends on the order of nodes inside the set; the order can be queried via the following function: + + enum xpath_node_set::type_t {type_unsorted, type_sorted, type_sorted_reverse}; + type_t xpath_node_set::type() const; + +`type` function returns the current order of nodes; `type_sorted` means that the nodes are in forward document order, `type_sorted_reverse` means that the nodes are in reverse document order, and `type_unsorted` means that neither order is guaranteed (nodes can accidentally be in a sorted order even if `type()` returns `type_unsorted`). If you require a specific order of iteration, you can change it via `sort` function: + + void xpath_node_set::sort(bool reverse = false); + +Calling `sort` sorts the nodes in either forward or reverse document order, depending on the argument; after this call `type()` will return `type_sorted` or `type_sorted_reverse`. + +[#xpath_node_set::first] +Often the actual iteration is not needed; instead, only the first element in document order is required. For this, a special accessor is provided: + + xpath_node xpath_node_set::first() const; + +This function returns the first node in forward document order from the set, or null node if the set is empty. Note that while the result of the node does not depend on the order of nodes in the set (i.e. on the result of `type()`), the complexity does - if the set is sorted, the complexity is constant, otherwise it is linear in the number of elements or worse. 
+ +[endsect] [/types] + +[section:select Selecting nodes via XPath expression] + +[#xml_node::select_single_node][#xml_node::select_nodes] +If you want to select nodes that match some XPath expression, you can do it with the following functions: + + xpath_node xml_node::select_single_node(const char_t* query) const; + xpath_node_set xml_node::select_nodes(const char_t* query) const; + +`select_nodes` function compiles the expression and then executes it with the node as a context node, and returns the resulting node set. `select_single_node` returns only the first node in document order from the result, and is equivalent to calling `select_nodes(query).first()`. If the XPath expression does not match anything, or the node handle is null, `select_nodes` returns an empty set, and `select_single_node` returns null XPath node. + +Both functions throw `xpath_exception` if the query can not be compiled or if it returns a value with type other than node set; see [sref manual.xpath.errors] for details. + +[#xml_node::select_single_node_precomp][#xml_node::select_nodes_precomp] +While compiling expressions is fast, the compilation time can introduce a significant overhead if the same expression is used many times on small subtrees. If you're doing many similar queries, consider compiling them into query objects (see [sref manual.xpath.query] for further reference). Once you get a compiled query object, you can pass it to select functions instead of an expression string: + + xpath_node xml_node::select_single_node(const xpath_query& query) const; + xpath_node_set xml_node::select_nodes(const xpath_query& query) const; + +Both functions throw `xpath_exception` if the query returns a value with type other than node set. 
+ +This is an example of selecting nodes using XPath expressions ([@samples/xpath_select.cpp]): + +[import samples/xpath_select.cpp] +[code_xpath_select] + +[endsect] [/select] + +[section:query Using query objects] + +[#xpath_query] +When you call `select_nodes` with an expression string as an argument, a query object is created behind the scene. A query object represents a compiled XPath expression. Query objects can be needed in the following circumstances: + +* You can precompile expressions to query objects to save compilation time if it becomes an issue; +* You can use query objects to evaluate XPath expressions which result in booleans, numbers or strings; +* You can get the type of expression value via query object. + +Query objects correspond to `xpath_query` type. They are immutable and non-copyable: they are bound to the expression at creation time and can not be cloned. If you want to put query objects in a container, allocate them on heap via `new` operator and store pointers to `xpath_query` in the container. + +[#xpath_query::ctor] +You can create a query object with the constructor that takes XPath expression as an argument: + + explicit xpath_query::xpath_query(const char_t* query); + +[#xpath_query::return_type] +The expression is compiled and the compiled representation is stored in the new query object. If compilation fails, `xpath_exception` is thrown (see [sref manual.xpath.errors] for details). 
After the query is created, you can query the type of the evaluation result using the following function: + + xpath_value_type xpath_query::return_type() const; + +[#xpath_query::evaluate_boolean][#xpath_query::evaluate_number][#xpath_query::evaluate_string][#xpath_query::evaluate_node_set] +You can evaluate the query using one of the following functions: + + bool xpath_query::evaluate_boolean(const xml_node& n) const; + double xpath_query::evaluate_number(const xml_node& n) const; + string_t xpath_query::evaluate_string(const xml_node& n) const; + xpath_node_set xpath_query::evaluate_node_set(const xml_node& n) const; + +All functions take the context node as an argument, compute the expression and return the result, converted to the requested type. By XPath specification, value of any type can be converted to boolean, number or string value, but no type other than node set can be converted to node set. Because of this, `evaluate_boolean`, `evaluate_number` and `evaluate_string` always return a result, but `evaluate_node_set` throws an `xpath_exception` if the return type is not node set. + +[note Calling `node.select_nodes("query")` is equivalent to calling `xpath_query("query").evaluate_node_set(node)`.] + +This is an example of using query objects ([@samples/xpath_query.cpp]): + +[import samples/xpath_query.cpp] +[code_xpath_query] + +[endsect] [/query] + +[section:errors Error handling] + +[#xpath_exception][#xpath_exception::what] +As of version 0.9, all XPath errors result in thrown exceptions. The errors can arise during expression compilation or node set evaluation. In both cases, an `xpath_exception` object is thrown. This is an exception object that implements `std::exception` interface, and thus has a single function `what()`: + + virtual const char* xpath_exception::what() const throw(); + +This function returns the error message. Currently it is impossible to get the exact place where query compilation failed. 
This functionality, along with optional error handling without exceptions, will be available in version 1.0. + +This is an example of XPath error handling ([@samples/xpath_error.cpp]): + +[import samples/xpath_error.cpp] +[code_xpath_error] + +[endsect] [/errors] + +[section:w3c Conformance to W3C specification] + +Because of the differences in document object models, performance considerations and implementation complexity, pugixml does not provide a fully conformant XPath 1.0 implementation. This is the current list of incompatibilities: + +* Consecutive text nodes sharing the same parent are not merged, i.e. in `<node>text1 <![CDATA[data]]> text2</node>` node should have one text node child, but instead has three. +* Since document can't have a document type declaration, `id()` function always returns an empty node set. +* Namespace nodes are not supported (affects namespace:: axis). +* Name tests are performed on QNames in XML document instead of expanded names; for `<foo xmlns:ns1='uri' xmlns:ns2='uri'><ns1:child/><ns2:child/></foo>`, query `foo/ns1:*` will return only the first child, not both of them. Compliant XPath implementations can return both nodes if the user provides appropriate namespace declarations. +* String functions consider a character to be either a single `char` value or a single `wchar_t` value, depending on the library configuration; this means that some string functions are not fully Unicode-aware. This affects `substring()`, `string-length()` and `translate()` functions. +* Variable references are not supported. + +Some of these incompatibilities will be fixed in version 1.0. + +[endsect] [/w3c] + +[endsect] [/xpath] + +[section:changes Changelog] + +[h5 1.07.2010 - version 0.9] + +Major release, featuring extended and improved Unicode support, miscellaneous performance improvements, bug fixes and more.
+ +* Major Unicode improvements: + # Introduced encoding support (automatic/manual encoding detection on load, manual encoding selection on save, conversion from/to UTF8, UTF16 LE/BE, UTF32 LE/BE) + # Introduced wchar_t mode (you can set PUGIXML_WCHAR_MODE define to switch pugixml internal encoding from UTF8 to wchar_t; all functions are switched to their Unicode variants) + # Load/save functions now support wide streams + +* Bug fixes: + # Fixed document corruption on failed parsing bug + # XPath string <-> number conversion improvements (increased precision, fixed crash for huge numbers) + # Improved DOCTYPE parsing: now parser recognizes all well-formed DOCTYPE declarations + # Fixed xml_attribute::as_uint() for large numbers (i.e. 2^32-1) + # Fixed xml_node::first_element_by_path for path components that are prefixes of node names, but are not exactly equal to them. + +* Specification changes: + # parse() API changed to load_buffer/load_buffer_inplace/load_buffer_inplace_own; load_buffer APIs do not require zero-terminated strings. 
+ # Renamed as_utf16 to as_wide + # Changed xml_node::offset_debug return type and xml_parse_result::offset type to ptrdiff_t + # Nodes/attributes with empty names are now printed as :anonymous + +* Performance improvements: + # Optimized document parsing and saving + # Changed internal memory management: internal allocator is used for both metadata and name/value data; allocated pages are deleted if all allocations from them are deleted + # Optimized memory consumption: sizeof(xml_node_struct) reduced from 40 bytes to 32 bytes on x86 + # Optimized debug mode parsing/saving by order of magnitude + +* Miscellaneous: + # All STL includes except in pugixml.hpp are replaced with forward declarations + # xml_node::remove_child and xml_node::remove_attribute now return the operation result + +* Compatibility: + # parse() and as_utf16 are left for compatibility (these functions are deprecated and will be removed in version 1.0) + # Wildcard functions, document_order/precompute_document_order functions, all_elements_by_name function and format_write_bom_utf8 flag are deprecated and will be removed in version 1.0 + # xpath_type_t enumeration was renamed to xpath_value_type; xpath_type_t is deprecated and will be removed in version 1.0 + +[h5 8.11.2009 - version 0.5] + +Major bugfix release. 
Changes: + +* XPath bugfixes: + # Fixed translate(), lang() and concat() functions (infinite loops/crashes) + # Fixed compilation of queries with empty literal strings ("") + # Fixed axis tests: they never add empty nodes/attributes to the resulting node set now + # Fixed string-value evaluation for node-set (the result excluded some text descendants) + # Fixed self:: axis (it behaved like ancestor-or-self::) + # Fixed following:: and preceding:: axes (they included descendent and ancestor nodes, respectively) + # Minor fix for namespace-uri() function (namespace declaration scope includes the parent element of namespace declaration attribute) + # Some incorrect queries are no longer parsed now (i.e. foo: *) + # Fixed text()/etc. node test parsing bug (i.e. foo[text()] failed to compile) + # Fixed root step (/) - it now selects empty node set if query is evaluated on empty node + # Fixed string to number conversion ("123 " converted to NaN, "123 .456" converted to 123.456 - now the results are 123 and NaN, respectively) + # Node set copying now preserves sorted type; leads to better performance on some queries + +* Miscellaneous bugfixes: + # Fixed xml_node::offset_debug for PI nodes + # Added empty attribute checks to xml_node::remove_attribute + # Fixed node_pi and node_declaration copying + # Const-correctness fixes + +* Specification changes: + # xpath_node::select_nodes() and related functions now throw exception if expression return type is not node set (instead of assertion) + # xml_node::traverse() now sets depth to -1 for both begin() and end() callbacks (was 0 at begin() and -1 at end()) + # In case of non-raw node printing a newline is output after PCDATA inside nodes if the PCDATA has siblings + # UTF8 -> wchar_t conversion now considers 5-byte UTF8-like sequences as invalid + +* New features: + # Added xpath_node_set::operator[] for index-based iteration + # Added xpath_query::return_type() + # Added getter accessors for memory-management functions + 
+[h5 17.09.2009 - version 0.42] + +Maintenance release. Changes: + +* Bug fixes: + # Fixed deallocation in case of custom allocation functions or if delete[] / free are incompatible + # XPath parser fixed for incorrect queries (i.e. incorrect XPath queries should now always fail to compile) + # Const-correctness fixes for find_child_by_attribute + # Improved compatibility (miscellaneous warning fixes, fixed cstring include dependency for GCC) + # Fixed iterator begin/end and print function to work correctly for empty nodes + +* New features: + # Added PUGIXML_API/PUGIXML_CLASS/PUGIXML_FUNCTION configuration macros to control class/function attributes + # Added xml_attribute::set_value overloads for different types + +[h5 8.02.2009 - version 0.41] + +Maintenance release. Changes: + +* Bug fixes: + # Fixed bug with node printing (occasionally some content was not written to output stream) + +[h5 18.01.2009 - version 0.4] + +Changes: + +* Bug fixes: + # Documentation fix in samples for parse() with manual lifetime control + # Fixed document order sorting in XPath (it caused wrong order of nodes after xpath_node_set::sort and wrong results of some XPath queries) + +* Node printing changes: + # Single quotes are no longer escaped when printing nodes + # Symbols in second half of ASCII table are no longer escaped when printing nodes; because of this, format_utf8 flag is deleted as it's no longer needed and format_write_bom is renamed to format_write_bom_utf8. + # Reworked node printing - now it works via xml_writer interface; implementations for FILE* and std::ostream are available. As a side-effect, xml_document::save_file now works without STL. + +* New features: + # Added unsigned integer support for attributes (xml_attribute::as_uint, xml_attribute::operator=) + # Now document declaration (<?xml ...?>) is parsed as node with type node_declaration when parse_declaration flag is specified (access to encoding/version is performed as if they were attributes, i.e.
doc.child("xml").attribute("version").as_float()); corresponding flags for node printing were also added + # Added support for custom memory management (see set_memory_management_functions for details) + # Implemented node/attribute copying (see xml_node::insert\_copy_* and xml_node::append_copy for details) + # Added find_child_by_attribute and find_child_by_attribute_w to simplify parsing code in some cases (i.e. COLLADA files) + # Added file offset information querying for debugging purposes (now you're able to determine exact location of any xml_node in parsed file, see xml_node::offset_debug for details) + # Improved error handling for parsing - now load(), load_file() and parse() return xml_parse_result, which contains error code and last parsed offset; this does not break old interface as xml_parse_result can be implicitly casted to bool. + +[h5 31.10.2007 - version 0.34] + +Maintenance release. Changes: + +* Bug fixes: + # Fixed bug with loading from text-mode iostreams + # Fixed leak when transfer_ownership is true and parsing is failing + # Fixed bug in saving (\\r and \\n are now escaped in attribute values) + # Renamed free() to destroy() - some macro conflicts were reported + +* New features: + # Improved compatibility (supported Digital Mars C++, MSVC 6, CodeWarrior 8, PGI C++, Comeau, supported PS3 and XBox360) + # PUGIXML_NO_EXCEPTION flag for platforms without exception handling + +[h5 21.02.2007 - version 0.3] + +Refactored, reworked and improved version. 
Changes: + +* Interface: + # Added XPath + # Added tree modification functions + # Added no STL compilation mode + # Added saving document to file + # Refactored parsing flags + # Removed xml_parser class in favor of xml_document + # Added transfer ownership parsing mode + # Modified the way xml_tree_walker works + # Iterators are now non-constant + +* Implementation: + # Support of several compilers and platforms + # Refactored and sped up parsing core + # Improved standard compliancy + # Added XPath implementation + # Fixed several bugs + +[h5 6.11.2006 - version 0.2] + +First public release. Changes: + +* Bug fixes: + # Fixed child_value() (for empty nodes) + # Fixed xml_parser_impl warning at W4 + +* New features: + # Introduced child_value(name) and child_value_w(name) + # parse_eol_pcdata and parse_eol_attribute flags + parse_minimal optimizations + # Optimizations of strconv_t + +[h5 15.07.2006 - version 0.1] + +First private release for testing purposes + +[endsect] [/changes] + +[section:apiref API Reference] + +This is the reference for all macros, types, enumerations, classes and functions in pugixml. Each symbol is a link that leads to the relevant section of the manual. 
+ +Macros: + +* `#define `[link PUGIXML_WCHAR_MODE] +* `#define `[link PUGIXML_NO_XPATH] +* `#define `[link PUGIXML_NO_STL] +* `#define `[link PUGIXML_NO_EXCEPTIONS] +* `#define `[link PUGIXML_API] +* `#define `[link PUGIXML_CLASS] +* `#define `[link PUGIXML_FUNCTION] + +Types: + +* `typedef `/configuration-defined type/` `[link char_t]`;` +* `typedef `/configuration-defined type/` `[link string_t]`;` +* `typedef void* (*`[link allocation_function]`)(size_t size);` +* `typedef void (*`[link deallocation_function]`)(void* ptr);` + +Enumerations: + +* `enum `[link xml_node_type] + * [link node_null] + * [link node_document] + * [link node_element] + * [link node_pcdata] + * [link node_cdata] + * [link node_comment] + * [link node_pi] + * [link node_declaration] + [lbr] + +* `enum `[link xml_parse_status] + * [link status_ok] + * [link status_file_not_found] + * [link status_io_error] + * [link status_out_of_memory] + * [link status_internal_error] + * [link status_unrecognized_tag] + * [link status_bad_pi] + * [link status_bad_comment] + * [link status_bad_cdata] + * [link status_bad_doctype] + * [link status_bad_pcdata] + * [link status_bad_start_element] + * [link status_bad_attribute] + * [link status_bad_end_element] + * [link status_end_element_mismatch] + [lbr] + +* `enum `[link xml_encoding] + * [link encoding_auto] + * [link encoding_utf8] + * [link encoding_utf16_le] + * [link encoding_utf16_be] + * [link encoding_utf16] + * [link encoding_utf32_le] + * [link encoding_utf32_be] + * [link encoding_utf32] + * [link encoding_wchar] + [lbr] + +* `enum `[link xpath_value_type] + * [link xpath_type_none] + * [link xpath_type_node_set] + * [link xpath_type_number] + * [link xpath_type_string] + * [link xpath_type_boolean] + +Constants: + +* Formatting options bit flags: + * [link format_default] + * [link format_indent] + * [link format_no_declaration] + * [link format_raw] + * [link format_write_bom] + [lbr] + +* Parsing options bit flags: + * [link parse_cdata] + 
* [link parse_comments] + * [link parse_declaration] + * [link parse_default] + * [link parse_eol] + * [link parse_escapes] + * [link parse_minimal] + * [link parse_pi] + * [link parse_ws_pcdata] + * [link parse_wconv_attribute] + * [link parse_wnorm_attribute] + +Classes: + +* `class `[link xml_attribute] + * [link xml_attribute::ctor xml_attribute]`();` + [lbr] + + * `bool `[link xml_attribute::empty empty]`() const;` + * `operator `[link xml_attribute::unspecified_bool_type unspecified_bool_type]`() const;` + [lbr] + + * `bool `[link xml_attribute::comparison operator==]`(const xml_attribute& r) const;` + * `bool `[link xml_attribute::comparison operator!=]`(const xml_attribute& r) const;` + * `bool `[link xml_attribute::comparison operator<]`(const xml_attribute& r) const;` + * `bool `[link xml_attribute::comparison operator>]`(const xml_attribute& r) const;` + * `bool `[link xml_attribute::comparison operator<=]`(const xml_attribute& r) const;` + * `bool `[link xml_attribute::comparison operator>=]`(const xml_attribute& r) const;` + [lbr] + + * `xml_attribute `[link xml_attribute::next_attribute next_attribute]`() const;` + * `xml_attribute `[link xml_attribute::previous_attribute previous_attribute]`() const;` + [lbr] + + * `const char_t* `[link xml_attribute::name name]`() const;` + * `const char_t* `[link xml_attribute::value value]`() const;` + [lbr] + + * `int `[link xml_attribute::as_int as_int]`() const;` + * `unsigned int `[link xml_attribute::as_uint as_uint]`() const;` + * `double `[link xml_attribute::as_double as_double]`() const;` + * `float `[link xml_attribute::as_float as_float]`() const;` + * `bool `[link xml_attribute::as_bool as_bool]`() const;` + [lbr] + + * `bool `[link xml_attribute::set_name set_name]`(const char_t* rhs);` + * `bool `[link xml_attribute::set_value set_value]`(const char_t* rhs);` + * `bool `[link xml_attribute::set_value set_value]`(int rhs);` + * `bool `[link xml_attribute::set_value set_value]`(unsigned int rhs);` + * 
`bool `[link xml_attribute::set_value set_value]`(double rhs);` + * `bool `[link xml_attribute::set_value set_value]`(bool rhs);` + [lbr] + + * `xml_attribute& `[link xml_attribute::assign operator=]`(const char_t* rhs);` + * `xml_attribute& `[link xml_attribute::assign operator=]`(int rhs);` + * `xml_attribute& `[link xml_attribute::assign operator=]`(unsigned int rhs);` + * `xml_attribute& `[link xml_attribute::assign operator=]`(double rhs);` + * `xml_attribute& `[link xml_attribute::assign operator=]`(bool rhs);` + [lbr] + +* `class `[link xml_node] + * [link xml_node::ctor xml_node]`();` + [lbr] + + * `bool `[link xml_node::empty empty]`() const;` + * `operator `[link xml_node::unspecified_bool_type unspecified_bool_type]`() const;` + [lbr] + + * `bool `[link xml_node::comparison operator==]`(const xml_node& r) const;` + * `bool `[link xml_node::comparison operator!=]`(const xml_node& r) const;` + * `bool `[link xml_node::comparison operator<]`(const xml_node& r) const;` + * `bool `[link xml_node::comparison operator>]`(const xml_node& r) const;` + * `bool `[link xml_node::comparison operator<=]`(const xml_node& r) const;` + * `bool `[link xml_node::comparison operator>=]`(const xml_node& r) const;` + [lbr] + + * `xml_node_type `[link xml_node::type type]`() const;` + [lbr] + + * `const char_t* `[link xml_node::name name]`() const;` + * `const char_t* `[link xml_node::value value]`() const;` + [lbr] + + * `xml_node `[link xml_node::parent parent]`() const;` + * `xml_node `[link xml_node::first_child first_child]`() const;` + * `xml_node `[link xml_node::last_child last_child]`() const;` + * `xml_node `[link xml_node::next_sibling next_sibling]`() const;` + * `xml_node `[link xml_node::previous_sibling previous_sibling]`() const;` + [lbr] + + * `xml_attribute `[link xml_node::first_attribute first_attribute]`() const;` + * `xml_attribute `[link xml_node::last_attribute last_attribute]`() const;` + [lbr] + + * `xml_node `[link xml_node::child child]`(const 
char_t* name) const;` + * `xml_attribute `[link xml_node::attribute attribute]`(const char_t* name) const;` + * `xml_node `[link xml_node::next_sibling_name next_sibling]`(const char_t* name) const;` + * `xml_node `[link xml_node::previous_sibling_name previous_sibling]`(const char_t* name) const;` + * `xml_node `[link xml_node::find_child_by_attribute find_child_by_attribute]`(const char_t* name, const char_t* attr_name, const char_t* attr_value) const;` + * `xml_node `[link xml_node::find_child_by_attribute find_child_by_attribute]`(const char_t* attr_name, const char_t* attr_value) const;` + [lbr] + + * `const char_t* `[link xml_node::child_value child_value]`() const;` + * `const char_t* `[link xml_node::child_value child_value]`(const char_t* name) const;` + [lbr] + + * `typedef xml_node_iterator `[link xml_node_iterator iterator]`;` + * `iterator `[link xml_node::begin begin]`() const;` + * `iterator `[link xml_node::end end]`() const;` + [lbr] + + * `typedef xml_attribute_iterator `[link xml_attribute_iterator attribute_iterator]`;` + * `attribute_iterator `[link xml_node::attributes_begin attributes_begin]`() const;` + * `attribute_iterator `[link xml_node::attributes_end attributes_end]`() const;` + [lbr] + + * `bool `[link xml_node::traverse traverse]`(xml_tree_walker& walker);` + [lbr] + + * `template <typename Predicate> xml_attribute `[link xml_node::find_attribute find_attribute]`(Predicate pred) const;` + * `template <typename Predicate> xml_node `[link xml_node::find_child find_child]`(Predicate pred) const;` + * `template <typename Predicate> xml_node `[link xml_node::find_node find_node]`(Predicate pred) const;` + [lbr] + + * `string_t `[link xml_node::path path]`(char_t delimiter = '/') const;` + * `xml_node `[link xml_node::first_element_by_path]`(const char_t* path, char_t delimiter = '/') const;` + * `xml_node `[link xml_node::root root]`() const;` + * `ptrdiff_t `[link xml_node::offset_debug offset_debug]`() const;` + [lbr] + + * `bool `[link xml_node::set_name set_name]`(const char_t* rhs);` + * `bool
`[link xml_node::set_value set_value]`(const char_t* rhs);` + [lbr] + + * `xml_attribute `[link xml_node::append_attribute append_attribute]`(const char_t* name);` + * `xml_attribute `[link xml_node::insert_attribute_after insert_attribute_after]`(const char_t* name, const xml_attribute& attr);` + * `xml_attribute `[link xml_node::insert_attribute_before insert_attribute_before]`(const char_t* name, const xml_attribute& attr);` + [lbr] + + * `xml_node `[link xml_node::append_child append_child]`(xml_node_type type = node_element);` + * `xml_node `[link xml_node::insert_child_after insert_child_after]`(xml_node_type type, const xml_node& node);` + * `xml_node `[link xml_node::insert_child_before insert_child_before]`(xml_node_type type, const xml_node& node);` + [lbr] + + * `xml_attribute `[link xml_node::append_copy append_copy]`(const xml_attribute& proto);` + * `xml_attribute `[link xml_node::insert_copy_after insert_copy_after]`(const xml_attribute& proto, const xml_attribute& attr);` + * `xml_attribute `[link xml_node::insert_copy_before insert_copy_before]`(const xml_attribute& proto, const xml_attribute& attr);` + [lbr] + + * `xml_node `[link xml_node::append_copy append_copy]`(const xml_node& proto);` + * `xml_node `[link xml_node::insert_copy_after insert_copy_after]`(const xml_node& proto, const xml_node& node);` + * `xml_node `[link xml_node::insert_copy_before insert_copy_before]`(const xml_node& proto, const xml_node& node);` + [lbr] + + * `bool `[link xml_node::remove_attribute remove_attribute]`(const xml_attribute& a);` + * `bool `[link xml_node::remove_attribute remove_attribute]`(const char_t* name);` + * `bool `[link xml_node::remove_child remove_child]`(const xml_node& n);` + * `bool `[link xml_node::remove_child remove_child]`(const char_t* name);` + [lbr] + + * `void `[link xml_node::print print]`(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) 
const;` + * `void `[link xml_node::print_stream print]`(std::ostream& os, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;` + * `void `[link xml_node::print_stream print]`(std::wostream& os, const char_t* indent = "\t", unsigned int flags = format_default, unsigned int depth = 0) const;` + [lbr] + + * `xpath_node `[link xml_node::select_single_node select_single_node]`(const char_t* query) const;` + * `xpath_node `[link xml_node::select_single_node_precomp select_single_node]`(const xpath_query& query) const;` + * `xpath_node_set `[link xml_node::select_nodes select_nodes]`(const char_t* query) const;` + * `xpath_node_set `[link xml_node::select_nodes_precomp select_nodes]`(const xpath_query& query) const;` + [lbr] + +* `class `[link xml_document] + * [link xml_document::ctor xml_document]`();` + * `~`[link xml_document::dtor xml_document]`();` + [lbr] + + * `xml_parse_result `[link xml_document::load_stream load]`(std::istream& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` + * `xml_parse_result `[link xml_document::load_stream load]`(std::wistream& stream, unsigned int options = parse_default);` + [lbr] + + * `xml_parse_result `[link xml_document::load_string load]`(const char_t* contents, unsigned int options = parse_default);` + [lbr] + + * `xml_parse_result `[link xml_document::load_file load_file]`(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` + [lbr] + + * `xml_parse_result `[link xml_document::load_buffer load_buffer]`(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` + * `xml_parse_result `[link xml_document::load_buffer_inplace load_buffer_inplace]`(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` + * `xml_parse_result `[link 
xml_document::load_buffer_inplace_own load_buffer_inplace_own]`(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` + [lbr] + + * `bool `[link xml_document::save_file save_file]`(const char* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` + [lbr] + + * `void `[link xml_document::save_stream save]`(std::ostream& stream, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` + * `void `[link xml_document::save_stream save]`(std::wostream& stream, const char_t* indent = "\t", unsigned int flags = format_default) const;` + [lbr] + + * `void `[link xml_document::save save]`(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` + [lbr] + +* `struct `[link xml_parse_result] + * `xml_parse_status `[link xml_parse_result::status status]`;` + * `ptrdiff_t `[link xml_parse_result::offset offset]`;` + * `xml_encoding `[link xml_parse_result::encoding encoding]`;` + [lbr] + + * `operator `[link xml_parse_result::bool bool]`() const;` + * `const char* `[link xml_parse_result::description description]`() const;` + [lbr] + +* `class `[link xml_node_iterator] +* `class `[link xml_attribute_iterator] +[lbr] + +* `class `[link xml_tree_walker] + * `virtual bool `[link xml_tree_walker::begin begin]`(xml_node& node);` + * `virtual bool `[link xml_tree_walker::for_each for_each]`(xml_node& node) = 0;` + * `virtual bool `[link xml_tree_walker::end end]`(xml_node& node);` + [lbr] + + * `int `[link xml_tree_walker::depth depth]`() const;` + [lbr] + +* `class `[link xml_writer] + * `virtual void `[link xml_writer::write write]`(const void* data, size_t size) = 0;` + [lbr] + +* `class `[link xml_writer_file]`: public xml_writer` + * [link xml_writer_file]`(void* file);` + [lbr] + +* `class `[link xml_writer_stream]`: public 
xml_writer` + * [link xml_writer_stream]`(std::ostream& stream);` + * [link xml_writer_stream]`(std::wostream& stream);` + [lbr] + +* `class `[link xpath_query] + * `explicit `[link xpath_query::ctor xpath_query]`(const char_t* query);` + [lbr] + + * `bool `[link xpath_query::evaluate_boolean evaluate_boolean]`(const xml_node& n) const;` + * `double `[link xpath_query::evaluate_number evaluate_number]`(const xml_node& n) const;` + * `string_t `[link xpath_query::evaluate_string evaluate_string]`(const xml_node& n) const;` + * `xpath_node_set `[link xpath_query::evaluate_node_set evaluate_node_set]`(const xml_node& n) const;` + [lbr] + + * `xpath_value_type `[link xpath_query::return_type return_type]`() const;` + [lbr] + +* `class `[link xpath_exception]`: public std::exception` + * `virtual const char* `[link xpath_exception::what what]`() const throw();` + [lbr] + +* `class `[link xpath_node] + * [link xpath_node::ctor xpath_node]`();` + * [link xpath_node::ctor xpath_node]`(const xml_node& node);` + * [link xpath_node::ctor xpath_node]`(const xml_attribute& attribute, const xml_node& parent);` + [lbr] + + * `xml_node `[link xpath_node::node node]`() const;` + * `xml_attribute `[link xpath_node::attribute attribute]`() const;` + * `xml_node `[link xpath_node::parent parent]`() const;` + [lbr] + + * `operator `[link xpath_node::unspecified_bool_type unspecified_bool_type]`() const;` + * `bool `[link xpath_node::comparison operator==]`(const xpath_node& n) const;` + * `bool `[link xpath_node::comparison operator!=]`(const xpath_node& n) const;` + [lbr] + +* `class `[link xpath_node_set] + * `typedef const xpath_node* `[link xpath_node_set::const_iterator const_iterator]`;` + * `const_iterator `[link xpath_node_set::begin begin]`() const;` + * `const_iterator `[link xpath_node_set::end end]`() const;` + [lbr] + + * `const xpath_node& `[link xpath_node_set::index operator\[\]]`(size_t index) const;` + * `size_t `[link xpath_node_set::size size]`() const;` + * `bool 
`[link xpath_node_set::empty empty]`() const;` + [lbr] + + * `xpath_node `[link xpath_node_set::first first]`() const;` + [lbr] + + * `enum type_t {`[link xpath_node_set::type_unsorted type_unsorted], [link xpath_node_set::type_sorted type_sorted], [link xpath_node_set::type_sorted_reverse type_sorted_reverse]`};` + * `type_t `[link xpath_node_set::type type]`() const;` + * `void `[link xpath_node_set::sort sort]`(bool reverse = false);` + +Functions: + +* [link as_utf8] +* [link as_wide] +* [link get_memory_allocation_function] +* [link get_memory_deallocation_function] +* [link set_memory_management_functions] + +[endsect] [/apiref] + +[section:toc Table of Contents] + +toc-placeholder + +[endsect] [/toc] + +[/ vim:et ] diff --git a/docs/manual.xsl b/docs/manual.xsl index 58d2d99..4d9345a 100644 --- a/docs/manual.xsl +++ b/docs/manual.xsl @@ -1,117 +1,117 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - manual | - | - | - Document: - - &middot; - - &middot; - - &middot; - - &middot; - | - | - | - - - - - - - - - - - - -
- - - - - - - -
-
-
- - - - - - -
- - - - -
- - - - - - - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - ERROR: Autogenerated id detected for element - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + manual | + | + | + Document: + + &middot; + + &middot; + + &middot; + + &middot; + | + | + | + + + + + + + + + + + + +
+ + + + + + + +
+
+
+ + + + + + +
+ + + + +
+ + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + ERROR: Autogenerated id detected for element + + + + + diff --git a/docs/pugixml.css b/docs/pugixml.css index 0cb9ef1..0a72f78 100644 --- a/docs/pugixml.css +++ b/docs/pugixml.css @@ -1,598 +1,598 @@ -/*============================================================================= - Copyright (c) 2004 Joel de Guzman - http://spirit.sourceforge.net/ - - Distributed under the Boost Software License, Version 1.0. (See accompany- - ing file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -=============================================================================*/ - -/*============================================================================= - Body defaults -=============================================================================*/ - - body - { - margin: 1em; - font-family: sans-serif; - } - -/*============================================================================= - Paragraphs -=============================================================================*/ - - p - { - text-align: left; - font-size: 10pt; - line-height: 1.15; - } - -/*============================================================================= - Program listings -=============================================================================*/ - - /* Code on paragraphs */ - p tt.computeroutput - { - font-size: 9pt; - } - - pre.synopsis - { - font-size: 90%; - margin: 1pc 4% 0pc 4%; - padding: 0.5pc 0.5pc 0.5pc 0.5pc; - } - - .programlisting, - .screen - { - font-size: 9pt; - display: block; - margin: 1pc 4% 0pc 4%; - padding: 0.5pc 0.5pc 0.5pc 0.5pc; - } - - /* Program listings in tables don't get borders */ - td .programlisting, - td .screen - { - margin: 0pc 0pc 0pc 0pc; - padding: 0pc 0pc 0pc 0pc; - } - -/*============================================================================= - Headings -=============================================================================*/ - - h1, h2, h3, h4, h5, h6 - { - 
text-align: left; - margin: 1em 0em 0.5em 0em; - font-weight: bold; - } - - h1 { font: 140% } - h2 { font: bold 140% } - h3 { font: bold 130% } - h4 { font: bold 120% } - h5 { font: italic 110% } - h6 { font: italic 100% } - - /* Top page titles */ - title, - h1.title, - h2.title - h3.title, - h4.title, - h5.title, - h6.title, - .refentrytitle - { - font-weight: bold; - margin-bottom: 1pc; - } - - h1.title { font-size: 140% } - h2.title { font-size: 140% } - h3.title { font-size: 130% } - h4.title { font-size: 120% } - h5.title { font-size: 110% } - h6.title { font-size: 100% } - - .section h1 - { - margin: 0em 0em 0.5em 0em; - font-size: 140%; - } - - .section h2 { font-size: 140% } - .section h3 { font-size: 130% } - .section h4 { font-size: 120% } - .section h5 { font-size: 110% } - .section h6 { font-size: 100% } - - /* Code on titles */ - h1 tt.computeroutput { font-size: 140% } - h2 tt.computeroutput { font-size: 140% } - h3 tt.computeroutput { font-size: 130% } - h4 tt.computeroutput { font-size: 130% } - h5 tt.computeroutput { font-size: 130% } - h6 tt.computeroutput { font-size: 130% } - - -/*============================================================================= - Author -=============================================================================*/ - - h3.author - { - font-size: 100% - } - -/*============================================================================= - Lists -=============================================================================*/ - - li - { - font-size: 10pt; - line-height: 1.3; - } - - /* Unordered lists */ - ul - { - text-align: left; - } - - /* Ordered lists */ - ol - { - text-align: left; - } - -/*============================================================================= - Links -=============================================================================*/ - - a - { - text-decoration: none; /* no underline */ - } - - a:hover - { - text-decoration: underline; - } - 
-/*============================================================================= - Spirit style navigation -=============================================================================*/ - - .spirit-nav - { - text-align: right; - } - - .spirit-nav a - { - color: white; - padding-left: 0.5em; - } - - .spirit-nav img - { - border-width: 0px; - } - -/*============================================================================= - Copyright footer -=============================================================================*/ - .copyright-footer - { - text-align: right; - font-size: 70%; - } - - .copyright-footer p - { - text-align: right; - font-size: 80%; - } - -/*============================================================================= - Table of contents -=============================================================================*/ - - .toc - { - margin: 1pc 4% 0pc 4%; - padding: 0.1pc 1pc 0.1pc 1pc; - font-size: 80%; - line-height: 1.15; - } - - .boost-toc - { - float: right; - padding: 0.5pc; - } - - /* Code on toc */ - .toc .computeroutput { font-size: 120% } - -/*============================================================================= - Tables -=============================================================================*/ - - .table-title, - div.table p.title - { - margin-left: 4%; - padding-right: 0.5em; - padding-left: 0.5em; - } - - .informaltable table, - .table table - { - width: 92%; - margin-left: 4%; - margin-right: 4%; - } - - div.informaltable table, - div.table table - { - padding: 4px; - } - - /* Table Cells */ - div.informaltable table tr td, - div.table table tr td - { - padding: 0.5em; - text-align: left; - font-size: 9pt; - } - - div.informaltable table tr th, - div.table table tr th - { - padding: 0.5em 0.5em 0.5em 0.5em; - border: 1pt solid white; - font-size: 80%; - } - - table.simplelist - { - width: auto !important; - margin: 0em !important; - padding: 0em !important; - border: none !important; - } - table.simplelist td - { - 
margin: 0em !important; - padding: 0em !important; - text-align: left !important; - font-size: 9pt !important; - border: none !important; - } - -/*============================================================================= - Blurbs -=============================================================================*/ - - div.note, - div.tip, - div.important, - div.caution, - div.warning, - p.blurb - { - font-size: 9pt; /* A little bit smaller than the main text */ - line-height: 1.2; - display: block; - margin: 1pc 4% 0pc 4%; - padding: 0.5pc 0.5pc 0.5pc 0.5pc; - } - - p.blurb img - { - padding: 1pt; - } - -/*============================================================================= - Variable Lists -=============================================================================*/ - - div.variablelist - { - margin: 1em 0; - } - - /* Make the terms in definition lists bold */ - div.variablelist dl dt, - span.term - { - font-weight: bold; - font-size: 10pt; - } - - div.variablelist table tbody tr td - { - text-align: left; - vertical-align: top; - padding: 0em 2em 0em 0em; - font-size: 10pt; - margin: 0em 0em 0.5em 0em; - line-height: 1; - } - - div.variablelist dl dt - { - margin-bottom: 0.2em; - } - - div.variablelist dl dd - { - margin: 0em 0em 0.5em 2em; - font-size: 10pt; - } - - div.variablelist table tbody tr td p, - div.variablelist dl dd p - { - margin: 0em 0em 0.5em 0em; - line-height: 1; - } - -/*============================================================================= - Misc -=============================================================================*/ - - /* Title of books and articles in bibliographies */ - span.title - { - font-style: italic; - } - - span.underline - { - text-decoration: underline; - } - - span.strikethrough - { - text-decoration: line-through; - } - - /* Copyright, Legal Notice */ - div div.legalnotice p - { - text-align: left - } - -/*============================================================================= - Colors 
-=============================================================================*/ - - @media screen - { - body { - background-color: #FFFFFF; - color: #000000; - } - - /* Links */ - a - { - color: #005a9c; - } - - a:visited - { - color: #9c5a9c; - } - - h1 a, h2 a, h3 a, h4 a, h5 a, h6 a, - h1 a:hover, h2 a:hover, h3 a:hover, h4 a:hover, h5 a:hover, h6 a:hover, - h1 a:visited, h2 a:visited, h3 a:visited, h4 a:visited, h5 a:visited, h6 a:visited - { - text-decoration: none; /* no underline */ - color: #000000; - } - - /* Syntax Highlighting */ - .keyword { color: #0000AA; } - .identifier { color: #000000; } - .special { color: #707070; } - .preprocessor { color: #402080; } - .char { color: teal; } - .comment { color: #800000; } - .string { color: teal; } - .number { color: teal; } - .white_bkd { background-color: #FFFFFF; } - .dk_grey_bkd { background-color: #999999; } - - /* Copyright, Legal Notice */ - .copyright - { - color: #666666; - font-size: small; - } - - div div.legalnotice p - { - color: #666666; - } - - /* Program listing */ - pre.synopsis - { - border: 1px solid #DCDCDC; - } - - .programlisting, - .screen - { - border: 1px solid #DCDCDC; - } - - td .programlisting, - td .screen - { - border: 0px solid #DCDCDC; - } - - /* Blurbs */ - div.note, - div.tip, - div.important, - div.caution, - div.warning, - p.blurb - { - border: 1px solid #DCDCDC; - } - - /* Table of contents */ - .toc - { - border: 1px solid #DCDCDC; - } - - /* Tables */ - div.informaltable table tr td, - div.table table tr td - { - border: 1px solid #DCDCDC; - } - - div.informaltable table tr th, - div.table table tr th - { - background-color: #F0F0F0; - border: 1px solid #DCDCDC; - } - - .copyright-footer - { - color: #8F8F8F; - } - - /* Misc */ - span.highlight - { - color: #00A000; - } - } - - @media print - { - /* Links */ - a - { - color: black; - } - - a:visited - { - color: black; - } - - .spirit-nav - { - display: none; - } - - /* Program listing */ - pre.synopsis - { - border: 1px 
solid gray; - } - - .programlisting, - .screen - { - border: 1px solid gray; - } - - td .programlisting, - td .screen - { - border: 0px solid #DCDCDC; - } - - /* Table of contents */ - .toc - { - border: 1px solid gray; - } - - .informaltable table, - .table table - { - border: 1px solid gray; - border-collapse: collapse; - } - - /* Tables */ - div.informaltable table tr td, - div.table table tr td - { - border: 1px solid gray; - } - - div.informaltable table tr th, - div.table table tr th - { - border: 1px solid gray; - } - - table.simplelist tr td - { - border: none !important; - } - - /* Misc */ - span.highlight - { - font-weight: bold; - } - } - -/*============================================================================= - Images -=============================================================================*/ - - span.inlinemediaobject img - { - vertical-align: middle; - } - -/*============================================================================== - Super and Subscript: style so that line spacing isn't effected, see - http://www.adobe.com/cfusion/communityengine/index.cfm?event=showdetails&productId=1&postId=5341 -==============================================================================*/ - -sup, -sub { - height: 0; - line-height: 1; - vertical-align: baseline; - _vertical-align: bottom; - position: relative; - -} - -sup { - bottom: 1ex; -} - -sub { - top: .5ex; -} - +/*============================================================================= + Copyright (c) 2004 Joel de Guzman + http://spirit.sourceforge.net/ + + Distributed under the Boost Software License, Version 1.0. 
(See accompany- + ing file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +=============================================================================*/ + +/*============================================================================= + Body defaults +=============================================================================*/ + + body + { + margin: 1em; + font-family: sans-serif; + } + +/*============================================================================= + Paragraphs +=============================================================================*/ + + p + { + text-align: left; + font-size: 10pt; + line-height: 1.15; + } + +/*============================================================================= + Program listings +=============================================================================*/ + + /* Code on paragraphs */ + p tt.computeroutput + { + font-size: 9pt; + } + + pre.synopsis + { + font-size: 90%; + margin: 1pc 4% 0pc 4%; + padding: 0.5pc 0.5pc 0.5pc 0.5pc; + } + + .programlisting, + .screen + { + font-size: 9pt; + display: block; + margin: 1pc 4% 0pc 4%; + padding: 0.5pc 0.5pc 0.5pc 0.5pc; + } + + /* Program listings in tables don't get borders */ + td .programlisting, + td .screen + { + margin: 0pc 0pc 0pc 0pc; + padding: 0pc 0pc 0pc 0pc; + } + +/*============================================================================= + Headings +=============================================================================*/ + + h1, h2, h3, h4, h5, h6 + { + text-align: left; + margin: 1em 0em 0.5em 0em; + font-weight: bold; + } + + h1 { font: 140% } + h2 { font: bold 140% } + h3 { font: bold 130% } + h4 { font: bold 120% } + h5 { font: italic 110% } + h6 { font: italic 100% } + + /* Top page titles */ + title, + h1.title, + h2.title + h3.title, + h4.title, + h5.title, + h6.title, + .refentrytitle + { + font-weight: bold; + margin-bottom: 1pc; + } + + h1.title { font-size: 140% } + h2.title { font-size: 140% } + h3.title { 
font-size: 130% } + h4.title { font-size: 120% } + h5.title { font-size: 110% } + h6.title { font-size: 100% } + + .section h1 + { + margin: 0em 0em 0.5em 0em; + font-size: 140%; + } + + .section h2 { font-size: 140% } + .section h3 { font-size: 130% } + .section h4 { font-size: 120% } + .section h5 { font-size: 110% } + .section h6 { font-size: 100% } + + /* Code on titles */ + h1 tt.computeroutput { font-size: 140% } + h2 tt.computeroutput { font-size: 140% } + h3 tt.computeroutput { font-size: 130% } + h4 tt.computeroutput { font-size: 130% } + h5 tt.computeroutput { font-size: 130% } + h6 tt.computeroutput { font-size: 130% } + + +/*============================================================================= + Author +=============================================================================*/ + + h3.author + { + font-size: 100% + } + +/*============================================================================= + Lists +=============================================================================*/ + + li + { + font-size: 10pt; + line-height: 1.3; + } + + /* Unordered lists */ + ul + { + text-align: left; + } + + /* Ordered lists */ + ol + { + text-align: left; + } + +/*============================================================================= + Links +=============================================================================*/ + + a + { + text-decoration: none; /* no underline */ + } + + a:hover + { + text-decoration: underline; + } + +/*============================================================================= + Spirit style navigation +=============================================================================*/ + + .spirit-nav + { + text-align: right; + } + + .spirit-nav a + { + color: white; + padding-left: 0.5em; + } + + .spirit-nav img + { + border-width: 0px; + } + +/*============================================================================= + Copyright footer 
+=============================================================================*/ + .copyright-footer + { + text-align: right; + font-size: 70%; + } + + .copyright-footer p + { + text-align: right; + font-size: 80%; + } + +/*============================================================================= + Table of contents +=============================================================================*/ + + .toc + { + margin: 1pc 4% 0pc 4%; + padding: 0.1pc 1pc 0.1pc 1pc; + font-size: 80%; + line-height: 1.15; + } + + .boost-toc + { + float: right; + padding: 0.5pc; + } + + /* Code on toc */ + .toc .computeroutput { font-size: 120% } + +/*============================================================================= + Tables +=============================================================================*/ + + .table-title, + div.table p.title + { + margin-left: 4%; + padding-right: 0.5em; + padding-left: 0.5em; + } + + .informaltable table, + .table table + { + width: 92%; + margin-left: 4%; + margin-right: 4%; + } + + div.informaltable table, + div.table table + { + padding: 4px; + } + + /* Table Cells */ + div.informaltable table tr td, + div.table table tr td + { + padding: 0.5em; + text-align: left; + font-size: 9pt; + } + + div.informaltable table tr th, + div.table table tr th + { + padding: 0.5em 0.5em 0.5em 0.5em; + border: 1pt solid white; + font-size: 80%; + } + + table.simplelist + { + width: auto !important; + margin: 0em !important; + padding: 0em !important; + border: none !important; + } + table.simplelist td + { + margin: 0em !important; + padding: 0em !important; + text-align: left !important; + font-size: 9pt !important; + border: none !important; + } + +/*============================================================================= + Blurbs +=============================================================================*/ + + div.note, + div.tip, + div.important, + div.caution, + div.warning, + p.blurb + { + font-size: 9pt; /* A little bit smaller than 
the main text */ + line-height: 1.2; + display: block; + margin: 1pc 4% 0pc 4%; + padding: 0.5pc 0.5pc 0.5pc 0.5pc; + } + + p.blurb img + { + padding: 1pt; + } + +/*============================================================================= + Variable Lists +=============================================================================*/ + + div.variablelist + { + margin: 1em 0; + } + + /* Make the terms in definition lists bold */ + div.variablelist dl dt, + span.term + { + font-weight: bold; + font-size: 10pt; + } + + div.variablelist table tbody tr td + { + text-align: left; + vertical-align: top; + padding: 0em 2em 0em 0em; + font-size: 10pt; + margin: 0em 0em 0.5em 0em; + line-height: 1; + } + + div.variablelist dl dt + { + margin-bottom: 0.2em; + } + + div.variablelist dl dd + { + margin: 0em 0em 0.5em 2em; + font-size: 10pt; + } + + div.variablelist table tbody tr td p, + div.variablelist dl dd p + { + margin: 0em 0em 0.5em 0em; + line-height: 1; + } + +/*============================================================================= + Misc +=============================================================================*/ + + /* Title of books and articles in bibliographies */ + span.title + { + font-style: italic; + } + + span.underline + { + text-decoration: underline; + } + + span.strikethrough + { + text-decoration: line-through; + } + + /* Copyright, Legal Notice */ + div div.legalnotice p + { + text-align: left + } + +/*============================================================================= + Colors +=============================================================================*/ + + @media screen + { + body { + background-color: #FFFFFF; + color: #000000; + } + + /* Links */ + a + { + color: #005a9c; + } + + a:visited + { + color: #9c5a9c; + } + + h1 a, h2 a, h3 a, h4 a, h5 a, h6 a, + h1 a:hover, h2 a:hover, h3 a:hover, h4 a:hover, h5 a:hover, h6 a:hover, + h1 a:visited, h2 a:visited, h3 a:visited, h4 a:visited, h5 a:visited, h6 a:visited + { + 
text-decoration: none; /* no underline */ + color: #000000; + } + + /* Syntax Highlighting */ + .keyword { color: #0000AA; } + .identifier { color: #000000; } + .special { color: #707070; } + .preprocessor { color: #402080; } + .char { color: teal; } + .comment { color: #800000; } + .string { color: teal; } + .number { color: teal; } + .white_bkd { background-color: #FFFFFF; } + .dk_grey_bkd { background-color: #999999; } + + /* Copyright, Legal Notice */ + .copyright + { + color: #666666; + font-size: small; + } + + div div.legalnotice p + { + color: #666666; + } + + /* Program listing */ + pre.synopsis + { + border: 1px solid #DCDCDC; + } + + .programlisting, + .screen + { + border: 1px solid #DCDCDC; + } + + td .programlisting, + td .screen + { + border: 0px solid #DCDCDC; + } + + /* Blurbs */ + div.note, + div.tip, + div.important, + div.caution, + div.warning, + p.blurb + { + border: 1px solid #DCDCDC; + } + + /* Table of contents */ + .toc + { + border: 1px solid #DCDCDC; + } + + /* Tables */ + div.informaltable table tr td, + div.table table tr td + { + border: 1px solid #DCDCDC; + } + + div.informaltable table tr th, + div.table table tr th + { + background-color: #F0F0F0; + border: 1px solid #DCDCDC; + } + + .copyright-footer + { + color: #8F8F8F; + } + + /* Misc */ + span.highlight + { + color: #00A000; + } + } + + @media print + { + /* Links */ + a + { + color: black; + } + + a:visited + { + color: black; + } + + .spirit-nav + { + display: none; + } + + /* Program listing */ + pre.synopsis + { + border: 1px solid gray; + } + + .programlisting, + .screen + { + border: 1px solid gray; + } + + td .programlisting, + td .screen + { + border: 0px solid #DCDCDC; + } + + /* Table of contents */ + .toc + { + border: 1px solid gray; + } + + .informaltable table, + .table table + { + border: 1px solid gray; + border-collapse: collapse; + } + + /* Tables */ + div.informaltable table tr td, + div.table table tr td + { + border: 1px solid gray; + } + + 
div.informaltable table tr th, + div.table table tr th + { + border: 1px solid gray; + } + + table.simplelist tr td + { + border: none !important; + } + + /* Misc */ + span.highlight + { + font-weight: bold; + } + } + +/*============================================================================= + Images +=============================================================================*/ + + span.inlinemediaobject img + { + vertical-align: middle; + } + +/*============================================================================== + Super and Subscript: style so that line spacing isn't effected, see + http://www.adobe.com/cfusion/communityengine/index.cfm?event=showdetails&productId=1&postId=5341 +==============================================================================*/ + +sup, +sub { + height: 0; + line-height: 1; + vertical-align: baseline; + _vertical-align: bottom; + position: relative; + +} + +sup { + bottom: 1ex; +} + +sub { + top: .5ex; +} + diff --git a/docs/quickstart.qbk b/docs/quickstart.qbk index 512db4a..2199b37 100644 --- a/docs/quickstart.qbk +++ b/docs/quickstart.qbk @@ -1,255 +1,255 @@ -[article pugixml - [quickbook 1.5] - - [version 0.9] - [id quickstart] - [copyright 2010 Arseny Kapoulkine] - [license Distributed under the MIT License] -] - -[template file[name]''''''[name]''''''] -[template sref[name]''''''] -[template ftnt[id text]''''''[text]''''''] - -[section:main pugixml 0.9 quick start guide] - -[section:introduction Introduction] - -pugixml is a light-weight C++ XML processing library. It consists of a DOM-like interface with rich traversal/modification capabilities, an extremely fast XML parser which constructs the DOM tree from an XML file/buffer, and an XPath 1.0 implementation for complex data-driven tree queries. Full Unicode support is also available, with Unicode interface variants and conversions between different Unicode encodings (which happen automatically during parsing/saving). 
The library is extremely portable and easy to integrate and use. pugixml is developed and maintained since 2006 and has many users. All code is distributed under the MIT license, making it completely free to use in both open-source and proprietary applications. - -pugixml enables very fast, convenient and memory-efficient XML document processing. However, since pugixml has a DOM parser, it can't process XML documents that do not fit in memory; also the parser is a non-validating one, so if you need DTD/Schema validation, the library is not for you. - -This is the quick start guide for pugixml, which purpose is to enable you to start using the library quickly. Many important library features are either not described at all or only mentioned briefly; for more complete information you [@manual.html should read the complete manual]. - -[note No documentation is perfect, neither is this one. If you encounter a description that is unclear, please file an issue as described in [sref feedback]. Also if you can spare the time for a full proof-reading, including spelling and grammar, that would be great! Please [link email send me an e-mail]; as a token of appreciation, your name will be included into the corresponding section of the manual.] - -[endsect] [/introduction] - -[section:install Installation] - -pugixml is distributed in source form. You can download a source distribution via one of the following links: - -[pre -[@http://pugixml.googlecode.com/files/pugixml-0.9.zip] -[@http://pugixml.googlecode.com/files/pugixml-0.9.tar.gz] -] - -The distribution contains library source, documentation (the guide you're reading now and the manual) and some code examples. After downloading the distribution, install pugixml by extracting all files from the compressed archive. - -The complete pugixml source consists of four files - two source files, [file pugixml.cpp] and [file pugixpath.cpp], and two header files, [file pugixml.hpp] and [file pugiconfig.hpp]. 
[file pugixml.hpp] is the primary header which you need to include in order to use pugixml classes/functions. The rest of this guide assumes that [file pugixml.hpp] is either in the current directory or in one of include directories of your projects, so that `#include "pugixml.hpp"` can find the header; however you can also use relative path (i.e. `#include "../libs/pugixml/src/pugixml.hpp"`) or include directory-relative path (i.e. `#include `). - -The easiest way to build pugixml is to compile two source files, [file pugixml.cpp] and [file pugixpath.cpp], along with the existing library/executable. This process depends on the method of building your application; for example, if you're using Microsoft Visual Studio[ftnt trademarks All trademarks used are properties of their respective owners.], Apple Xcode, Code::Blocks or any other IDE, just add [file pugixml.cpp] and [file pugixpath.cpp] to one of your projects. There are other building methods available, including building pugixml as a standalone static/shared library; read the manual for further information. - -[endsect] [/install] - -[section:dom Document object model] - -pugixml stores XML data in DOM-like way: the entire XML document (both document structure and element data) is stored in memory as a tree. The tree can be loaded from character stream (file, string, C++ I/O stream), then traversed via special API or XPath expressions. The whole tree is mutable: both node structure and node/attribute data can be changed at any time. Finally, the result of document transformations can be saved to a character stream (file, C++ I/O stream or custom transport). - -The root of the tree is the document itself, which corresponds to C++ type `xml_document`. Document has one or more child nodes, which correspond to C++ type `xml_node`. 
Nodes have different types; depending on a type, a node can have a collection of child nodes, a collection of attributes, which correspond to C++ type `xml_attribute`, and some additional data (i.e. name). - -The most common node types are: - -* Document node (`node_document`) - this is the root of the tree, which consists of several child nodes. This node corresponds to `xml_document` class; note that `xml_document` is a sub-class of `xml_node`, so the entire node interface is also available. -* Element/tag node (`node_element`) - this is the most common type of node, which represents XML elements. Element nodes have a name, a collection of attributes and a collection of child nodes (both of which may be empty). The attribute is a simple name/value pair. -* Plain character data nodes (`node_pcdata`) represent plain text in XML. PCDATA nodes have a value, but do not have name or children/attributes. Note that plain character data is not a part of the element node but instead has its own node; for example, an element node can have several child PCDATA nodes. - -Despite the fact that there are several node types, there are only three C++ types representing the tree (`xml_document`, `xml_node`, `xml_attribute`); some operations on `xml_node` are only valid for certain node types. They are described below. - -[note All pugixml classes and functions are located in `pugi` namespace; you have to either use explicit name qualification (i.e. `pugi::xml_node`), or to gain access to relevant symbols via `using` directive (i.e. `using pugi::xml_node;` or `using namespace pugi;`).] - -`xml_document` is the owner of the entire document structure; destroying the document destroys the whole tree. The interface of `xml_document` consists of loading functions, saving functions and the interface of `xml_node`, which allows for document inspection and/or modification. 
Note that while `xml_document` is a sub-class of `xml_node`, `xml_node` is not a polymorphic type; the inheritance is only used to simplify usage. - -`xml_node` is the handle to document node; it can point to any node in the document, including document itself. There is a common interface for nodes of all types. Note that `xml_node` is only a handle to the actual node, not the node itself - you can have several `xml_node` handles pointing to the same underlying object. Destroying `xml_node` handle does not destroy the node and does not remove it from the tree. - -There is a special value of `xml_node` type, known as null node or empty node. It does not correspond to any node in any document, and thus resembles null pointer. However, all operations are defined on empty nodes; generally the operations don't do anything and return empty nodes/attributes or empty strings as their result. This is useful for chaining calls; i.e. you can get the grandparent of a node like so: `node.parent().parent()`; if a node is a null node or it does not have a parent, the first `parent()` call returns null node; the second `parent()` call then also returns null node, so you don't have to check for errors twice. You can test if a handle is null via implicit boolean cast: `if (node) { ... }` or `if (!node) { ... }`. - -`xml_attribute` is the handle to an XML attribute; it has the same semantics as `xml_node`, i.e. there can be several `xml_attribute` handles pointing to the same underlying object, there is a special null attribute value, which propagates to function results. - -There are two choices of interface and internal representation when configuring pugixml: you can either choose the UTF-8 (also called char) interface or UTF-16/32 (also called wchar_t) one. The choice is controlled via `PUGIXML_WCHAR_MODE` define; you can set it via [file pugiconfig.hpp] or via preprocessor options. 
All tree functions that work with strings work with either C-style null terminated strings or STL strings of the selected character type. Read the manual for additional information on Unicode interface. - -[endsect] [/dom] - -[section:loading Loading document] - -pugixml provides several functions for loading XML data from various places - files, C++ iostreams, memory buffers. All functions use an extremely fast non-validating parser. This parser is not fully W3C conformant - it can load any valid XML document, but does not perform some well-formedness checks. While considerable effort is made to reject invalid XML documents, some validation is not performed because of performance reasons. XML data is always converted to internal character format before parsing. pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it's a strict subset of UTF-16) and handles all encoding conversions automatically. - -The most common source of XML data is files; pugixml provides a separate function for loading XML document from file. This function accepts file path as its first argument, and also two optional arguments, which specify parsing options and input data encoding, which are described in the manual. - -This is an example of loading XML document from file ([@samples/load_file.cpp]): - -[import samples/load_file.cpp] -[code_load_file] - -`load_file`, as well as other loading functions, destroys the existing document tree and then tries to load the new tree from the specified file. The result of the operation is returned in an `xml_parse_result` object; this object contains the operation status, and the related information (i.e. last successfully parsed position in the input file, if parsing fails). 
- -Parsing result object can be implicitly converted to `bool`; if you do not want to handle parsing errors thoroughly, you can just check the return value of load functions as if it was a `bool`: `if (doc.load_file("file.xml")) { ... } else { ... }`. Otherwise you can use the `status` member to get parsing status, or the `description()` member function to get the status in a string form. - -This is an example of handling loading errors ([@samples/load_error_handling.cpp]): - -[import samples/load_error_handling.cpp] -[code_load_error_handling] - -Sometimes XML data should be loaded from some other source than file, i.e. HTTP URL; also you may want to load XML data from file using non-standard functions, i.e. to use your virtual file system facilities or to load XML from gzip-compressed files. These scenarios either require loading document from memory, in which case you should prepare a contiguous memory block with all XML data and to pass it to one of buffer loading functions, or loading document from C++ IOstream, in which case you should provide an object which implements `std::istream` or `std::wistream` interface. - -There are different functions for loading document from memory; they treat the passed buffer as either an immutable one (`load_buffer`), a mutable buffer which is owned by the caller (`load_buffer_inplace`), or a mutable buffer which ownership belongs to pugixml (`load_buffer_inplace_own`). There is also a simple helper function, `xml_document::load`, for cases when you want to load the XML document from null-terminated character string. 
- -This is an example of loading XML document from memory using one of these functions ([@samples/load_memory.cpp]); read the sample code for more examples: - -[import samples/load_memory.cpp] -[code_load_memory_decl] -[code_load_memory_buffer_inplace] - -This is a simple example of loading XML document from file using streams ([@samples/load_stream.cpp]); read the sample code for more complex examples involving wide streams and locales: - -[import samples/load_stream.cpp] -[code_load_stream] - -[endsect] [/loading] - -[section:access Accessing document data] - -pugixml features an extensive interface for getting various types of data from the document and for traversing the document. You can use various accessors to get node/attribute data, you can traverse the child node/attribute lists via accessors or iterators, you can do depth-first traversals with `xml_tree_walker` objects, and you can use XPath for complex data-driven queries. - -You can get node or attribute name via `name()` accessor, and value via `value()` accessor. Note that both functions never return null pointers - they either return a string with the relevant content, or an empty string if name/value is absent or if the handle is null. Also there are two notable things for reading values: - -* It is common to store data as text contents of some node - i.e. `This is a node`. In this case, `` node does not have a value, but instead has a child of type `node_pcdata` with value `"This is a node"`. pugixml provides `child_value()` helper functions to parse such data. - -* In many cases attribute values have types that are not strings - i.e. an attribute may always contain values that should be treated as integers, despite the fact that they are represented as strings in XML. pugixml provides several accessors that convert attribute value to some other type. 
- -This is an example of using these functions ([@samples/traverse_base.cpp]): - -[import samples/traverse_base.cpp] -[code_traverse_base_data] - -Since a lot of document traversal consists of finding the node/attribute with the correct name, there are special functions for that purpose. For example, `child("Tool")` returns the first node which has the name `"Tool"`, or null handle if there is no such node. This is an example of using such functions ([@samples/traverse_base.cpp]): - -[code_traverse_base_contents] - -Child node lists and attribute lists are simply double-linked lists; while you can use `previous_sibling`/`next_sibling` and other such functions for iteration, pugixml additionally provides node and attribute iterators, so that you can treat nodes as containers of other nodes or attributes. All iterators are bidirectional and support all usual iterator operations. The iterators are invalidated if the node\/attribute objects they're pointing to are removed from the tree; adding nodes\/attributes does not invalidate any iterators. - -Here is an example of using iterators for document traversal ([@samples/traverse_iter.cpp]): - -[import samples/traverse_iter.cpp] -[code_traverse_iter] - -The methods described above allow traversal of immediate children of some node; if you want to do a deep tree traversal, you'll have to do it via a recursive function or some equivalent method. However, pugixml provides a helper for depth-first traversal of a subtree. In order to use it, you have to implement `xml_tree_walker` interface and to call `traverse` function. - -This is an example of traversing tree hierarchy with xml_tree_walker ([@samples/traverse_walker.cpp]): - -[import samples/traverse_walker.cpp] -[code_traverse_walker_impl] -[code_traverse_walker_traverse] - -Finally, for complex queries often a higher-level DSL is needed. pugixml provides an implementation of XPath 1.0 language for such queries. 
The complete description of XPath usage can be found in the manual, but here are some examples: - -[import samples/xpath_select.cpp] -[code_xpath_select] - -[caution XPath functions throw `xpath_exception` objects on error; the sample above does not catch these exceptions.] - -[endsect] [/access] - -[section:modify Modifying document data] - -The document in pugixml is fully mutable: you can completely change the document structure and modify the data of nodes/attributes. All functions take care of memory management and structural integrity themselves, so they always result in structurally valid tree - however, it is possible to create an invalid XML tree (for example, by adding two attributes with the same name or by setting attribute/node name to empty/invalid string). Tree modification is optimized for performance and for memory consumption, so if you have enough memory you can create documents from scratch with pugixml and later save them to file/stream instead of relying on error-prone manual text writing and without too much overhead. - -All member functions that change node/attribute data or structure are non-constant and thus can not be called on constant handles. However, you can easily convert constant handle to non-constant one by simple assignment: `void foo(const pugi::xml_node& n) { pugi::xml_node nc = n; }`, so const-correctness here mainly provides additional documentation. - -As discussed before, nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent. You can use `set_name` and `set_value` member functions to set them. Similar functions are available for attributes; however, the `set_value` function is overloaded for some other types except strings, like floating-point numbers. Also, attribute value can be set using an assignment operator. 
This is an example of setting node/attribute name and value ([@samples/modify_base.cpp]): - -[import samples/modify_base.cpp] -[code_modify_base_node] -[code_modify_base_attr] - -Nodes and attributes do not exist outside of document tree, so you can't create them without adding them to some document. A node or attribute can be created at the end of node/attribute list or before\/after some other node. All insertion functions return the handle to newly created object on success, and null handle on failure. Even if the operation fails (for example, if you're trying to add a child node to PCDATA node), the document remains in consistent state, but the requested node/attribute is not added. - -[caution attribute() and child() functions do not add attributes or nodes to the tree, so code like `node.attribute("id") = 123;` will not do anything if `node` does not have an attribute with name `"id"`. Make sure you're operating with existing attributes/nodes by adding them if necessary.] - -This is an example of adding new attributes\/nodes to the document ([@samples/modify_add.cpp]): - -[import samples/modify_add.cpp] -[code_modify_add] - -[#xml_node::remove_attribute][#xml_node::remove_child] -If you do not want your document to contain some node or attribute, you can remove it with `remove_attribute` and `remove_child` functions. Removing the attribute or node invalidates all handles to the same underlying object, and also invalidates all iterators pointing to the same object. Removing node also invalidates all past-the-end iterators to its attribute or child node list. Be careful to ensure that all such handles and iterators either do not exist or are not used after the attribute\/node is removed. 
- -This is an example of removing attributes\/nodes from the document ([@samples/modify_remove.cpp]): - -[import samples/modify_remove.cpp] -[code_modify_remove] - -[endsect] [/modify] - -[section:saving Saving document] - -Often after creating a new document or loading the existing one and processing it, it is necessary to save the result back to file. Also it is occasionally useful to output the whole document or a subtree to some stream; use cases include debug printing, serialization via network or other text-oriented medium, etc. pugixml provides several functions to output any subtree of the document to a file, stream or another generic transport interface; these functions allow to customize the output format, and also perform necessary encoding conversions. - -The node/attribute data is written to the destination properly formatted according to the node type; all special XML symbols, such as < and &, are properly escaped. In order to guard against forgotten node/attribute names, empty node/attribute names are printed as `":anonymous"`. For proper output, make sure all node and attribute names are set to meaningful values. - -If you want to save the whole document to a file, you can use the `save_file` function, which returns `true` on success. This is a simple example of saving XML document to file ([@samples/save_file.cpp]): - -[import samples/save_file.cpp] -[code_save_file] - -For additional interoperability pugixml provides functions for saving document to any object which implements C++ std::ostream interface. This allows you to save documents to any standard C++ stream (i.e. file stream) or any third-party compliant implementation (i.e. Boost Iostreams). Most notably, this allows for easy debug output, since you can use `std::cout` stream as saving target. There are two functions, one works with narrow character streams, another handles wide character ones. 
- -This is a simple example of saving XML document to standard output ([@samples/save_stream.cpp]): - -[import samples/save_stream.cpp] -[code_save_stream] - -All of the above saving functions are implemented in terms of writer interface. This is a simple interface with a single function, which is called several times during output process with chunks of document data as input. In order to output the document via some custom transport, for example sockets, you should create an object which implements `xml_writer_file` interface and pass it to `xml_document::save` function. - -This is a simple example of custom writer for saving document data to STL string ([@samples/save_custom_writer.cpp]); read the sample code for more complex examples: - -[import samples/save_custom_writer.cpp] -[code_save_custom_writer] - -While the previously described functions saved the whole document to the destination, it is easy to save a single subtree. Instead of calling `xml_document::save`, just call `xml_node::print` function on the target node. You can save node contents to C++ IOstream object or custom writer in this way. Saving a subtree slightly differs from saving the whole document; read the manual for more information. - -[endsect] [/saving] - -[section:feedback Feedback] - -If you believe you've found a bug in pugixml, please file an issue via [@http://code.google.com/p/pugixml/issues/entry issue submission form]. Be sure to include the relevant information so that the bug can be reproduced: the version of pugixml, compiler version and target architecture, the code that uses pugixml and exhibits the bug, etc. Feature requests and contributions can be filed as issues, too. - -[#email] -If filing an issue is not possible due to privacy or other concerns, you can contact pugixml author by e-mail directly: [@mailto:arseny.kapoulkine@gmail.com arseny.kapoulkine@gmail.com]. 
- -[endsect] [/feedback] - -[section:license License] - -The pugixml library is distributed under the MIT license: - -[: -Copyright (c) 2006-2010 Arseny Kapoulkine - -Permission is hereby granted, free of charge, to any person -obtaining a copy of this software and associated documentation -files (the "Software"), to deal in the Software without -restriction, including without limitation the rights to use, -copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the -Software is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. -] - -[endsect] [/license] - -[endsect] [/main] - -[/ vim:et ] +[article pugixml + [quickbook 1.5] + + [version 0.9] + [id quickstart] + [copyright 2010 Arseny Kapoulkine] + [license Distributed under the MIT License] +] + +[template file[name]''''''[name]''''''] +[template sref[name]''''''] +[template ftnt[id text]''''''[text]''''''] + +[section:main pugixml 0.9 quick start guide] + +[section:introduction Introduction] + +pugixml is a light-weight C++ XML processing library. It consists of a DOM-like interface with rich traversal/modification capabilities, an extremely fast XML parser which constructs the DOM tree from an XML file/buffer, and an XPath 1.0 implementation for complex data-driven tree queries. 
Full Unicode support is also available, with Unicode interface variants and conversions between different Unicode encodings (which happen automatically during parsing/saving). The library is extremely portable and easy to integrate and use. pugixml has been developed and maintained since 2006 and has many users. All code is distributed under the MIT license, making it completely free to use in both open-source and proprietary applications. + +pugixml enables very fast, convenient and memory-efficient XML document processing. However, since pugixml has a DOM parser, it can't process XML documents that do not fit in memory; also the parser is a non-validating one, so if you need DTD/Schema validation, the library is not for you. + +This is the quick start guide for pugixml, whose purpose is to enable you to start using the library quickly. Many important library features are either not described at all or only mentioned briefly; for more complete information you [@manual.html should read the complete manual]. + +[note No documentation is perfect, neither is this one. If you encounter a description that is unclear, please file an issue as described in [sref feedback]. Also if you can spare the time for a full proof-reading, including spelling and grammar, that would be great! Please [link email send me an e-mail]; as a token of appreciation, your name will be included in the corresponding section of the manual.] + +[endsect] [/introduction] + +[section:install Installation] + +pugixml is distributed in source form. You can download a source distribution via one of the following links: + +[pre +[@http://pugixml.googlecode.com/files/pugixml-0.9.zip] +[@http://pugixml.googlecode.com/files/pugixml-0.9.tar.gz] +] + +The distribution contains library source, documentation (the guide you're reading now and the manual) and some code examples. After downloading the distribution, install pugixml by extracting all files from the compressed archive. 
+ +The complete pugixml source consists of four files - two source files, [file pugixml.cpp] and [file pugixpath.cpp], and two header files, [file pugixml.hpp] and [file pugiconfig.hpp]. [file pugixml.hpp] is the primary header which you need to include in order to use pugixml classes/functions. The rest of this guide assumes that [file pugixml.hpp] is either in the current directory or in one of include directories of your projects, so that `#include "pugixml.hpp"` can find the header; however you can also use relative path (i.e. `#include "../libs/pugixml/src/pugixml.hpp"`) or include directory-relative path (i.e. `#include <xml/thirdparty/pugixml/src/pugixml.hpp>`). + +The easiest way to build pugixml is to compile two source files, [file pugixml.cpp] and [file pugixpath.cpp], along with the existing library/executable. This process depends on the method of building your application; for example, if you're using Microsoft Visual Studio[ftnt trademarks All trademarks used are properties of their respective owners.], Apple Xcode, Code::Blocks or any other IDE, just add [file pugixml.cpp] and [file pugixpath.cpp] to one of your projects. There are other building methods available, including building pugixml as a standalone static/shared library; read the manual for further information. + +[endsect] [/install] + +[section:dom Document object model] + +pugixml stores XML data in DOM-like way: the entire XML document (both document structure and element data) is stored in memory as a tree. The tree can be loaded from character stream (file, string, C++ I/O stream), then traversed via special API or XPath expressions. The whole tree is mutable: both node structure and node/attribute data can be changed at any time. Finally, the result of document transformations can be saved to a character stream (file, C++ I/O stream or custom transport). + +The root of the tree is the document itself, which corresponds to C++ type `xml_document`. 
Document has one or more child nodes, which correspond to C++ type `xml_node`. Nodes have different types; depending on a type, a node can have a collection of child nodes, a collection of attributes, which correspond to C++ type `xml_attribute`, and some additional data (i.e. name). + +The most common node types are: + +* Document node (`node_document`) - this is the root of the tree, which consists of several child nodes. This node corresponds to `xml_document` class; note that `xml_document` is a sub-class of `xml_node`, so the entire node interface is also available. +* Element/tag node (`node_element`) - this is the most common type of node, which represents XML elements. Element nodes have a name, a collection of attributes and a collection of child nodes (both of which may be empty). The attribute is a simple name/value pair. +* Plain character data nodes (`node_pcdata`) represent plain text in XML. PCDATA nodes have a value, but do not have name or children/attributes. Note that plain character data is not a part of the element node but instead has its own node; for example, an element node can have several child PCDATA nodes. + +Despite the fact that there are several node types, there are only three C++ types representing the tree (`xml_document`, `xml_node`, `xml_attribute`); some operations on `xml_node` are only valid for certain node types. They are described below. + +[note All pugixml classes and functions are located in `pugi` namespace; you have to either use explicit name qualification (i.e. `pugi::xml_node`), or to gain access to relevant symbols via `using` directive (i.e. `using pugi::xml_node;` or `using namespace pugi;`).] + +`xml_document` is the owner of the entire document structure; destroying the document destroys the whole tree. The interface of `xml_document` consists of loading functions, saving functions and the interface of `xml_node`, which allows for document inspection and/or modification. 
Note that while `xml_document` is a sub-class of `xml_node`, `xml_node` is not a polymorphic type; the inheritance is only used to simplify usage. + +`xml_node` is the handle to document node; it can point to any node in the document, including document itself. There is a common interface for nodes of all types. Note that `xml_node` is only a handle to the actual node, not the node itself - you can have several `xml_node` handles pointing to the same underlying object. Destroying `xml_node` handle does not destroy the node and does not remove it from the tree. + +There is a special value of `xml_node` type, known as null node or empty node. It does not correspond to any node in any document, and thus resembles null pointer. However, all operations are defined on empty nodes; generally the operations don't do anything and return empty nodes/attributes or empty strings as their result. This is useful for chaining calls; i.e. you can get the grandparent of a node like so: `node.parent().parent()`; if a node is a null node or it does not have a parent, the first `parent()` call returns null node; the second `parent()` call then also returns null node, so you don't have to check for errors twice. You can test if a handle is null via implicit boolean cast: `if (node) { ... }` or `if (!node) { ... }`. + +`xml_attribute` is the handle to an XML attribute; it has the same semantics as `xml_node`, i.e. there can be several `xml_attribute` handles pointing to the same underlying object, there is a special null attribute value, which propagates to function results. + +There are two choices of interface and internal representation when configuring pugixml: you can either choose the UTF-8 (also called char) interface or UTF-16/32 (also called wchar_t) one. The choice is controlled via `PUGIXML_WCHAR_MODE` define; you can set it via [file pugiconfig.hpp] or via preprocessor options. 
All tree functions that work with strings work with either C-style null terminated strings or STL strings of the selected character type. Read the manual for additional information on Unicode interface. + +[endsect] [/dom] + +[section:loading Loading document] + +pugixml provides several functions for loading XML data from various places - files, C++ iostreams, memory buffers. All functions use an extremely fast non-validating parser. This parser is not fully W3C conformant - it can load any valid XML document, but does not perform some well-formedness checks. While considerable effort is made to reject invalid XML documents, some validation is not performed because of performance reasons. XML data is always converted to internal character format before parsing. pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it's a strict subset of UTF-16) and handles all encoding conversions automatically. + +The most common source of XML data is files; pugixml provides a separate function for loading XML document from file. This function accepts file path as its first argument, and also two optional arguments, which specify parsing options and input data encoding, which are described in the manual. + +This is an example of loading XML document from file ([@samples/load_file.cpp]): + +[import samples/load_file.cpp] +[code_load_file] + +`load_file`, as well as other loading functions, destroys the existing document tree and then tries to load the new tree from the specified file. The result of the operation is returned in an `xml_parse_result` object; this object contains the operation status, and the related information (i.e. last successfully parsed position in the input file, if parsing fails). 
+ +Parsing result object can be implicitly converted to `bool`; if you do not want to handle parsing errors thoroughly, you can just check the return value of load functions as if it was a `bool`: `if (doc.load_file("file.xml")) { ... } else { ... }`. Otherwise you can use the `status` member to get parsing status, or the `description()` member function to get the status in a string form. + +This is an example of handling loading errors ([@samples/load_error_handling.cpp]): + +[import samples/load_error_handling.cpp] +[code_load_error_handling] + +Sometimes XML data should be loaded from some other source than file, i.e. HTTP URL; also you may want to load XML data from file using non-standard functions, i.e. to use your virtual file system facilities or to load XML from gzip-compressed files. These scenarios either require loading document from memory, in which case you should prepare a contiguous memory block with all XML data and pass it to one of buffer loading functions, or loading document from C++ IOstream, in which case you should provide an object which implements `std::istream` or `std::wistream` interface. + +There are different functions for loading document from memory; they treat the passed buffer as either an immutable one (`load_buffer`), a mutable buffer which is owned by the caller (`load_buffer_inplace`), or a mutable buffer whose ownership belongs to pugixml (`load_buffer_inplace_own`). There is also a simple helper function, `xml_document::load`, for cases when you want to load the XML document from null-terminated character string. 
+ +This is an example of loading XML document from memory using one of these functions ([@samples/load_memory.cpp]); read the sample code for more examples: + +[import samples/load_memory.cpp] +[code_load_memory_decl] +[code_load_memory_buffer_inplace] + +This is a simple example of loading XML document from file using streams ([@samples/load_stream.cpp]); read the sample code for more complex examples involving wide streams and locales: + +[import samples/load_stream.cpp] +[code_load_stream] + +[endsect] [/loading] + +[section:access Accessing document data] + +pugixml features an extensive interface for getting various types of data from the document and for traversing the document. You can use various accessors to get node/attribute data, you can traverse the child node/attribute lists via accessors or iterators, you can do depth-first traversals with `xml_tree_walker` objects, and you can use XPath for complex data-driven queries. + +You can get node or attribute name via `name()` accessor, and value via `value()` accessor. Note that both functions never return null pointers - they either return a string with the relevant content, or an empty string if name/value is absent or if the handle is null. Also there are two notable things for reading values: + +* It is common to store data as text contents of some node - i.e. `<node><description>This is a node</description></node>`. In this case, `<description>` node does not have a value, but instead has a child of type `node_pcdata` with value `"This is a node"`. pugixml provides `child_value()` helper functions to parse such data. + +* In many cases attribute values have types that are not strings - i.e. an attribute may always contain values that should be treated as integers, despite the fact that they are represented as strings in XML. pugixml provides several accessors that convert attribute value to some other type. 
+ +This is an example of using these functions ([@samples/traverse_base.cpp]): + +[import samples/traverse_base.cpp] +[code_traverse_base_data] + +Since a lot of document traversal consists of finding the node/attribute with the correct name, there are special functions for that purpose. For example, `child("Tool")` returns the first node which has the name `"Tool"`, or null handle if there is no such node. This is an example of using such functions ([@samples/traverse_base.cpp]): + +[code_traverse_base_contents] + +Child node lists and attribute lists are simply double-linked lists; while you can use `previous_sibling`/`next_sibling` and other such functions for iteration, pugixml additionally provides node and attribute iterators, so that you can treat nodes as containers of other nodes or attributes. All iterators are bidirectional and support all usual iterator operations. The iterators are invalidated if the node\/attribute objects they're pointing to are removed from the tree; adding nodes\/attributes does not invalidate any iterators. + +Here is an example of using iterators for document traversal ([@samples/traverse_iter.cpp]): + +[import samples/traverse_iter.cpp] +[code_traverse_iter] + +The methods described above allow traversal of immediate children of some node; if you want to do a deep tree traversal, you'll have to do it via a recursive function or some equivalent method. However, pugixml provides a helper for depth-first traversal of a subtree. In order to use it, you have to implement `xml_tree_walker` interface and to call `traverse` function. + +This is an example of traversing tree hierarchy with xml_tree_walker ([@samples/traverse_walker.cpp]): + +[import samples/traverse_walker.cpp] +[code_traverse_walker_impl] +[code_traverse_walker_traverse] + +Finally, for complex queries often a higher-level DSL is needed. pugixml provides an implementation of XPath 1.0 language for such queries. 
The complete description of XPath usage can be found in the manual, but here are some examples: + +[import samples/xpath_select.cpp] +[code_xpath_select] + +[caution XPath functions throw `xpath_exception` objects on error; the sample above does not catch these exceptions.] + +[endsect] [/access] + +[section:modify Modifying document data] + +The document in pugixml is fully mutable: you can completely change the document structure and modify the data of nodes/attributes. All functions take care of memory management and structural integrity themselves, so they always result in structurally valid tree - however, it is possible to create an invalid XML tree (for example, by adding two attributes with the same name or by setting attribute/node name to empty/invalid string). Tree modification is optimized for performance and for memory consumption, so if you have enough memory you can create documents from scratch with pugixml and later save them to file/stream instead of relying on error-prone manual text writing and without too much overhead. + +All member functions that change node/attribute data or structure are non-constant and thus can not be called on constant handles. However, you can easily convert constant handle to non-constant one by simple assignment: `void foo(const pugi::xml_node& n) { pugi::xml_node nc = n; }`, so const-correctness here mainly provides additional documentation. + +As discussed before, nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent. You can use `set_name` and `set_value` member functions to set them. Similar functions are available for attributes; however, the `set_value` function is overloaded for some other types except strings, like floating-point numbers. Also, attribute value can be set using an assignment operator. 
This is an example of setting node/attribute name and value ([@samples/modify_base.cpp]): + +[import samples/modify_base.cpp] +[code_modify_base_node] +[code_modify_base_attr] + +Nodes and attributes do not exist outside of document tree, so you can't create them without adding them to some document. A node or attribute can be created at the end of node/attribute list or before\/after some other node. All insertion functions return the handle to newly created object on success, and null handle on failure. Even if the operation fails (for example, if you're trying to add a child node to PCDATA node), the document remains in consistent state, but the requested node/attribute is not added. + +[caution attribute() and child() functions do not add attributes or nodes to the tree, so code like `node.attribute("id") = 123;` will not do anything if `node` does not have an attribute with name `"id"`. Make sure you're operating with existing attributes/nodes by adding them if necessary.] + +This is an example of adding new attributes\/nodes to the document ([@samples/modify_add.cpp]): + +[import samples/modify_add.cpp] +[code_modify_add] + +[#xml_node::remove_attribute][#xml_node::remove_child] +If you do not want your document to contain some node or attribute, you can remove it with `remove_attribute` and `remove_child` functions. Removing the attribute or node invalidates all handles to the same underlying object, and also invalidates all iterators pointing to the same object. Removing node also invalidates all past-the-end iterators to its attribute or child node list. Be careful to ensure that all such handles and iterators either do not exist or are not used after the attribute\/node is removed. 
+ +This is an example of removing attributes\/nodes from the document ([@samples/modify_remove.cpp]): + +[import samples/modify_remove.cpp] +[code_modify_remove] + +[endsect] [/modify] + +[section:saving Saving document] + +Often after creating a new document or loading the existing one and processing it, it is necessary to save the result back to file. Also it is occasionally useful to output the whole document or a subtree to some stream; use cases include debug printing, serialization via network or other text-oriented medium, etc. pugixml provides several functions to output any subtree of the document to a file, stream or another generic transport interface; these functions allow you to customize the output format, and also perform necessary encoding conversions. + +The node/attribute data is written to the destination properly formatted according to the node type; all special XML symbols, such as < and &, are properly escaped. In order to guard against forgotten node/attribute names, empty node/attribute names are printed as `":anonymous"`. For proper output, make sure all node and attribute names are set to meaningful values. + +If you want to save the whole document to a file, you can use the `save_file` function, which returns `true` on success. This is a simple example of saving XML document to file ([@samples/save_file.cpp]): + +[import samples/save_file.cpp] +[code_save_file] + +For additional interoperability pugixml provides functions for saving document to any object which implements C++ std::ostream interface. This allows you to save documents to any standard C++ stream (e.g. file stream) or any third-party compliant implementation (e.g. Boost Iostreams). Most notably, this allows for easy debug output, since you can use `std::cout` stream as saving target. There are two functions, one works with narrow character streams, another handles wide character ones.
+ +This is a simple example of saving XML document to standard output ([@samples/save_stream.cpp]): + +[import samples/save_stream.cpp] +[code_save_stream] + +All of the above saving functions are implemented in terms of writer interface. This is a simple interface with a single function, which is called several times during output process with chunks of document data as input. In order to output the document via some custom transport, for example sockets, you should create an object which implements `xml_writer` interface and pass it to `xml_document::save` function. + +This is a simple example of custom writer for saving document data to STL string ([@samples/save_custom_writer.cpp]); read the sample code for more complex examples: + +[import samples/save_custom_writer.cpp] +[code_save_custom_writer] + +While the previously described functions saved the whole document to the destination, it is easy to save a single subtree. Instead of calling `xml_document::save`, just call `xml_node::print` function on the target node. You can save node contents to C++ IOstream object or custom writer in this way. Saving a subtree slightly differs from saving the whole document; read the manual for more information. + +[endsect] [/saving] + +[section:feedback Feedback] + +If you believe you've found a bug in pugixml, please file an issue via [@http://code.google.com/p/pugixml/issues/entry issue submission form]. Be sure to include the relevant information so that the bug can be reproduced: the version of pugixml, compiler version and target architecture, the code that uses pugixml and exhibits the bug, etc. Feature requests and contributions can be filed as issues, too. + +[#email] +If filing an issue is not possible due to privacy or other concerns, you can contact pugixml author by e-mail directly: [@mailto:arseny.kapoulkine@gmail.com arseny.kapoulkine@gmail.com].
+ +[endsect] [/feedback] + +[section:license License] + +The pugixml library is distributed under the MIT license: + +[: +Copyright (c) 2006-2010 Arseny Kapoulkine + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
+] + +[endsect] [/license] + +[endsect] [/main] + +[/ vim:et ] diff --git a/docs/quickstart.xsl b/docs/quickstart.xsl index 1e2af8d..0ad5704 100644 --- a/docs/quickstart.xsl +++ b/docs/quickstart.xsl @@ -1,8 +1,8 @@ - - - section toc - - - - - + + + section toc + + + + + diff --git a/docs/samples/character.xml b/docs/samples/character.xml index b0f6f3f..078e0bf 100644 --- a/docs/samples/character.xml +++ b/docs/samples/character.xml @@ -1,8 +1,8 @@ - - - - - - - - + + + + + + + + diff --git a/docs/samples/custom_memory_management.cpp b/docs/samples/custom_memory_management.cpp index 7e69528..92ccb71 100644 --- a/docs/samples/custom_memory_management.cpp +++ b/docs/samples/custom_memory_management.cpp @@ -1,27 +1,27 @@ -#include "pugixml.hpp" - -#include - -//[code_custom_memory_management_decl -void* custom_allocate(size_t size) -{ - return new (std::nothrow) char[size]; -} - -void custom_deallocate(void* ptr) -{ - delete[] static_cast(ptr); -} -//] - -int main() -{ -//[code_custom_memory_management_call - pugi::set_memory_management_functions(custom_allocate, custom_deallocate); -//] - - pugi::xml_document doc; - doc.load(""); -} - -// vim:et +#include "pugixml.hpp" + +#include + +//[code_custom_memory_management_decl +void* custom_allocate(size_t size) +{ + return new (std::nothrow) char[size]; +} + +void custom_deallocate(void* ptr) +{ + delete[] static_cast(ptr); +} +//] + +int main() +{ +//[code_custom_memory_management_call + pugi::set_memory_management_functions(custom_allocate, custom_deallocate); +//] + + pugi::xml_document doc; + doc.load(""); +} + +// vim:et diff --git a/docs/samples/include.cpp b/docs/samples/include.cpp index 0d80887..fa615a4 100644 --- a/docs/samples/include.cpp +++ b/docs/samples/include.cpp @@ -1,64 +1,64 @@ -#include "pugixml.hpp" - -#include -#include - -//[code_include -bool load_preprocess(pugi::xml_document& doc, const char* path); - -bool preprocess(pugi::xml_node node) -{ - for (pugi::xml_node child = node.first_child(); 
child; ) - { - if (child.type() == pugi::node_pi && strcmp(child.name(), "include") == 0) - { - pugi::xml_node include = child; - - // load new preprocessed document (note: ideally this should handle relative paths) - const char* path = include.value(); - - pugi::xml_document doc; - if (!load_preprocess(doc, path)) return false; - - // insert the comment marker above include directive - node.insert_child_before(pugi::node_comment, include).set_value(path); - - // copy the document above the include directive (this retains the original order!) - for (pugi::xml_node ic = doc.first_child(); ic; ic = ic.next_sibling()) - { - node.insert_copy_before(ic, include); - } - - // remove the include node and move to the next child - child = child.next_sibling(); - - node.remove_child(include); - } - else - { - if (!preprocess(child)) return false; - - child = child.next_sibling(); - } - } - - return true; -} - -bool load_preprocess(pugi::xml_document& doc, const char* path) -{ - pugi::xml_parse_result result = doc.load_file(path, pugi::parse_default | pugi::parse_pi); // for - - return result ? 
preprocess(doc) : false; -} -//] - -int main() -{ - pugi::xml_document doc; - if (!load_preprocess(doc, "character.xml")) return -1; - - doc.print(std::cout); -} - -// vim:et +#include "pugixml.hpp" + +#include +#include + +//[code_include +bool load_preprocess(pugi::xml_document& doc, const char* path); + +bool preprocess(pugi::xml_node node) +{ + for (pugi::xml_node child = node.first_child(); child; ) + { + if (child.type() == pugi::node_pi && strcmp(child.name(), "include") == 0) + { + pugi::xml_node include = child; + + // load new preprocessed document (note: ideally this should handle relative paths) + const char* path = include.value(); + + pugi::xml_document doc; + if (!load_preprocess(doc, path)) return false; + + // insert the comment marker above include directive + node.insert_child_before(pugi::node_comment, include).set_value(path); + + // copy the document above the include directive (this retains the original order!) + for (pugi::xml_node ic = doc.first_child(); ic; ic = ic.next_sibling()) + { + node.insert_copy_before(ic, include); + } + + // remove the include node and move to the next child + child = child.next_sibling(); + + node.remove_child(include); + } + else + { + if (!preprocess(child)) return false; + + child = child.next_sibling(); + } + } + + return true; +} + +bool load_preprocess(pugi::xml_document& doc, const char* path) +{ + pugi::xml_parse_result result = doc.load_file(path, pugi::parse_default | pugi::parse_pi); // for + + return result ? 
preprocess(doc) : false; +} +//] + +int main() +{ + pugi::xml_document doc; + if (!load_preprocess(doc, "character.xml")) return -1; + + doc.print(std::cout); +} + +// vim:et diff --git a/docs/samples/load_error_handling.cpp b/docs/samples/load_error_handling.cpp index c7f44b8..18dd331 100644 --- a/docs/samples/load_error_handling.cpp +++ b/docs/samples/load_error_handling.cpp @@ -1,31 +1,31 @@ -#include "pugixml.hpp" - -#include - -void check_xml(const char* source) -{ -//[code_load_error_handling - pugi::xml_document doc; - pugi::xml_parse_result result = doc.load(source); - - if (result) - std::cout << "XML [" << source << "] parsed without errors, attr value: [" << doc.child("node").attribute("attr").value() << "]\n\n"; - else - { - std::cout << "XML [" << source << "] parsed with errors, attr value: [" << doc.child("node").attribute("attr").value() << "]\n"; - std::cout << "Error description: " << result.description() << "\n"; - std::cout << "Error offset: " << result.offset << " (error at [..." << (source + result.offset) << "]\n\n"; - } -//] -} - -int main() -{ - check_xml("text"); - check_xml("text"); - check_xml("text"); - check_xml("<#tag />"); -} - -// vim:et +#include "pugixml.hpp" + +#include + +void check_xml(const char* source) +{ +//[code_load_error_handling + pugi::xml_document doc; + pugi::xml_parse_result result = doc.load(source); + + if (result) + std::cout << "XML [" << source << "] parsed without errors, attr value: [" << doc.child("node").attribute("attr").value() << "]\n\n"; + else + { + std::cout << "XML [" << source << "] parsed with errors, attr value: [" << doc.child("node").attribute("attr").value() << "]\n"; + std::cout << "Error description: " << result.description() << "\n"; + std::cout << "Error offset: " << result.offset << " (error at [..." 
<< (source + result.offset) << "]\n\n"; + } +//] +} + +int main() +{ + check_xml("text"); + check_xml("text"); + check_xml("text"); + check_xml("<#tag />"); +} + +// vim:et diff --git a/docs/samples/load_file.cpp b/docs/samples/load_file.cpp index 3e77f04..f7b06c9 100644 --- a/docs/samples/load_file.cpp +++ b/docs/samples/load_file.cpp @@ -1,16 +1,16 @@ -#include "pugixml.hpp" - -#include - -int main() -{ -//[code_load_file - pugi::xml_document doc; - - pugi::xml_parse_result result = doc.load_file("tree.xml"); - - std::cout << "Load result: " << result.description() << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl; -//] -} - -// vim:et +#include "pugixml.hpp" + +#include + +int main() +{ +//[code_load_file + pugi::xml_document doc; + + pugi::xml_parse_result result = doc.load_file("tree.xml"); + + std::cout << "Load result: " << result.description() << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl; +//] +} + +// vim:et diff --git a/docs/samples/load_memory.cpp b/docs/samples/load_memory.cpp index 67d5dee..365fb64 100644 --- a/docs/samples/load_memory.cpp +++ b/docs/samples/load_memory.cpp @@ -1,64 +1,64 @@ -#include "pugixml.hpp" - -#include - -int main() -{ -//[code_load_memory_decl - const char source[] = "0 0 1 1"; - size_t size = sizeof(source); -//] - - pugi::xml_document doc; - - { - //[code_load_memory_buffer - // You can use load_buffer to load document from immutable memory block: - pugi::xml_parse_result result = doc.load_buffer(source, size); - //] - - std::cout << "Load result: " << result.description() << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl; - } - - { - //[code_load_memory_buffer_inplace - // You can use load_buffer_inplace to load document from mutable memory block; the block's lifetime must exceed that of document - char* buffer = new char[size]; - memcpy(buffer, source, size); - - // The block can be allocated by any method; the block is modified 
during parsing - pugi::xml_parse_result result = doc.load_buffer_inplace(buffer, size); - - //<- - std::cout << "Load result: " << result.description() << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl; - //-> - // You have to destroy the block yourself after the document is no longer used - delete[] buffer; - //] - } - - { - //[code_load_memory_buffer_inplace_own - // You can use load_buffer_inplace_own to load document from mutable memory block and to pass the ownership of this block - // The block has to be allocated via pugixml allocation function - using i.e. operator new here is incorrect - char* buffer = static_cast(pugi::get_memory_allocation_function()(size)); - memcpy(buffer, source, size); - - // The block will be deleted by the document - pugi::xml_parse_result result = doc.load_buffer_inplace_own(buffer, size); - //] - - std::cout << "Load result: " << result.description() << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl; - } - - { - //[code_load_memory_string - // You can use load to load document from null-terminated strings, for example literals: - pugi::xml_parse_result result = doc.load("0 0 1 1"); - //] - - std::cout << "Load result: " << result.description() << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl; - } -} - -// vim:et +#include "pugixml.hpp" + +#include + +int main() +{ +//[code_load_memory_decl + const char source[] = "0 0 1 1"; + size_t size = sizeof(source); +//] + + pugi::xml_document doc; + + { + //[code_load_memory_buffer + // You can use load_buffer to load document from immutable memory block: + pugi::xml_parse_result result = doc.load_buffer(source, size); + //] + + std::cout << "Load result: " << result.description() << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl; + } + + { + //[code_load_memory_buffer_inplace + // You can use load_buffer_inplace to load document from mutable memory block; the block's 
lifetime must exceed that of document + char* buffer = new char[size]; + memcpy(buffer, source, size); + + // The block can be allocated by any method; the block is modified during parsing + pugi::xml_parse_result result = doc.load_buffer_inplace(buffer, size); + + //<- + std::cout << "Load result: " << result.description() << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl; + //-> + // You have to destroy the block yourself after the document is no longer used + delete[] buffer; + //] + } + + { + //[code_load_memory_buffer_inplace_own + // You can use load_buffer_inplace_own to load document from mutable memory block and to pass the ownership of this block + // The block has to be allocated via pugixml allocation function - using i.e. operator new here is incorrect + char* buffer = static_cast(pugi::get_memory_allocation_function()(size)); + memcpy(buffer, source, size); + + // The block will be deleted by the document + pugi::xml_parse_result result = doc.load_buffer_inplace_own(buffer, size); + //] + + std::cout << "Load result: " << result.description() << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl; + } + + { + //[code_load_memory_string + // You can use load to load document from null-terminated strings, for example literals: + pugi::xml_parse_result result = doc.load("0 0 1 1"); + //] + + std::cout << "Load result: " << result.description() << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl; + } +} + +// vim:et diff --git a/docs/samples/load_options.cpp b/docs/samples/load_options.cpp index c192914..04b4b46 100644 --- a/docs/samples/load_options.cpp +++ b/docs/samples/load_options.cpp @@ -1,30 +1,30 @@ -#include "pugixml.hpp" - -#include - -int main() -{ - pugi::xml_document doc; - -//[code_load_options - const char* source = "<"; - - // Parsing with default options; note that comment node is not added to the tree, and entity reference < is expanded - doc.load(source); - 
std::cout << "First node value: [" << doc.first_child().value() << "], node child value: [" << doc.child_value("node") << "]\n"; - - // Parsing with additional parse_comments option; comment node is now added to the tree - doc.load(source, pugi::parse_default | pugi::parse_comments); - std::cout << "First node value: [" << doc.first_child().value() << "], node child value: [" << doc.child_value("node") << "]\n"; - - // Parsing with additional parse_comments option and without the (default) parse_escapes option; < is not expanded - doc.load(source, (pugi::parse_default | pugi::parse_comments) & ~pugi::parse_escapes); - std::cout << "First node value: [" << doc.first_child().value() << "], node child value: [" << doc.child_value("node") << "]\n"; - - // Parsing with minimal option mask; comment node is not added to the tree, and < is not expanded - doc.load(source, pugi::parse_minimal); - std::cout << "First node value: [" << doc.first_child().value() << "], node child value: [" << doc.child_value("node") << "]\n"; -//] -} - -// vim:et +#include "pugixml.hpp" + +#include + +int main() +{ + pugi::xml_document doc; + +//[code_load_options + const char* source = "<"; + + // Parsing with default options; note that comment node is not added to the tree, and entity reference < is expanded + doc.load(source); + std::cout << "First node value: [" << doc.first_child().value() << "], node child value: [" << doc.child_value("node") << "]\n"; + + // Parsing with additional parse_comments option; comment node is now added to the tree + doc.load(source, pugi::parse_default | pugi::parse_comments); + std::cout << "First node value: [" << doc.first_child().value() << "], node child value: [" << doc.child_value("node") << "]\n"; + + // Parsing with additional parse_comments option and without the (default) parse_escapes option; < is not expanded + doc.load(source, (pugi::parse_default | pugi::parse_comments) & ~pugi::parse_escapes); + std::cout << "First node value: [" << 
doc.first_child().value() << "], node child value: [" << doc.child_value("node") << "]\n"; + + // Parsing with minimal option mask; comment node is not added to the tree, and < is not expanded + doc.load(source, pugi::parse_minimal); + std::cout << "First node value: [" << doc.first_child().value() << "], node child value: [" << doc.child_value("node") << "]\n"; +//] +} + +// vim:et diff --git a/docs/samples/load_stream.cpp b/docs/samples/load_stream.cpp index c717eeb..05cfb7f 100644 --- a/docs/samples/load_stream.cpp +++ b/docs/samples/load_stream.cpp @@ -1,97 +1,97 @@ -#include "pugixml.hpp" - -#include -#include -#include - -void print_doc(const char* message, const pugi::xml_document& doc, const pugi::xml_parse_result& result) -{ - std::cout - << message - << "\t: load result '" << result.description() << "'" - << ", first character of root name: U+" << std::hex << std::uppercase << std::setw(4) << std::setfill('0') << pugi::as_wide(doc.first_child().name())[0] - << ", year: " << doc.first_child().first_child().first_child().child_value() - << std::endl; -} - -bool try_imbue(std::wistream& stream, const char* name) -{ - try - { - stream.imbue(std::locale(name)); - - return true; - } - catch (const std::exception&) - { - return false; - } -} - -int main() -{ - pugi::xml_document doc; - - { - //[code_load_stream - std::ifstream stream("weekly-utf-8.xml"); - pugi::xml_parse_result result = doc.load(stream); - //] - - // first character of root name: U+9031, year: 1997 - print_doc("UTF8 file from narrow stream", doc, result); - } - - { - std::ifstream stream("weekly-utf-16.xml"); - pugi::xml_parse_result result = doc.load(stream); - - // first character of root name: U+9031, year: 1997 - print_doc("UTF16 file from narrow stream", doc, result); - } - - { - // Since wide streams are treated as UTF-16/32 ones, you can't load the UTF-8 file from a wide stream - // directly if you have localized characters; you'll have to provide a UTF8 locale (there is no - // standard 
one; you can use utf8_codecvt_facet from Boost or codecvt_utf8 from C++0x) - std::wifstream stream("weekly-utf-8.xml"); - - if (try_imbue(stream, "en_US.UTF-8")) // try Linux encoding - { - pugi::xml_parse_result result = doc.load(stream); - - // first character of root name: U+00E9, year: 1997 - print_doc("UTF8 file from wide stream", doc, result); - } - else - { - std::cout << "UTF-8 locale is not available\n"; - } - } - - { - // Since wide streams are treated as UTF-16/32 ones, you can't load the UTF-16 file from a wide stream without - // using custom codecvt; you can use codecvt_utf16 from C++0x - } - - { - // Since encoding names are non-standard, you can't load the Shift-JIS (or any other non-ASCII) file - // from a wide stream portably - std::wifstream stream("weekly-shift_jis.xml"); - - if (try_imbue(stream, ".932") || // try Microsoft encoding - try_imbue(stream, "ja_JP.SJIS")) // try Linux encoding; run "localedef -i ja_JP -c -f SHIFT_JIS /usr/lib/locale/ja_JP.SJIS" to get it - { - pugi::xml_parse_result result = doc.load(stream); - - // first character of root name: U+9031, year: 1997 - print_doc("Shift-JIS file from wide stream", doc, result); - } - else - { - std::cout << "Shift-JIS locale is not available\n"; - } - } -} - -// vim:et +#include "pugixml.hpp" + +#include +#include +#include + +void print_doc(const char* message, const pugi::xml_document& doc, const pugi::xml_parse_result& result) +{ + std::cout + << message + << "\t: load result '" << result.description() << "'" + << ", first character of root name: U+" << std::hex << std::uppercase << std::setw(4) << std::setfill('0') << pugi::as_wide(doc.first_child().name())[0] + << ", year: " << doc.first_child().first_child().first_child().child_value() + << std::endl; +} + +bool try_imbue(std::wistream& stream, const char* name) +{ + try + { + stream.imbue(std::locale(name)); + + return true; + } + catch (const std::exception&) + { + return false; + } +} + +int main() +{ + pugi::xml_document doc; 
+ + { + //[code_load_stream + std::ifstream stream("weekly-utf-8.xml"); + pugi::xml_parse_result result = doc.load(stream); + //] + + // first character of root name: U+9031, year: 1997 + print_doc("UTF8 file from narrow stream", doc, result); + } + + { + std::ifstream stream("weekly-utf-16.xml"); + pugi::xml_parse_result result = doc.load(stream); + + // first character of root name: U+9031, year: 1997 + print_doc("UTF16 file from narrow stream", doc, result); + } + + { + // Since wide streams are treated as UTF-16/32 ones, you can't load the UTF-8 file from a wide stream + // directly if you have localized characters; you'll have to provide a UTF8 locale (there is no + // standard one; you can use utf8_codecvt_facet from Boost or codecvt_utf8 from C++0x) + std::wifstream stream("weekly-utf-8.xml"); + + if (try_imbue(stream, "en_US.UTF-8")) // try Linux encoding + { + pugi::xml_parse_result result = doc.load(stream); + + // first character of root name: U+00E9, year: 1997 + print_doc("UTF8 file from wide stream", doc, result); + } + else + { + std::cout << "UTF-8 locale is not available\n"; + } + } + + { + // Since wide streams are treated as UTF-16/32 ones, you can't load the UTF-16 file from a wide stream without + // using custom codecvt; you can use codecvt_utf16 from C++0x + } + + { + // Since encoding names are non-standard, you can't load the Shift-JIS (or any other non-ASCII) file + // from a wide stream portably + std::wifstream stream("weekly-shift_jis.xml"); + + if (try_imbue(stream, ".932") || // try Microsoft encoding + try_imbue(stream, "ja_JP.SJIS")) // try Linux encoding; run "localedef -i ja_JP -c -f SHIFT_JIS /usr/lib/locale/ja_JP.SJIS" to get it + { + pugi::xml_parse_result result = doc.load(stream); + + // first character of root name: U+9031, year: 1997 + print_doc("Shift-JIS file from wide stream", doc, result); + } + else + { + std::cout << "Shift-JIS locale is not available\n"; + } + } +} + +// vim:et diff --git 
a/docs/samples/modify_add.cpp b/docs/samples/modify_add.cpp index 76619b3..8fecfc6 100644 --- a/docs/samples/modify_add.cpp +++ b/docs/samples/modify_add.cpp @@ -1,32 +1,32 @@ -#include "pugixml.hpp" - -#include - -int main() -{ - pugi::xml_document doc; - - //[code_modify_add - // add node with some name - pugi::xml_node node = doc.append_child(); - node.set_name("node"); - - // add description node with text child - pugi::xml_node descr = node.append_child(); - descr.set_name("description"); - descr.append_child(pugi::node_pcdata).set_value("Simple node"); - - // add param node before the description - pugi::xml_node param = node.insert_child_before(pugi::node_element, descr); - param.set_name("param"); - - // add attributes to param node - param.append_attribute("name") = "version"; - param.append_attribute("value") = 1.1; - param.insert_attribute_after("type", param.attribute("name")) = "float"; - //] - - doc.print(std::cout); -} - -// vim:et +#include "pugixml.hpp" + +#include + +int main() +{ + pugi::xml_document doc; + + //[code_modify_add + // add node with some name + pugi::xml_node node = doc.append_child(); + node.set_name("node"); + + // add description node with text child + pugi::xml_node descr = node.append_child(); + descr.set_name("description"); + descr.append_child(pugi::node_pcdata).set_value("Simple node"); + + // add param node before the description + pugi::xml_node param = node.insert_child_before(pugi::node_element, descr); + param.set_name("param"); + + // add attributes to param node + param.append_attribute("name") = "version"; + param.append_attribute("value") = 1.1; + param.insert_attribute_after("type", param.attribute("name")) = "float"; + //] + + doc.print(std::cout); +} + +// vim:et diff --git a/docs/samples/modify_base.cpp b/docs/samples/modify_base.cpp index 7d52bd1..7d0959a 100644 --- a/docs/samples/modify_base.cpp +++ b/docs/samples/modify_base.cpp @@ -1,43 +1,43 @@ -#include "pugixml.hpp" - -#include -#include - -int main() -{ 
- pugi::xml_document doc; - if (!doc.load("text", pugi::parse_default | pugi::parse_comments)) return -1; - - //[code_modify_base_node - pugi::xml_node node = doc.child("node"); - - // change node name - std::cout << node.set_name("notnode"); - std::cout << ", new node name: " << node.name() << std::endl; - - // change comment text - std::cout << doc.last_child().set_value("useless comment"); - std::cout << ", new comment text: " << doc.last_child().value() << std::endl; - - // we can't change value of the element or name of the comment - std::cout << node.set_value("1") << ", " << doc.last_child().set_name("2") << std::endl; - //] - - //[code_modify_base_attr - pugi::xml_attribute attr = node.attribute("id"); - - // change attribute name/value - std::cout << attr.set_name("key") << ", " << attr.set_value("345"); - std::cout << ", new attribute: " << attr.name() << "=" << attr.value() << std::endl; - - // we can use numbers or booleans - attr.set_value(1.234); - std::cout << "new attribute value: " << attr.value() << std::endl; - - // we can also use assignment operators for more concise code - attr = true; - std::cout << "final attribute value: " << attr.value() << std::endl; - //] -} - -// vim:et +#include "pugixml.hpp" + +#include +#include + +int main() +{ + pugi::xml_document doc; + if (!doc.load("text", pugi::parse_default | pugi::parse_comments)) return -1; + + //[code_modify_base_node + pugi::xml_node node = doc.child("node"); + + // change node name + std::cout << node.set_name("notnode"); + std::cout << ", new node name: " << node.name() << std::endl; + + // change comment text + std::cout << doc.last_child().set_value("useless comment"); + std::cout << ", new comment text: " << doc.last_child().value() << std::endl; + + // we can't change value of the element or name of the comment + std::cout << node.set_value("1") << ", " << doc.last_child().set_name("2") << std::endl; + //] + + //[code_modify_base_attr + pugi::xml_attribute attr = 
node.attribute("id"); + + // change attribute name/value + std::cout << attr.set_name("key") << ", " << attr.set_value("345"); + std::cout << ", new attribute: " << attr.name() << "=" << attr.value() << std::endl; + + // we can use numbers or booleans + attr.set_value(1.234); + std::cout << "new attribute value: " << attr.value() << std::endl; + + // we can also use assignment operators for more concise code + attr = true; + std::cout << "final attribute value: " << attr.value() << std::endl; + //] +} + +// vim:et diff --git a/docs/samples/modify_remove.cpp b/docs/samples/modify_remove.cpp index 015b0da..28c2f6b 100644 --- a/docs/samples/modify_remove.cpp +++ b/docs/samples/modify_remove.cpp @@ -1,27 +1,27 @@ -#include "pugixml.hpp" - -#include - -int main() -{ - pugi::xml_document doc; - if (!doc.load("Simple node")) return -1; - - //[code_modify_remove - // remove description node with the whole subtree - pugi::xml_node node = doc.child("node"); - node.remove_child("description"); - - // remove id attribute - pugi::xml_node param = node.child("param"); - param.remove_attribute("value"); - - // we can also remove nodes/attributes by handles - pugi::xml_attribute id = param.attribute("name"); - param.remove_attribute(id); - //] - - doc.print(std::cout); -} - -// vim:et +#include "pugixml.hpp" + +#include + +int main() +{ + pugi::xml_document doc; + if (!doc.load("Simple node")) return -1; + + //[code_modify_remove + // remove description node with the whole subtree + pugi::xml_node node = doc.child("node"); + node.remove_child("description"); + + // remove id attribute + pugi::xml_node param = node.child("param"); + param.remove_attribute("value"); + + // we can also remove nodes/attributes by handles + pugi::xml_attribute id = param.attribute("name"); + param.remove_attribute(id); + //] + + doc.print(std::cout); +} + +// vim:et diff --git a/docs/samples/save_custom_writer.cpp b/docs/samples/save_custom_writer.cpp index c0999e1..defcb33 100644 --- 
a/docs/samples/save_custom_writer.cpp +++ b/docs/samples/save_custom_writer.cpp @@ -1,116 +1,116 @@ -#include "pugixml.hpp" - -#include - -#include - -//[code_save_custom_writer -struct xml_string_writer: pugi::xml_writer -{ - std::string result; - - virtual void write(const void* data, size_t size) - { - result += std::string(static_cast(data), size); - } -}; -//] - -struct xml_memory_writer: pugi::xml_writer -{ - char* buffer; - size_t capacity; - - size_t result; - - xml_memory_writer(): buffer(0), capacity(0), result(0) - { - } - - xml_memory_writer(char* buffer, size_t capacity): buffer(buffer), capacity(capacity), result(0) - { - } - - size_t written_size() const - { - return result < capacity ? result : capacity; - } - - virtual void write(const void* data, size_t size) - { - if (result < capacity) - { - size_t chunk = (capacity - result < size) ? capacity - result : size; - - memcpy(buffer + result, data, chunk); - } - - result += size; - } -}; - -std::string node_to_string(pugi::xml_node node) -{ - xml_string_writer writer; - node.print(writer); - - return writer.result; -} - -char* node_to_buffer(pugi::xml_node node, char* buffer, size_t size) -{ - if (size == 0) return buffer; - - // leave one character for null terminator - xml_memory_writer writer(buffer, size - 1); - node.print(writer); - - // null terminate - buffer[writer.written_size()] = 0; - - return buffer; -} - -char* node_to_buffer_heap(pugi::xml_node node) -{ - // first pass: get required memory size - xml_memory_writer counter; - node.print(counter); - - // allocate necessary size (+1 for null termination) - char* buffer = new char[counter.result + 1]; - - // second pass: actual printing - xml_memory_writer writer(buffer, counter.result); - node.print(writer); - - // null terminate - buffer[writer.written_size()] = 0; - - return buffer; -} - -int main() -{ - // get a test document - pugi::xml_document doc; - doc.load("hey"); - - // get contents as std::string (single pass) - 
printf("contents: [%s]\n", node_to_string(doc).c_str()); - - // get contents into fixed-size buffer (single pass) - char large_buf[128]; - printf("contents: [%s]\n", node_to_buffer(doc, large_buf, sizeof(large_buf))); - - // get contents into fixed-size buffer (single pass, shows truncating behavior) - char small_buf[22]; - printf("contents: [%s]\n", node_to_buffer(doc, small_buf, sizeof(small_buf))); - - // get contents into heap-allocated buffer (two passes) - char* heap_buf = node_to_buffer_heap(doc); - printf("contents: [%s]\n", heap_buf); - delete[] heap_buf; -} - -// vim:et +#include "pugixml.hpp" + +#include + +#include + +//[code_save_custom_writer +struct xml_string_writer: pugi::xml_writer +{ + std::string result; + + virtual void write(const void* data, size_t size) + { + result += std::string(static_cast(data), size); + } +}; +//] + +struct xml_memory_writer: pugi::xml_writer +{ + char* buffer; + size_t capacity; + + size_t result; + + xml_memory_writer(): buffer(0), capacity(0), result(0) + { + } + + xml_memory_writer(char* buffer, size_t capacity): buffer(buffer), capacity(capacity), result(0) + { + } + + size_t written_size() const + { + return result < capacity ? result : capacity; + } + + virtual void write(const void* data, size_t size) + { + if (result < capacity) + { + size_t chunk = (capacity - result < size) ? 
capacity - result : size; + + memcpy(buffer + result, data, chunk); + } + + result += size; + } +}; + +std::string node_to_string(pugi::xml_node node) +{ + xml_string_writer writer; + node.print(writer); + + return writer.result; +} + +char* node_to_buffer(pugi::xml_node node, char* buffer, size_t size) +{ + if (size == 0) return buffer; + + // leave one character for null terminator + xml_memory_writer writer(buffer, size - 1); + node.print(writer); + + // null terminate + buffer[writer.written_size()] = 0; + + return buffer; +} + +char* node_to_buffer_heap(pugi::xml_node node) +{ + // first pass: get required memory size + xml_memory_writer counter; + node.print(counter); + + // allocate necessary size (+1 for null termination) + char* buffer = new char[counter.result + 1]; + + // second pass: actual printing + xml_memory_writer writer(buffer, counter.result); + node.print(writer); + + // null terminate + buffer[writer.written_size()] = 0; + + return buffer; +} + +int main() +{ + // get a test document + pugi::xml_document doc; + doc.load("hey"); + + // get contents as std::string (single pass) + printf("contents: [%s]\n", node_to_string(doc).c_str()); + + // get contents into fixed-size buffer (single pass) + char large_buf[128]; + printf("contents: [%s]\n", node_to_buffer(doc, large_buf, sizeof(large_buf))); + + // get contents into fixed-size buffer (single pass, shows truncating behavior) + char small_buf[22]; + printf("contents: [%s]\n", node_to_buffer(doc, small_buf, sizeof(small_buf))); + + // get contents into heap-allocated buffer (two passes) + char* heap_buf = node_to_buffer_heap(doc); + printf("contents: [%s]\n", heap_buf); + delete[] heap_buf; +} + +// vim:et diff --git a/docs/samples/save_file.cpp b/docs/samples/save_file.cpp index c30e986..30c1aa1 100644 --- a/docs/samples/save_file.cpp +++ b/docs/samples/save_file.cpp @@ -1,17 +1,17 @@ -#include "pugixml.hpp" - -#include - -int main() -{ - // get a test document - pugi::xml_document doc; - 
doc.load("hey"); - - //[code_save_file - // save document to file - std::cout << "Saving result: " << doc.save_file("save_file_output.xml") << std::endl; - //] -} - -// vim:et +#include "pugixml.hpp" + +#include + +int main() +{ + // get a test document + pugi::xml_document doc; + doc.load("hey"); + + //[code_save_file + // save document to file + std::cout << "Saving result: " << doc.save_file("save_file_output.xml") << std::endl; + //] +} + +// vim:et diff --git a/docs/samples/save_options.cpp b/docs/samples/save_options.cpp index 45ac096..6a49f66 100644 --- a/docs/samples/save_options.cpp +++ b/docs/samples/save_options.cpp @@ -1,48 +1,48 @@ -#include "pugixml.hpp" - -#include - -int main() -{ - //[code_save_options - // get a test document - pugi::xml_document doc; - doc.load("hey"); - - // default options; prints - // - // - // hey - // - doc.save(std::cout); - std::cout << std::endl; - - // default options with custom indentation string; prints - // - // - // --hey - // - doc.save(std::cout, "--"); - std::cout << std::endl; - - // default options without indentation; prints - // - // - // hey - // - doc.save(std::cout, "\t", pugi::format_default & ~pugi::format_indent); // can also pass "" instead of indentation string for the same effect - std::cout << std::endl; - - // raw output; prints - // hey - doc.save(std::cout, "\t", pugi::format_raw); - std::cout << std::endl << std::endl; - - // raw output without declaration; prints - // hey - doc.save(std::cout, "\t", pugi::format_raw | pugi::format_no_declaration); - std::cout << std::endl; - //] -} - -// vim:et +#include "pugixml.hpp" + +#include + +int main() +{ + //[code_save_options + // get a test document + pugi::xml_document doc; + doc.load("hey"); + + // default options; prints + // + // + // hey + // + doc.save(std::cout); + std::cout << std::endl; + + // default options with custom indentation string; prints + // + // + // --hey + // + doc.save(std::cout, "--"); + std::cout << std::endl; + + // default 
options without indentation; prints + // + // + // hey + // + doc.save(std::cout, "\t", pugi::format_default & ~pugi::format_indent); // can also pass "" instead of indentation string for the same effect + std::cout << std::endl; + + // raw output; prints + // hey + doc.save(std::cout, "\t", pugi::format_raw); + std::cout << std::endl << std::endl; + + // raw output without declaration; prints + // hey + doc.save(std::cout, "\t", pugi::format_raw | pugi::format_no_declaration); + std::cout << std::endl; + //] +} + +// vim:et diff --git a/docs/samples/save_stream.cpp b/docs/samples/save_stream.cpp index 39e99eb..d01965d 100644 --- a/docs/samples/save_stream.cpp +++ b/docs/samples/save_stream.cpp @@ -1,18 +1,18 @@ -#include "pugixml.hpp" - -#include - -int main() -{ - // get a test document - pugi::xml_document doc; - doc.load("hey"); - - //[code_save_stream - // save document to standard output - std::cout << "Document:\n"; - doc.save(std::cout); - //] -} - -// vim:et +#include "pugixml.hpp" + +#include + +int main() +{ + // get a test document + pugi::xml_document doc; + doc.load("hey"); + + //[code_save_stream + // save document to standard output + std::cout << "Document:\n"; + doc.save(std::cout); + //] +} + +// vim:et diff --git a/docs/samples/save_subtree.cpp b/docs/samples/save_subtree.cpp index a1c9f2f..0091b3d 100644 --- a/docs/samples/save_subtree.cpp +++ b/docs/samples/save_subtree.cpp @@ -1,26 +1,26 @@ -#include "pugixml.hpp" - -#include - -int main() -{ - //[code_save_subtree - // get a test document - pugi::xml_document doc; - doc.load("hey"); - - // print document to standard output (prints hey) - doc.save(std::cout, "", pugi::format_raw); - std::cout << std::endl; - - // print document to standard output as a regular node (prints hey) - doc.print(std::cout, "", pugi::format_raw); - std::cout << std::endl; - - // print a subtree to standard output (prints hey) - doc.child("foo").child("call").print(std::cout, "", pugi::format_raw); - std::cout << 
std::endl; - //] -} - -// vim:et +#include "pugixml.hpp" + +#include + +int main() +{ + //[code_save_subtree + // get a test document + pugi::xml_document doc; + doc.load("hey"); + + // print document to standard output (prints hey) + doc.save(std::cout, "", pugi::format_raw); + std::cout << std::endl; + + // print document to standard output as a regular node (prints hey) + doc.print(std::cout, "", pugi::format_raw); + std::cout << std::endl; + + // print a subtree to standard output (prints hey) + doc.child("foo").child("call").print(std::cout, "", pugi::format_raw); + std::cout << std::endl; + //] +} + +// vim:et diff --git a/docs/samples/transitions.xml b/docs/samples/transitions.xml index 9c261fc..a195ef8 100644 --- a/docs/samples/transitions.xml +++ b/docs/samples/transitions.xml @@ -1,7 +1,7 @@ - - - - - - - + + + + + + + diff --git a/docs/samples/traverse_base.cpp b/docs/samples/traverse_base.cpp index b6f6d2e..d59c8b0 100644 --- a/docs/samples/traverse_base.cpp +++ b/docs/samples/traverse_base.cpp @@ -1,51 +1,51 @@ -#include "pugixml.hpp" - -#include -#include - -int main() -{ - pugi::xml_document doc; - if (!doc.load_file("xgconsole.xml")) return -1; - - pugi::xml_node tools = doc.child("Profile").child("Tools"); - - //[code_traverse_base_basic - for (pugi::xml_node tool = tools.first_child(); tool; tool = tool.next_sibling()) - { - std::cout << "Tool:"; - - for (pugi::xml_attribute attr = tool.first_attribute(); attr; attr = attr.next_attribute()) - { - std::cout << " " << attr.name() << "=" << attr.value(); - } - - std::cout << std::endl; - } - //] - - std::cout << std::endl; - - //[code_traverse_base_data - for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool")) - { - std::cout << "Tool " << tool.attribute("Filename").value(); - std::cout << ": AllowRemote " << tool.attribute("AllowRemote").as_bool(); - std::cout << ", Timeout " << tool.attribute("Timeout").as_int(); - std::cout << ", Description '" << 
tool.child_value("Description") << "'\n"; - } - //] - - std::cout << std::endl; - - //[code_traverse_base_contents - std::cout << "Tool for *.dae generation: " << tools.find_child_by_attribute("Tool", "OutputFileMasks", "*.dae").attribute("Filename").value() << "\n"; - - for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool")) - { - std::cout << "Tool " << tool.attribute("Filename").value() << "\n"; - } - //] -} - -// vim:et +#include "pugixml.hpp" + +#include +#include + +int main() +{ + pugi::xml_document doc; + if (!doc.load_file("xgconsole.xml")) return -1; + + pugi::xml_node tools = doc.child("Profile").child("Tools"); + + //[code_traverse_base_basic + for (pugi::xml_node tool = tools.first_child(); tool; tool = tool.next_sibling()) + { + std::cout << "Tool:"; + + for (pugi::xml_attribute attr = tool.first_attribute(); attr; attr = attr.next_attribute()) + { + std::cout << " " << attr.name() << "=" << attr.value(); + } + + std::cout << std::endl; + } + //] + + std::cout << std::endl; + + //[code_traverse_base_data + for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool")) + { + std::cout << "Tool " << tool.attribute("Filename").value(); + std::cout << ": AllowRemote " << tool.attribute("AllowRemote").as_bool(); + std::cout << ", Timeout " << tool.attribute("Timeout").as_int(); + std::cout << ", Description '" << tool.child_value("Description") << "'\n"; + } + //] + + std::cout << std::endl; + + //[code_traverse_base_contents + std::cout << "Tool for *.dae generation: " << tools.find_child_by_attribute("Tool", "OutputFileMasks", "*.dae").attribute("Filename").value() << "\n"; + + for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool")) + { + std::cout << "Tool " << tool.attribute("Filename").value() << "\n"; + } + //] +} + +// vim:et diff --git a/docs/samples/traverse_iter.cpp b/docs/samples/traverse_iter.cpp index 935540f..90e0dc6 100644 --- 
a/docs/samples/traverse_iter.cpp +++ b/docs/samples/traverse_iter.cpp @@ -1,27 +1,27 @@ -#include "pugixml.hpp" - -#include - -int main() -{ - pugi::xml_document doc; - if (!doc.load_file("xgconsole.xml")) return -1; - - pugi::xml_node tools = doc.child("Profile").child("Tools"); - - //[code_traverse_iter - for (pugi::xml_node_iterator it = tools.begin(); it != tools.end(); ++it) - { - std::cout << "Tool:"; - - for (pugi::xml_attribute_iterator ait = it->attributes_begin(); ait != it->attributes_end(); ++ait) - { - std::cout << " " << ait->name() << "=" << ait->value(); - } - - std::cout << std::endl; - } - //] -} - -// vim:et +#include "pugixml.hpp" + +#include + +int main() +{ + pugi::xml_document doc; + if (!doc.load_file("xgconsole.xml")) return -1; + + pugi::xml_node tools = doc.child("Profile").child("Tools"); + + //[code_traverse_iter + for (pugi::xml_node_iterator it = tools.begin(); it != tools.end(); ++it) + { + std::cout << "Tool:"; + + for (pugi::xml_attribute_iterator ait = it->attributes_begin(); ait != it->attributes_end(); ++ait) + { + std::cout << " " << ait->name() << "=" << ait->value(); + } + + std::cout << std::endl; + } + //] +} + +// vim:et diff --git a/docs/samples/traverse_predicate.cpp b/docs/samples/traverse_predicate.cpp index e565597..9d8ded0 100644 --- a/docs/samples/traverse_predicate.cpp +++ b/docs/samples/traverse_predicate.cpp @@ -1,48 +1,48 @@ -#include "pugixml.hpp" - -#include -#include - -//[code_traverse_predicate_decl -bool small_timeout(pugi::xml_node node) -{ - return node.attribute("Timeout").as_int() < 20; -} - -struct allow_remote_predicate -{ - bool operator()(pugi::xml_attribute attr) const - { - return strcmp(attr.name(), "AllowRemote") == 0; - } - - bool operator()(pugi::xml_node node) const - { - return node.attribute("AllowRemote").as_bool(); - } -}; -//] - -int main() -{ - pugi::xml_document doc; - if (!doc.load_file("xgconsole.xml")) return -1; - - pugi::xml_node tools = doc.child("Profile").child("Tools"); - - 
//[code_traverse_predicate_find - // Find child via predicate (looks for direct children only) - std::cout << tools.find_child(allow_remote_predicate()).attribute("Filename").value() << std::endl; - - // Find node via predicate (looks for all descendants in depth-first order) - std::cout << doc.find_node(allow_remote_predicate()).attribute("Filename").value() << std::endl; - - // Find attribute via predicate - std::cout << tools.last_child().find_attribute(allow_remote_predicate()).value() << std::endl; - - // We can use simple functions instead of function objects - std::cout << tools.find_child(small_timeout).attribute("Filename").value() << std::endl; - //] -} - -// vim:et +#include "pugixml.hpp" + +#include +#include + +//[code_traverse_predicate_decl +bool small_timeout(pugi::xml_node node) +{ + return node.attribute("Timeout").as_int() < 20; +} + +struct allow_remote_predicate +{ + bool operator()(pugi::xml_attribute attr) const + { + return strcmp(attr.name(), "AllowRemote") == 0; + } + + bool operator()(pugi::xml_node node) const + { + return node.attribute("AllowRemote").as_bool(); + } +}; +//] + +int main() +{ + pugi::xml_document doc; + if (!doc.load_file("xgconsole.xml")) return -1; + + pugi::xml_node tools = doc.child("Profile").child("Tools"); + + //[code_traverse_predicate_find + // Find child via predicate (looks for direct children only) + std::cout << tools.find_child(allow_remote_predicate()).attribute("Filename").value() << std::endl; + + // Find node via predicate (looks for all descendants in depth-first order) + std::cout << doc.find_node(allow_remote_predicate()).attribute("Filename").value() << std::endl; + + // Find attribute via predicate + std::cout << tools.last_child().find_attribute(allow_remote_predicate()).value() << std::endl; + + // We can use simple functions instead of function objects + std::cout << tools.find_child(small_timeout).attribute("Filename").value() << std::endl; + //] +} + +// vim:et diff --git 
a/docs/samples/traverse_walker.cpp b/docs/samples/traverse_walker.cpp index 9387fa0..cb99902 100644 --- a/docs/samples/traverse_walker.cpp +++ b/docs/samples/traverse_walker.cpp @@ -1,35 +1,35 @@ -#include "pugixml.hpp" - -#include - -const char* node_types[] = -{ - "null", "document", "element", "pcdata", "cdata", "comment", "pi", "declaration" -}; - -//[code_traverse_walker_impl -struct simple_walker: pugi::xml_tree_walker -{ - virtual bool for_each(pugi::xml_node& node) - { - for (int i = 0; i < depth(); ++i) std::cout << " "; // indentation - - std::cout << node_types[node.type()] << ": name='" << node.name() << "', value='" << node.value() << "'\n"; - - return true; // continue traversal - } -}; -//] - -int main() -{ - pugi::xml_document doc; - if (!doc.load_file("tree.xml")) return -1; - - //[code_traverse_walker_traverse - simple_walker walker; - doc.traverse(walker); - //] -} - -// vim:et +#include "pugixml.hpp" + +#include + +const char* node_types[] = +{ + "null", "document", "element", "pcdata", "cdata", "comment", "pi", "declaration" +}; + +//[code_traverse_walker_impl +struct simple_walker: pugi::xml_tree_walker +{ + virtual bool for_each(pugi::xml_node& node) + { + for (int i = 0; i < depth(); ++i) std::cout << " "; // indentation + + std::cout << node_types[node.type()] << ": name='" << node.name() << "', value='" << node.value() << "'\n"; + + return true; // continue traversal + } +}; +//] + +int main() +{ + pugi::xml_document doc; + if (!doc.load_file("tree.xml")) return -1; + + //[code_traverse_walker_traverse + simple_walker walker; + doc.traverse(walker); + //] +} + +// vim:et diff --git a/docs/samples/tree.xml b/docs/samples/tree.xml index 81b6f4f..b33267a 100644 --- a/docs/samples/tree.xml +++ b/docs/samples/tree.xml @@ -1,12 +1,12 @@ - - - - some text - - some more text - - - - - - + + + + some text + + some more text + + + + + + diff --git a/docs/samples/weekly-shift_jis.xml b/docs/samples/weekly-shift_jis.xml index 7421455..097e374 100644 
--- a/docs/samples/weekly-shift_jis.xml +++ b/docs/samples/weekly-shift_jis.xml @@ -1,78 +1,78 @@ - - - - - <”NŒŽT> - <”N“x>1997 - <ŒŽ“x>1 - 1 - - - <Ž–¼> - <Ž>ŽR“c - <–¼>‘¾˜Y - - - <‹Æ–±•ñƒŠƒXƒg> - <‹Æ–±•ñ> - <‹Æ–±–¼>XMLƒGƒfƒBƒ^[‚Ìì¬ - <‹Æ–±ƒR[ƒh>X3355-23 - - <Œ©Ï‚à‚èH”>1600 - <ŽÀÑH”>320 - <“–ŒŽŒ©Ï‚à‚èH”>160 - <“–ŒŽŽÀÑH”>24 - - <—\’耖ڃŠƒXƒg> - <—\’耖Ú> -

XMLƒGƒfƒBƒ^[‚ÌŠî–{Žd—l‚Ìì¬

- - - <ŽÀŽ{Ž–€ƒŠƒXƒg> - <ŽÀŽ{Ž–€> -

XMLƒGƒfƒBƒ^[‚ÌŠî–{Žd—l‚Ìì¬

- - <ŽÀŽ{Ž–€> -

‹£‡‘¼ŽÐ»•i‚Ì‹@”\’²¸

- - - <ã’·‚Ö‚Ì—v¿Ž–€ƒŠƒXƒg> - <ã’·‚Ö‚Ì—v¿Ž–€> -

“Á‚É‚È‚µ

- - - <–â‘è“_‘Îô> -

XML‚Ƃ͉½‚©‚í‚©‚ç‚È‚¢B

- - - - <‹Æ–±•ñ> - <‹Æ–±–¼>ŒŸõƒGƒ“ƒWƒ“‚ÌŠJ”­ - <‹Æ–±ƒR[ƒh>S8821-76 - - <Œ©Ï‚à‚èH”>120 - <ŽÀÑH”>6 - <“–ŒŽŒ©Ï‚à‚èH”>32 - <“–ŒŽŽÀÑH”>2 - - <—\’耖ڃŠƒXƒg> - <—\’耖Ú> -

goo‚Ì‹@”\‚𒲂ׂĂ݂é

- - - <ŽÀŽ{Ž–€ƒŠƒXƒg> - <ŽÀŽ{Ž–€> -

X‚ÉA‚Ç‚¤‚¢‚¤ŒŸõƒGƒ“ƒWƒ“‚ª‚ ‚é‚©’²¸‚·‚é

- - - <ã’·‚Ö‚Ì—v¿Ž–€ƒŠƒXƒg> - <ã’·‚Ö‚Ì—v¿Ž–€> -

ŠJ”­‚ð‚·‚é‚Ì‚Í‚ß‚ñ‚Ç‚¤‚È‚Ì‚ÅAYahoo!‚𔃎û‚µ‚ĉº‚³‚¢B

- - - <–â‘è“_‘Îô> -

ŒŸõƒGƒ“ƒWƒ“‚ŎԂ𑖂点‚邱‚Æ‚ª‚Å‚«‚È‚¢Bi—v’²¸j

- - - - + + + + + <”NŒŽT> + <”N“x>1997 + <ŒŽ“x>1 + 1 + + + <Ž–¼> + <Ž>ŽR“c + <–¼>‘¾˜Y + + + <‹Æ–±•ñƒŠƒXƒg> + <‹Æ–±•ñ> + <‹Æ–±–¼>XMLƒGƒfƒBƒ^[‚Ìì¬ + <‹Æ–±ƒR[ƒh>X3355-23 + + <Œ©Ï‚à‚èH”>1600 + <ŽÀÑH”>320 + <“–ŒŽŒ©Ï‚à‚èH”>160 + <“–ŒŽŽÀÑH”>24 + + <—\’耖ڃŠƒXƒg> + <—\’耖Ú> +

XMLƒGƒfƒBƒ^[‚ÌŠî–{Žd—l‚Ìì¬

+ + + <ŽÀŽ{Ž–€ƒŠƒXƒg> + <ŽÀŽ{Ž–€> +

XMLƒGƒfƒBƒ^[‚ÌŠî–{Žd—l‚Ìì¬

+ + <ŽÀŽ{Ž–€> +

‹£‡‘¼ŽÐ»•i‚Ì‹@”\’²¸

+ + + <ã’·‚Ö‚Ì—v¿Ž–€ƒŠƒXƒg> + <ã’·‚Ö‚Ì—v¿Ž–€> +

“Á‚É‚È‚µ

+ + + <–â‘è“_‘Îô> +

XML‚Ƃ͉½‚©‚í‚©‚ç‚È‚¢B

+ + + + <‹Æ–±•ñ> + <‹Æ–±–¼>ŒŸõƒGƒ“ƒWƒ“‚ÌŠJ”­ + <‹Æ–±ƒR[ƒh>S8821-76 + + <Œ©Ï‚à‚èH”>120 + <ŽÀÑH”>6 + <“–ŒŽŒ©Ï‚à‚èH”>32 + <“–ŒŽŽÀÑH”>2 + + <—\’耖ڃŠƒXƒg> + <—\’耖Ú> +

goo‚Ì‹@”\‚𒲂ׂĂ݂é

+ + + <ŽÀŽ{Ž–€ƒŠƒXƒg> + <ŽÀŽ{Ž–€> +

X‚ÉA‚Ç‚¤‚¢‚¤ŒŸõƒGƒ“ƒWƒ“‚ª‚ ‚é‚©’²¸‚·‚é

+ + + <ã’·‚Ö‚Ì—v¿Ž–€ƒŠƒXƒg> + <ã’·‚Ö‚Ì—v¿Ž–€> +

ŠJ”­‚ð‚·‚é‚Ì‚Í‚ß‚ñ‚Ç‚¤‚È‚Ì‚ÅAYahoo!‚𔃎û‚µ‚ĉº‚³‚¢B

+ + + <–â‘è“_‘Îô> +

ŒŸõƒGƒ“ƒWƒ“‚ŎԂ𑖂点‚邱‚Æ‚ª‚Å‚«‚È‚¢Bi—v’²¸j

+ + + +
diff --git a/docs/samples/weekly-utf-8.xml b/docs/samples/weekly-utf-8.xml index 497f572..d55474e 100644 --- a/docs/samples/weekly-utf-8.xml +++ b/docs/samples/weekly-utf-8.xml @@ -1,78 +1,78 @@ - - - -<週報> - <年月週> - <年度>1997 - <月度>1 - <週>1 - - - <æ°å> - <æ°>山田 - <å>太郎 - - - <業務報告リスト> - <業務報告> - <業務å>XMLエディターã®ä½œæˆ - <業務コード>X3355-23 - <工数管ç†> - <見ç©ã‚‚り工数>1600 - <実績工数>320 - <当月見ç©ã‚‚り工数>160 - <当月実績工数>24 - - <予定項目リスト> - <予定項目> -

XMLエディターã®åŸºæœ¬ä»•æ§˜ã®ä½œæˆ

- - - <実施事項リスト> - <実施事項> -

XMLエディターã®åŸºæœ¬ä»•æ§˜ã®ä½œæˆ

- - <実施事項> -

競åˆä»–社製å“ã®æ©Ÿèƒ½èª¿æŸ»

- - - <上長ã¸ã®è¦è«‹äº‹é …リスト> - <上長ã¸ã®è¦è«‹äº‹é …> -

特ã«ãªã—

- - - <å•é¡Œç‚¹å¯¾ç­–> -

XMLã¨ã¯ä½•ã‹ã‚ã‹ã‚‰ãªã„。

- - - - <業務報告> - <業務å>検索エンジンã®é–‹ç™º - <業務コード>S8821-76 - <工数管ç†> - <見ç©ã‚‚り工数>120 - <実績工数>6 - <当月見ç©ã‚‚り工数>32 - <当月実績工数>2 - - <予定項目リスト> - <予定項目> -

gooã®æ©Ÿèƒ½ã‚’調ã¹ã¦ã¿ã‚‹

- - - <実施事項リスト> - <実施事項> -

æ›´ã«ã€ã©ã†ã„ã†æ¤œç´¢ã‚¨ãƒ³ã‚¸ãƒ³ãŒã‚ã‚‹ã‹èª¿æŸ»ã™ã‚‹

- - - <上長ã¸ã®è¦è«‹äº‹é …リスト> - <上長ã¸ã®è¦è«‹äº‹é …> -

開発をã™ã‚‹ã®ã¯ã‚ã‚“ã©ã†ãªã®ã§ã€Yahoo!ã‚’è²·åŽã—ã¦ä¸‹ã•ã„。

- - - <å•é¡Œç‚¹å¯¾ç­–> -

検索エンジンã§è»Šã‚’走らã›ã‚‹ã“ã¨ãŒã§ããªã„。(è¦èª¿æŸ»ï¼‰

- - - - + + + +<週報> + <年月週> + <年度>1997 + <月度>1 + <週>1 + + + <æ°å> + <æ°>山田 + <å>太郎 + + + <業務報告リスト> + <業務報告> + <業務å>XMLエディターã®ä½œæˆ + <業務コード>X3355-23 + <工数管ç†> + <見ç©ã‚‚り工数>1600 + <実績工数>320 + <当月見ç©ã‚‚り工数>160 + <当月実績工数>24 + + <予定項目リスト> + <予定項目> +

XMLエディターã®åŸºæœ¬ä»•æ§˜ã®ä½œæˆ

+ + + <実施事項リスト> + <実施事項> +

XMLエディターã®åŸºæœ¬ä»•æ§˜ã®ä½œæˆ

+ + <実施事項> +

競åˆä»–社製å“ã®æ©Ÿèƒ½èª¿æŸ»

+ + + <上長ã¸ã®è¦è«‹äº‹é …リスト> + <上長ã¸ã®è¦è«‹äº‹é …> +

特ã«ãªã—

+ + + <å•é¡Œç‚¹å¯¾ç­–> +

XMLã¨ã¯ä½•ã‹ã‚ã‹ã‚‰ãªã„。

+ + + + <業務報告> + <業務å>検索エンジンã®é–‹ç™º + <業務コード>S8821-76 + <工数管ç†> + <見ç©ã‚‚り工数>120 + <実績工数>6 + <当月見ç©ã‚‚り工数>32 + <当月実績工数>2 + + <予定項目リスト> + <予定項目> +

gooã®æ©Ÿèƒ½ã‚’調ã¹ã¦ã¿ã‚‹

+ + + <実施事項リスト> + <実施事項> +

æ›´ã«ã€ã©ã†ã„ã†æ¤œç´¢ã‚¨ãƒ³ã‚¸ãƒ³ãŒã‚ã‚‹ã‹èª¿æŸ»ã™ã‚‹

+ + + <上長ã¸ã®è¦è«‹äº‹é …リスト> + <上長ã¸ã®è¦è«‹äº‹é …> +

開発をã™ã‚‹ã®ã¯ã‚ã‚“ã©ã†ãªã®ã§ã€Yahoo!ã‚’è²·åŽã—ã¦ä¸‹ã•ã„。

+ + + <å•é¡Œç‚¹å¯¾ç­–> +

検索エンジンã§è»Šã‚’走らã›ã‚‹ã“ã¨ãŒã§ããªã„。(è¦èª¿æŸ»ï¼‰

+ + + + diff --git a/docs/samples/xgconsole.xml b/docs/samples/xgconsole.xml index cf603ad..b2bf5a2 100644 --- a/docs/samples/xgconsole.xml +++ b/docs/samples/xgconsole.xml @@ -1,12 +1,12 @@ - - - - - Jamplus build system - - - - - - - + + + + + Jamplus build system + + + + + + + diff --git a/docs/samples/xpath_error.cpp b/docs/samples/xpath_error.cpp index 3415d31..6cb6f4f 100644 --- a/docs/samples/xpath_error.cpp +++ b/docs/samples/xpath_error.cpp @@ -1,43 +1,43 @@ -#include "pugixml.hpp" - -#include - -int main() -{ - pugi::xml_document doc; - if (!doc.load_file("xgconsole.xml")) return -1; - -//[code_xpath_error - // Exception is thrown for incorrect query syntax - try - { - doc.select_nodes("//nodes[#true()]"); - } - catch (const pugi::xpath_exception& e) - { - std::cout << "Select failed: " << e.what() << std::endl; - } - - // Exception is thrown for incorrect query semantics - try - { - doc.select_nodes("(123)/next"); - } - catch (const pugi::xpath_exception& e) - { - std::cout << "Select failed: " << e.what() << std::endl; - } - - // Exception is thrown for query with incorrect return type - try - { - doc.select_nodes("123"); - } - catch (const pugi::xpath_exception& e) - { - std::cout << "Select failed: " << e.what() << std::endl; - } -//] -} - -// vim:et +#include "pugixml.hpp" + +#include + +int main() +{ + pugi::xml_document doc; + if (!doc.load_file("xgconsole.xml")) return -1; + +//[code_xpath_error + // Exception is thrown for incorrect query syntax + try + { + doc.select_nodes("//nodes[#true()]"); + } + catch (const pugi::xpath_exception& e) + { + std::cout << "Select failed: " << e.what() << std::endl; + } + + // Exception is thrown for incorrect query semantics + try + { + doc.select_nodes("(123)/next"); + } + catch (const pugi::xpath_exception& e) + { + std::cout << "Select failed: " << e.what() << std::endl; + } + + // Exception is thrown for query with incorrect return type + try + { + doc.select_nodes("123"); + } + catch (const 
pugi::xpath_exception& e) + { + std::cout << "Select failed: " << e.what() << std::endl; + } +//] +} + +// vim:et diff --git a/docs/samples/xpath_query.cpp b/docs/samples/xpath_query.cpp index b14a0b0..c622a9c 100644 --- a/docs/samples/xpath_query.cpp +++ b/docs/samples/xpath_query.cpp @@ -1,36 +1,36 @@ -#include "pugixml.hpp" - -#include -#include - -int main() -{ - pugi::xml_document doc; - if (!doc.load_file("xgconsole.xml")) return -1; - -//[code_xpath_query - // Select nodes via compiled query - pugi::xpath_query query_remote_tools("/Profile/Tools/Tool[@AllowRemote='true']"); - - pugi::xpath_node_set tools = query_remote_tools.evaluate_node_set(doc); - std::cout << "Remote tool: "; - tools[2].node().print(std::cout); - - // Evaluate numbers via compiled query - pugi::xpath_query query_timeouts("sum(//Tool/@Timeout)"); - std::cout << query_timeouts.evaluate_number(doc) << std::endl; - - // Evaluate strings via compiled query for different context nodes - pugi::xpath_query query_name_valid("string-length(substring-before(@Filename, '_')) > 0 and @OutputFileMasks"); - pugi::xpath_query query_name("concat(substring-before(@Filename, '_'), ' produces ', @OutputFileMasks)"); - - for (pugi::xml_node tool = doc.first_element_by_path("Profile/Tools/Tool"); tool; tool = tool.next_sibling()) - { - std::string s = query_name.evaluate_string(tool); - - if (query_name_valid.evaluate_boolean(tool)) std::cout << s << std::endl; - } -//] -} - -// vim:et +#include "pugixml.hpp" + +#include +#include + +int main() +{ + pugi::xml_document doc; + if (!doc.load_file("xgconsole.xml")) return -1; + +//[code_xpath_query + // Select nodes via compiled query + pugi::xpath_query query_remote_tools("/Profile/Tools/Tool[@AllowRemote='true']"); + + pugi::xpath_node_set tools = query_remote_tools.evaluate_node_set(doc); + std::cout << "Remote tool: "; + tools[2].node().print(std::cout); + + // Evaluate numbers via compiled query + pugi::xpath_query query_timeouts("sum(//Tool/@Timeout)"); + 
std::cout << query_timeouts.evaluate_number(doc) << std::endl; + + // Evaluate strings via compiled query for different context nodes + pugi::xpath_query query_name_valid("string-length(substring-before(@Filename, '_')) > 0 and @OutputFileMasks"); + pugi::xpath_query query_name("concat(substring-before(@Filename, '_'), ' produces ', @OutputFileMasks)"); + + for (pugi::xml_node tool = doc.first_element_by_path("Profile/Tools/Tool"); tool; tool = tool.next_sibling()) + { + std::string s = query_name.evaluate_string(tool); + + if (query_name_valid.evaluate_boolean(tool)) std::cout << s << std::endl; + } +//] +} + +// vim:et diff --git a/docs/samples/xpath_select.cpp b/docs/samples/xpath_select.cpp index 5645fd3..c098bd1 100644 --- a/docs/samples/xpath_select.cpp +++ b/docs/samples/xpath_select.cpp @@ -1,27 +1,27 @@ -#include "pugixml.hpp" - -#include - -int main() -{ - pugi::xml_document doc; - if (!doc.load_file("xgconsole.xml")) return -1; - -//[code_xpath_select - pugi::xpath_node_set tools = doc.select_nodes("/Profile/Tools/Tool[@AllowRemote='true' and @DeriveCaptionFrom='lastparam']"); - - std::cout << "Tools:"; - - for (pugi::xpath_node_set::const_iterator it = tools.begin(); it != tools.end(); ++it) - { - pugi::xpath_node node = *it; - std::cout << " " << node.node().attribute("Filename").value(); - } - - pugi::xpath_node build_tool = doc.select_single_node("//Tool[contains(Description, 'build system')]"); - - std::cout << "\nBuild tool: " << build_tool.node().attribute("Filename").value() << "\n"; -//] -} - -// vim:et +#include "pugixml.hpp" + +#include + +int main() +{ + pugi::xml_document doc; + if (!doc.load_file("xgconsole.xml")) return -1; + +//[code_xpath_select + pugi::xpath_node_set tools = doc.select_nodes("/Profile/Tools/Tool[@AllowRemote='true' and @DeriveCaptionFrom='lastparam']"); + + std::cout << "Tools:"; + + for (pugi::xpath_node_set::const_iterator it = tools.begin(); it != tools.end(); ++it) + { + pugi::xpath_node node = *it; + std::cout << " 
" << node.node().attribute("Filename").value(); + } + + pugi::xpath_node build_tool = doc.select_single_node("//Tool[contains(Description, 'build system')]"); + + std::cout << "\nBuild tool: " << build_tool.node().attribute("Filename").value() << "\n"; +//] +} + +// vim:et diff --git a/readme.txt b/readme.txt index 58026a0..7f68b47 100644 --- a/readme.txt +++ b/readme.txt @@ -1,52 +1,52 @@ -pugixml 0.9 - an XML processing library - -Copyright (C) 2006-2010, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) -Report bugs and download new versions at http://code.google.com/p/pugixml/ - -This is the distribution of pugixml, which is a C++ XML processing library, -which consists of a DOM-like interface with rich traversal/modification -capabilities, an extremely fast XML parser which constructs the DOM tree from -an XML file/buffer, and an XPath 1.0 implementation for complex data-driven -tree queries. Full Unicode support is also available, with Unicode interface -variants and conversions between different Unicode encodings (which happen -automatically during parsing/saving). - -The distribution contains the following folders: - - contrib/ - various contributions to pugixml - - docs/ - documentation - docs/samples - pugixml usage examples - docs/quickstart.html - quick start guide - docs/manual.html - complete manual - - scripts/ - project files for IDE/build systems - - src/ - header and source files - - readme.txt - this file. 
- -This library is distributed under the MIT License: - -Copyright (c) 2006-2010 Arseny Kapoulkine - -Permission is hereby granted, free of charge, to any person -obtaining a copy of this software and associated documentation -files (the "Software"), to deal in the Software without -restriction, including without limitation the rights to use, -copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the -Software is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. +pugixml 0.9 - an XML processing library + +Copyright (C) 2006-2010, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) +Report bugs and download new versions at http://code.google.com/p/pugixml/ + +This is the distribution of pugixml, which is a C++ XML processing library, +which consists of a DOM-like interface with rich traversal/modification +capabilities, an extremely fast XML parser which constructs the DOM tree from +an XML file/buffer, and an XPath 1.0 implementation for complex data-driven +tree queries. Full Unicode support is also available, with Unicode interface +variants and conversions between different Unicode encodings (which happen +automatically during parsing/saving). 
+ +The distribution contains the following folders: + + contrib/ - various contributions to pugixml + + docs/ - documentation + docs/samples - pugixml usage examples + docs/quickstart.html - quick start guide + docs/manual.html - complete manual + + scripts/ - project files for IDE/build systems + + src/ - header and source files + + readme.txt - this file. + +This library is distributed under the MIT License: + +Copyright (c) 2006-2010 Arseny Kapoulkine + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index a589a93..5c73e7c 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -1,5 +1,5 @@ -project(pugixml) - -set(SOURCES ../src/pugixml.hpp ../src/pugiconfig.hpp ../src/pugixml.cpp ../src/pugixpath.cpp) - -add_library(pugixml STATIC ${SOURCES}) +project(pugixml) + +set(SOURCES ../src/pugixml.hpp ../src/pugiconfig.hpp ../src/pugixml.cpp ../src/pugixpath.cpp) + +add_library(pugixml STATIC ${SOURCES}) diff --git a/scripts/premake4.lua b/scripts/premake4.lua index 977fb92..184541a 100644 --- a/scripts/premake4.lua +++ b/scripts/premake4.lua @@ -1,86 +1,86 @@ --- Reset RNG seed to get consistent results across runs (i.e. XCode) -math.randomseed(12345) - -local static = _ARGS[1] == 'static' -local action = premake.action.current() - -if string.startswith(_ACTION, "vs") then - -- We need debugging symbols for all configurations, but runtime library depends on official Symbols flag, so hack it - function premake.vs200x_vcproj_symbols(cfg) - return 3 - end - - if action then - -- Disable solution generation - function action.onsolution(sln) - sln.vstudio_configs = premake.vstudio_buildconfigs(sln) - end - - -- Rename output file - function action.onproject(prj) - premake.generate(prj, "%%_" .. _ACTION .. (static and "_static" or "") .. ".vcproj", premake.vs200x_vcproj) - end - end -elseif _ACTION == "codeblocks" then - action.onsolution = nil - - function action.onproject(prj) - premake.generate(prj, "%%_" .. _ACTION .. ".cbp", premake.codeblocks_cbp) - end -elseif _ACTION == "codelite" then - action.onsolution = nil - - function action.onproject(prj) - premake.generate(prj, "%%_" .. _ACTION .. ".project", premake.codelite_project) - end -end - -solution "pugixml" - objdir(_ACTION) - targetdir(_ACTION) - -if string.startswith(_ACTION, "vs") then - if _ACTION ~= "vs2002" and _ACTION ~= "vs2003" then - platforms { "x32", "x64" } - - configuration "x32" targetdir(_ACTION .. 
"/x32") - configuration "x64" targetdir(_ACTION .. "/x64") - end - - configurations { "Debug", "Release" } - - if static then - configuration "Debug" targetsuffix "_sd" - configuration "Release" targetsuffix "_s" - else - configuration "Debug" targetsuffix "_d" - end -else - if _ACTION == "xcode3" then - platforms "universal" - end - - configurations { "Debug", "Release" } - - configuration "Debug" targetsuffix "_d" -end - -project "pugixml" - kind "StaticLib" - language "C++" - files { "../src/pugixml.hpp", "../src/pugiconfig.hpp", "../src/pugixml.cpp", "../src/pugixpath.cpp" } - flags { "NoPCH", "NoMinimalRebuild" } - uuid "89A1E353-E2DC-495C-B403-742BE206ACED" - -configuration "Debug" - defines { "_DEBUG" } - flags { "Symbols" } - -configuration "Release" - defines { "NDEBUG" } - flags { "Optimize" } - -if static then - configuration "*" - flags { "StaticRuntime" } -end +-- Reset RNG seed to get consistent results across runs (i.e. XCode) +math.randomseed(12345) + +local static = _ARGS[1] == 'static' +local action = premake.action.current() + +if string.startswith(_ACTION, "vs") then + -- We need debugging symbols for all configurations, but runtime library depends on official Symbols flag, so hack it + function premake.vs200x_vcproj_symbols(cfg) + return 3 + end + + if action then + -- Disable solution generation + function action.onsolution(sln) + sln.vstudio_configs = premake.vstudio_buildconfigs(sln) + end + + -- Rename output file + function action.onproject(prj) + premake.generate(prj, "%%_" .. _ACTION .. (static and "_static" or "") .. ".vcproj", premake.vs200x_vcproj) + end + end +elseif _ACTION == "codeblocks" then + action.onsolution = nil + + function action.onproject(prj) + premake.generate(prj, "%%_" .. _ACTION .. ".cbp", premake.codeblocks_cbp) + end +elseif _ACTION == "codelite" then + action.onsolution = nil + + function action.onproject(prj) + premake.generate(prj, "%%_" .. _ACTION .. 
".project", premake.codelite_project) + end +end + +solution "pugixml" + objdir(_ACTION) + targetdir(_ACTION) + +if string.startswith(_ACTION, "vs") then + if _ACTION ~= "vs2002" and _ACTION ~= "vs2003" then + platforms { "x32", "x64" } + + configuration "x32" targetdir(_ACTION .. "/x32") + configuration "x64" targetdir(_ACTION .. "/x64") + end + + configurations { "Debug", "Release" } + + if static then + configuration "Debug" targetsuffix "_sd" + configuration "Release" targetsuffix "_s" + else + configuration "Debug" targetsuffix "_d" + end +else + if _ACTION == "xcode3" then + platforms "universal" + end + + configurations { "Debug", "Release" } + + configuration "Debug" targetsuffix "_d" +end + +project "pugixml" + kind "StaticLib" + language "C++" + files { "../src/pugixml.hpp", "../src/pugiconfig.hpp", "../src/pugixml.cpp", "../src/pugixpath.cpp" } + flags { "NoPCH", "NoMinimalRebuild" } + uuid "89A1E353-E2DC-495C-B403-742BE206ACED" + +configuration "Debug" + defines { "_DEBUG" } + flags { "Symbols" } + +configuration "Release" + defines { "NDEBUG" } + flags { "Optimize" } + +if static then + configuration "*" + flags { "StaticRuntime" } +end diff --git a/scripts/pugixml_vs2005.vcproj b/scripts/pugixml_vs2005.vcproj index d5eecfd..7085ec1 100644 --- a/scripts/pugixml_vs2005.vcproj +++ b/scripts/pugixml_vs2005.vcproj @@ -1,347 +1,347 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/scripts/pugixml_vs2005_static.vcproj b/scripts/pugixml_vs2005_static.vcproj index 05c5604..4ed7e8c 100644 --- a/scripts/pugixml_vs2005_static.vcproj +++ b/scripts/pugixml_vs2005_static.vcproj 
@@ -1,347 +1,347 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/scripts/pugixml_vs2008.vcproj b/scripts/pugixml_vs2008.vcproj index 868b2a7..a53b64d 100644 --- a/scripts/pugixml_vs2008.vcproj +++ b/scripts/pugixml_vs2008.vcproj @@ -1,343 +1,343 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/scripts/pugixml_vs2008_static.vcproj b/scripts/pugixml_vs2008_static.vcproj index c888120..0949219 100644 --- a/scripts/pugixml_vs2008_static.vcproj +++ b/scripts/pugixml_vs2008_static.vcproj @@ -1,343 +1,343 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/scripts/pugixml_vs2010.vcxproj b/scripts/pugixml_vs2010.vcxproj index faa0066..5b9708c 100644 --- a/scripts/pugixml_vs2010.vcxproj +++ b/scripts/pugixml_vs2010.vcxproj @@ -1,162 +1,162 @@ - - - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - 
Release - x64 - - - - pugixml - {89A1E353-E2DC-495C-B403-742BE206ACED} - pugixml - Win32Proj - - - - StaticLibrary - MultiByte - - - StaticLibrary - MultiByte - - - StaticLibrary - MultiByte - - - StaticLibrary - MultiByte - - - - - - - - - - - - - - - - - - - <_ProjectFileVersion>10.0.30319.1 - vs2010\x32\ - vs2010\x32\Debug\ - vs2010\x64\ - vs2010\x64\Debug\ - vs2010\x32\ - vs2010\x32\Release\ - vs2010\x64\ - vs2010\x64\Release\ - $(ProjectName)_d - $(ProjectName)_d - - - - Disabled - _DEBUG;%(PreprocessorDefinitions) - EnableFastChecks - MultiThreadedDebugDLL - true - - - Level3 - $(OutDir)pugixml_d.pdb - ProgramDatabase - - - _DEBUG;%(PreprocessorDefinitions) - - - - - - X64 - - - Disabled - _DEBUG;%(PreprocessorDefinitions) - EnableFastChecks - MultiThreadedDebugDLL - true - - - Level3 - $(OutDir)pugixml_d.pdb - ProgramDatabase - - - _DEBUG;%(PreprocessorDefinitions) - - - - - - Full - NDEBUG;%(PreprocessorDefinitions) - true - MultiThreadedDLL - true - - - Level3 - $(OutDir)pugixml.pdb - ProgramDatabase - - - NDEBUG;%(PreprocessorDefinitions) - - - - - - X64 - - - Full - NDEBUG;%(PreprocessorDefinitions) - true - MultiThreadedDLL - true - - - Level3 - $(OutDir)pugixml.pdb - ProgramDatabase - - - NDEBUG;%(PreprocessorDefinitions) - - - - - - - - - - - - - - + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + pugixml + {89A1E353-E2DC-495C-B403-742BE206ACED} + pugixml + Win32Proj + + + + StaticLibrary + MultiByte + + + StaticLibrary + MultiByte + + + StaticLibrary + MultiByte + + + StaticLibrary + MultiByte + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>10.0.30319.1 + vs2010\x32\ + vs2010\x32\Debug\ + vs2010\x64\ + vs2010\x64\Debug\ + vs2010\x32\ + vs2010\x32\Release\ + vs2010\x64\ + vs2010\x64\Release\ + $(ProjectName)_d + $(ProjectName)_d + + + + Disabled + _DEBUG;%(PreprocessorDefinitions) + EnableFastChecks + MultiThreadedDebugDLL + true + + + Level3 + $(OutDir)pugixml_d.pdb + ProgramDatabase + + + 
_DEBUG;%(PreprocessorDefinitions) + + + + + + X64 + + + Disabled + _DEBUG;%(PreprocessorDefinitions) + EnableFastChecks + MultiThreadedDebugDLL + true + + + Level3 + $(OutDir)pugixml_d.pdb + ProgramDatabase + + + _DEBUG;%(PreprocessorDefinitions) + + + + + + Full + NDEBUG;%(PreprocessorDefinitions) + true + MultiThreadedDLL + true + + + Level3 + $(OutDir)pugixml.pdb + ProgramDatabase + + + NDEBUG;%(PreprocessorDefinitions) + + + + + + X64 + + + Full + NDEBUG;%(PreprocessorDefinitions) + true + MultiThreadedDLL + true + + + Level3 + $(OutDir)pugixml.pdb + ProgramDatabase + + + NDEBUG;%(PreprocessorDefinitions) + + + + + + + + + + + + + + \ No newline at end of file diff --git a/scripts/pugixml_vs2010_static.vcxproj b/scripts/pugixml_vs2010_static.vcxproj index 2659ef2..9192f24 100644 --- a/scripts/pugixml_vs2010_static.vcxproj +++ b/scripts/pugixml_vs2010_static.vcxproj @@ -1,164 +1,164 @@ - - - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - - - pugixml - {89A1E353-E2DC-495C-B403-742BE206ACED} - pugixml - Win32Proj - - - - StaticLibrary - MultiByte - - - StaticLibrary - MultiByte - - - StaticLibrary - MultiByte - - - StaticLibrary - MultiByte - - - - - - - - - - - - - - - - - - - <_ProjectFileVersion>10.0.30319.1 - vs2010\x32\ - vs2010\x32\Debug\ - vs2010\x64\ - vs2010\x64\Debug\ - vs2010\x32\ - vs2010\x32\Release\ - vs2010\x64\ - vs2010\x64\Release\ - $(ProjectName)_sd - $(ProjectName)_sd - $(ProjectName)_s - $(ProjectName)_s - - - - Disabled - _DEBUG;%(PreprocessorDefinitions) - EnableFastChecks - MultiThreadedDebug - true - - - Level3 - $(OutDir)pugixml_sd.pdb - ProgramDatabase - - - _DEBUG;%(PreprocessorDefinitions) - - - - - - X64 - - - Disabled - _DEBUG;%(PreprocessorDefinitions) - EnableFastChecks - MultiThreadedDebug - true - - - Level3 - $(OutDir)pugixml_sd.pdb - ProgramDatabase - - - _DEBUG;%(PreprocessorDefinitions) - - - - - - Full - NDEBUG;%(PreprocessorDefinitions) - true - MultiThreaded - true - - - Level3 - 
$(OutDir)pugixml_s.pdb - ProgramDatabase - - - NDEBUG;%(PreprocessorDefinitions) - - - - - - X64 - - - Full - NDEBUG;%(PreprocessorDefinitions) - true - MultiThreaded - true - - - Level3 - $(OutDir)pugixml_s.pdb - ProgramDatabase - - - NDEBUG;%(PreprocessorDefinitions) - - - - - - - - - - - - - - - + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + pugixml + {89A1E353-E2DC-495C-B403-742BE206ACED} + pugixml + Win32Proj + + + + StaticLibrary + MultiByte + + + StaticLibrary + MultiByte + + + StaticLibrary + MultiByte + + + StaticLibrary + MultiByte + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>10.0.30319.1 + vs2010\x32\ + vs2010\x32\Debug\ + vs2010\x64\ + vs2010\x64\Debug\ + vs2010\x32\ + vs2010\x32\Release\ + vs2010\x64\ + vs2010\x64\Release\ + $(ProjectName)_sd + $(ProjectName)_sd + $(ProjectName)_s + $(ProjectName)_s + + + + Disabled + _DEBUG;%(PreprocessorDefinitions) + EnableFastChecks + MultiThreadedDebug + true + + + Level3 + $(OutDir)pugixml_sd.pdb + ProgramDatabase + + + _DEBUG;%(PreprocessorDefinitions) + + + + + + X64 + + + Disabled + _DEBUG;%(PreprocessorDefinitions) + EnableFastChecks + MultiThreadedDebug + true + + + Level3 + $(OutDir)pugixml_sd.pdb + ProgramDatabase + + + _DEBUG;%(PreprocessorDefinitions) + + + + + + Full + NDEBUG;%(PreprocessorDefinitions) + true + MultiThreaded + true + + + Level3 + $(OutDir)pugixml_s.pdb + ProgramDatabase + + + NDEBUG;%(PreprocessorDefinitions) + + + + + + X64 + + + Full + NDEBUG;%(PreprocessorDefinitions) + true + MultiThreaded + true + + + Level3 + $(OutDir)pugixml_s.pdb + ProgramDatabase + + + NDEBUG;%(PreprocessorDefinitions) + + + + + + + + + + + + + + + diff --git a/src/pugixml.cpp b/src/pugixml.cpp index ca3f009..7f36ada 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -1,4603 +1,4603 @@ -/** - * pugixml parser - version 0.9 - * -------------------------------------------------------- - * Copyright (C) 2006-2010, by Arseny Kapoulkine 
(arseny.kapoulkine@gmail.com) - * Report bugs and download new versions at http://code.google.com/p/pugixml/ - * - * This library is distributed under the MIT License. See notice at the end - * of this file. - * - * This work is based on the pugxml parser, which is: - * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) - */ - -#include "pugixml.hpp" - -#include -#include -#include -#include -#include -#include - -#ifndef PUGIXML_NO_STL -# include -# include -# include -#endif - -// For placement new -#include - -#ifdef _MSC_VER -# pragma warning(disable: 4127) // conditional expression is constant -# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable -# pragma warning(disable: 4324) // structure was padded due to __declspec(align()) -# pragma warning(disable: 4996) // this function or variable may be unsafe -#endif - -#ifdef __INTEL_COMPILER -# pragma warning(disable: 177) // function was declared but never referenced -# pragma warning(disable: 1478 1786) // function was declared "deprecated" -#endif - -#ifdef __BORLANDC__ -# pragma warn -8008 // condition is always false -# pragma warn -8066 // unreachable code -#endif - -#ifdef __SNC__ -# pragma diag_suppress=178 // function waS declared but never referenced -#endif - -// uintptr_t -#if !defined(_MSC_VER) || _MSC_VER >= 1600 -# include -#else -# if _MSC_VER < 1300 -// No native uintptr_t in MSVC6 -typedef size_t uintptr_t; -# endif -typedef unsigned __int8 uint8_t; -typedef unsigned __int16 uint16_t; -typedef unsigned __int32 uint32_t; -#endif - -// Inlining controls -#if defined(_MSC_VER) && _MSC_VER >= 1300 -# define PUGIXML_NO_INLINE __declspec(noinline) -#elif defined(__GNUC__) -# define PUGIXML_NO_INLINE __attribute__((noinline)) -#else -# define PUGIXML_NO_INLINE -#endif - -// Simple static assertion -#define STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 
1 : -1] = {0}; (void)condition_failed[0]; } - -// Memory allocation -namespace -{ - void* default_allocate(size_t size) - { - return malloc(size); - } - - void default_deallocate(void* ptr) - { - free(ptr); - } - - pugi::allocation_function global_allocate = default_allocate; - pugi::deallocation_function global_deallocate = default_deallocate; -} - -// String utilities prototypes -namespace pugi -{ - namespace impl - { - size_t strlen(const char_t* s); - bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count); - void widen_ascii(wchar_t* dest, const char* source); - } -} - -// String utilities -namespace pugi -{ - namespace impl - { - // Get string length - size_t strlen(const char_t* s) - { - #ifdef PUGIXML_WCHAR_MODE - return wcslen(s); - #else - return ::strlen(s); - #endif - } - - // Compare two strings - bool PUGIXML_FUNCTION strequal(const char_t* src, const char_t* dst) - { - #ifdef PUGIXML_WCHAR_MODE - return wcscmp(src, dst) == 0; - #else - return strcmp(src, dst) == 0; - #endif - } - - // Compare lhs with [rhs_begin, rhs_end) - bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count) - { - for (size_t i = 0; i < count; ++i) - if (lhs[i] != rhs[i]) - return false; - - return lhs[count] == 0; - } - - // Character set pattern match. - static bool strequalwild_cset(const char_t** src, const char_t** dst) - { - int find = 0, excl = 0, star = 0; - - if (**src == '!') - { - excl = 1; - ++(*src); - } - - while (**src != ']' || star == 1) - { - if (find == 0) - { - if (**src == '-' && *(*src-1) < *(*src+1) && *(*src+1) != ']' && star == 0) - { - if (**dst >= *(*src-1) && **dst <= *(*src+1)) - { - find = 1; - ++(*src); - } - } - else if (**src == **dst) find = 1; - } - ++(*src); - star = 0; - } - - if (excl == 1) find = (1 - find); - if (find == 1) ++(*dst); - - return find == 0; - } - - // Wildcard pattern match. 
- static bool strequalwild_astr(const char_t** src, const char_t** dst) - { - int find = 1; - ++(*src); - while ((**dst != 0 && **src == '?') || **src == '*') - { - if(**src == '?') ++(*dst); - ++(*src); - } - while (**src == '*') ++(*src); - if (**dst == 0 && **src != 0) return 0; - if (**dst == 0 && **src == 0) return 1; - else - { - if (!impl::strequalwild(*src,*dst)) - { - do - { - ++(*dst); - while(**src != **dst && **src != '[' && **dst != 0) - ++(*dst); - } - while ((**dst != 0) ? !impl::strequalwild(*src,*dst) : 0 != (find=0)); - } - if (**dst == 0 && **src == 0) find = 1; - return find == 0; - } - } - - // Compare two strings, with globbing, and character sets. - bool PUGIXML_FUNCTION strequalwild(const char_t* src, const char_t* dst) - { - int find = 1; - for(; *src != 0 && find == 1 && *dst != 0; ++src) - { - switch (*src) - { - case '?': ++dst; break; - case '[': ++src; find = !strequalwild_cset(&src,&dst); break; - case '*': find = !strequalwild_astr(&src,&dst); --src; break; - default : find = (int) (*src == *dst); ++dst; - } - } - while (*src == '*' && find == 1) ++src; - return (find == 1 && *dst == 0 && *src == 0); - } - -#ifdef PUGIXML_WCHAR_MODE - // Convert string to wide string, assuming all symbols are ASCII - void widen_ascii(wchar_t* dest, const char* source) - { - for (const char* i = source; *i; ++i) *dest++ = *i; - *dest = 0; - } -#endif - } -} - -namespace pugi -{ - static const size_t xml_memory_page_size = 32768; - - static const uintptr_t xml_memory_page_alignment = 32; - static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1); - static const uintptr_t xml_memory_page_name_allocated_mask = 16; - static const uintptr_t xml_memory_page_value_allocated_mask = 8; - static const uintptr_t xml_memory_page_type_mask = 7; - - struct xml_allocator; - - struct xml_memory_page - { - static xml_memory_page* construct(void* memory) - { - if (!memory) return 0; - - xml_memory_page* result = static_cast(memory); - - 
result->allocator = 0; - result->memory = 0; - result->prev = 0; - result->next = 0; - result->busy_size = 0; - result->freed_size = 0; - - return result; - } - - xml_allocator* allocator; - - void* memory; - - xml_memory_page* prev; - xml_memory_page* next; - - size_t busy_size; - size_t freed_size; - - char data[1]; - }; - - struct xml_memory_string_header - { - xml_memory_page* page; - size_t full_size; - }; - - struct xml_allocator - { - xml_allocator(xml_memory_page* root): _root(root), _busy_size(root ? root->busy_size : 0) - { - } - - xml_memory_page* allocate_page(size_t data_size) - { - size_t size = offsetof(xml_memory_page, data) + data_size; - - // allocate block with some alignment, leaving memory for worst-case padding - void* memory = global_allocate(size + xml_memory_page_alignment); - if (!memory) return 0; - - // align upwards to page boundary - void* page_memory = reinterpret_cast((reinterpret_cast(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1)); - - // prepare page structure - xml_memory_page* page = xml_memory_page::construct(page_memory); - - page->memory = memory; - page->allocator = _root->allocator; - - return page; - } - - static void deallocate_page(xml_memory_page* page) - { - global_deallocate(page->memory); - } - - void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); - - void* allocate_memory(size_t size, xml_memory_page*& out_page) - { - if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page); - - void* buf = _root->data + _busy_size; - - _busy_size += size; - - out_page = _root; - - return buf; - } - - void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) - { - assert(ptr >= page->data && ptr < page->data + xml_memory_page_size); - (void)!ptr; - - if (page == _root) page->busy_size = _busy_size; - - page->freed_size += size; - assert(page->freed_size <= page->busy_size); - - if (page->freed_size == page->busy_size) - { - if (page->next == 
0) - { - assert(_root == page); - - // top page freed, just reset sizes - page->busy_size = page->freed_size = 0; - _busy_size = 0; - } - else - { - assert(_root != page); - assert(page->prev); - - // remove from the list - page->prev->next = page->next; - page->next->prev = page->prev; - - // deallocate - deallocate_page(page); - } - } - } - - char_t* allocate_string(size_t length) - { - // get actual size, rounded up to pointer alignment boundary - size_t size = ((length * sizeof(char_t)) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1); - - // allocate memory for string and header block - size_t full_size = sizeof(xml_memory_string_header) + size; - - xml_memory_page* page; - xml_memory_string_header* header = static_cast(allocate_memory(full_size, page)); - - if (!header) return 0; - - // setup header - header->page = page; - header->full_size = full_size; - - return reinterpret_cast(header + 1); - } - - void deallocate_string(char_t* string) - { - // get header - xml_memory_string_header* header = reinterpret_cast(string) - 1; - - // deallocate - deallocate_memory(header, header->full_size, header->page); - } - - xml_memory_page* _root; - size_t _busy_size; - }; - - PUGIXML_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) - { - const size_t large_allocation_threshold = xml_memory_page_size / 4; - - xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? 
xml_memory_page_size : size); - if (!page) return 0; - - if (size <= large_allocation_threshold) - { - _root->busy_size = _busy_size; - - // insert page at the end of linked list - page->prev = _root; - _root->next = page; - _root = page; - - _busy_size = size; - } - else - { - // insert page before the end of linked list - assert(_root->prev); - - page->prev = _root->prev; - page->next = _root; - - _root->prev->next = page; - _root->prev = page; - } - - // allocate inside page - page->busy_size = size; - - out_page = page; - return page->data; - } - - /// A 'name=value' XML attribute structure. - struct xml_attribute_struct - { - /// Default ctor - xml_attribute_struct(xml_memory_page* page): header(reinterpret_cast(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0) - { - } - - uintptr_t header; - - char_t* name; ///< Pointer to attribute name. - char_t* value; ///< Pointer to attribute value. - - xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list) - xml_attribute_struct* next_attribute; ///< Next attribute - }; - - /// An XML document tree node. - struct xml_node_struct - { - /// Default ctor - /// \param type - node type - xml_node_struct(xml_memory_page* page, xml_node_type type): header(reinterpret_cast(page) | type), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) - { - } - - uintptr_t header; - - xml_node_struct* parent; ///< Pointer to parent - - char_t* name; ///< Pointer to element name. - char_t* value; ///< Pointer to any associated string data. 
- - xml_node_struct* first_child; ///< First child - - xml_node_struct* prev_sibling_c; ///< Left brother (cyclic list) - xml_node_struct* next_sibling; ///< Right brother - - xml_attribute_struct* first_attribute; ///< First attribute - }; - - struct xml_document_struct: public xml_node_struct - { - xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), allocator(0), buffer(0) - { - } - - xml_allocator allocator; - const char_t* buffer; - }; - - inline xml_allocator& get_allocator(const xml_node_struct* node) - { - assert(node); - - return *reinterpret_cast(node->header & xml_memory_page_pointer_mask)->allocator; - } -} - -// Low-level DOM operations -namespace -{ - using namespace pugi; - - inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) - { - xml_memory_page* page; - void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page); - - return new (memory) xml_attribute_struct(page); - } - - inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type) - { - xml_memory_page* page; - void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page); - - return new (memory) xml_node_struct(page, type); - } - - inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) - { - uintptr_t header = a->header; - - if (header & xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name); - if (header & xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value); - - alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast(header & xml_memory_page_pointer_mask)); - } - - inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) - { - uintptr_t header = n->header; - - if (header & xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name); - if (header & xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value); - - for (xml_attribute_struct* attr = n->first_attribute; attr; ) - { - xml_attribute_struct* next 
= attr->next_attribute; - - destroy_attribute(attr, alloc); - - attr = next; - } - - for (xml_node_struct* child = n->first_child; child; ) - { - xml_node_struct* next = child->next_sibling; - - destroy_node(child, alloc); - - child = next; - } - - alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast(header & xml_memory_page_pointer_mask)); - } - - PUGIXML_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) - { - xml_node_struct* child = allocate_node(alloc, type); - if (!child) return 0; - - child->parent = node; - - xml_node_struct* first_child = node->first_child; - - if (first_child) - { - xml_node_struct* last_child = first_child->prev_sibling_c; - - last_child->next_sibling = child; - child->prev_sibling_c = last_child; - first_child->prev_sibling_c = child; - } - else - { - node->first_child = child; - child->prev_sibling_c = child; - } - - return child; - } - - PUGIXML_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc) - { - xml_attribute_struct* a = allocate_attribute(alloc); - if (!a) return 0; - - xml_attribute_struct* first_attribute = node->first_attribute; - - if (first_attribute) - { - xml_attribute_struct* last_attribute = first_attribute->prev_attribute_c; - - last_attribute->next_attribute = a; - a->prev_attribute_c = last_attribute; - first_attribute->prev_attribute_c = a; - } - else - { - node->first_attribute = a; - a->prev_attribute_c = a; - } - - return a; - } -} - -// Helper classes for code generation -namespace -{ - struct opt_false - { - enum { value = 0 }; - }; - - struct opt_true - { - enum { value = 1 }; - }; -} - -// Unicode utilities -namespace -{ - inline uint16_t endian_swap(uint16_t value) - { - return static_cast(((value & 0xff) << 8) | (value >> 8)); - } - - inline uint32_t endian_swap(uint32_t value) - { - return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 
24); - } - - struct utf8_counter - { - typedef size_t value_type; - - static value_type low(value_type result, uint32_t ch) - { - // U+0000..U+007F - if (ch < 0x80) return result + 1; - // U+0080..U+07FF - else if (ch < 0x800) return result + 2; - // U+0800..U+FFFF - else return result + 3; - } - - static value_type high(value_type result, uint32_t) - { - // U+10000..U+10FFFF - return result + 4; - } - }; - - struct utf8_writer - { - typedef uint8_t* value_type; - - static value_type low(value_type result, uint32_t ch) - { - // U+0000..U+007F - if (ch < 0x80) - { - *result = static_cast(ch); - return result + 1; - } - // U+0080..U+07FF - else if (ch < 0x800) - { - result[0] = static_cast(0xC0 | (ch >> 6)); - result[1] = static_cast(0x80 | (ch & 0x3F)); - return result + 2; - } - // U+0800..U+FFFF - else - { - result[0] = static_cast(0xE0 | (ch >> 12)); - result[1] = static_cast(0x80 | ((ch >> 6) & 0x3F)); - result[2] = static_cast(0x80 | (ch & 0x3F)); - return result + 3; - } - } - - static value_type high(value_type result, uint32_t ch) - { - // U+10000..U+10FFFF - result[0] = static_cast(0xF0 | (ch >> 18)); - result[1] = static_cast(0x80 | ((ch >> 12) & 0x3F)); - result[2] = static_cast(0x80 | ((ch >> 6) & 0x3F)); - result[3] = static_cast(0x80 | (ch & 0x3F)); - return result + 4; - } - - static value_type any(value_type result, uint32_t ch) - { - return (ch < 0x10000) ? 
low(result, ch) : high(result, ch); - } - }; - - struct utf16_counter - { - typedef size_t value_type; - - static value_type low(value_type result, uint32_t) - { - return result + 1; - } - - static value_type high(value_type result, uint32_t) - { - return result + 2; - } - }; - - struct utf16_writer - { - typedef uint16_t* value_type; - - static value_type low(value_type result, uint32_t ch) - { - *result = static_cast(ch); - - return result + 1; - } - - static value_type high(value_type result, uint32_t ch) - { - uint32_t msh = (uint32_t)(ch - 0x10000) >> 10; - uint32_t lsh = (uint32_t)(ch - 0x10000) & 0x3ff; - - result[0] = static_cast(0xD800 + msh); - result[1] = static_cast(0xDC00 + lsh); - - return result + 2; - } - - static value_type any(value_type result, uint32_t ch) - { - return (ch < 0x10000) ? low(result, ch) : high(result, ch); - } - }; - - struct utf32_counter - { - typedef size_t value_type; - - static value_type low(value_type result, uint32_t) - { - return result + 1; - } - - static value_type high(value_type result, uint32_t) - { - return result + 1; - } - }; - - struct utf32_writer - { - typedef uint32_t* value_type; - - static value_type low(value_type result, uint32_t ch) - { - *result = ch; - - return result + 1; - } - - static value_type high(value_type result, uint32_t ch) - { - *result = ch; - - return result + 1; - } - - static value_type any(value_type result, uint32_t ch) - { - *result = ch; - - return result + 1; - } - }; - - template struct wchar_selector; - - template <> struct wchar_selector<2> - { - typedef uint16_t type; - typedef utf16_counter counter; - typedef utf16_writer writer; - }; - - template <> struct wchar_selector<4> - { - typedef uint32_t type; - typedef utf32_counter counter; - typedef utf32_writer writer; - }; - - typedef wchar_selector::counter wchar_counter; - typedef wchar_selector::writer wchar_writer; - - template struct utf_decoder - { - static inline typename Traits::value_type decode_utf8_block(const uint8_t* 
data, size_t size, typename Traits::value_type result) - { - const uint8_t utf8_byte_mask = 0x3f; - - while (size) - { - uint8_t lead = *data; - - // 0xxxxxxx -> U+0000..U+007F - if (lead < 0x80) - { - result = Traits::low(result, lead); - data += 1; - size -= 1; - - // process aligned single-byte (ascii) blocks - if ((reinterpret_cast(data) & 3) == 0) - { - while (size >= 4 && (*reinterpret_cast(data) & 0x80808080) == 0) - { - result = Traits::low(result, data[0]); - result = Traits::low(result, data[1]); - result = Traits::low(result, data[2]); - result = Traits::low(result, data[3]); - data += 4; - size -= 4; - } - } - } - // 110xxxxx -> U+0080..U+07FF - else if ((unsigned)(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80) - { - result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask)); - data += 2; - size -= 2; - } - // 1110xxxx -> U+0800-U+FFFF - else if ((unsigned)(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80) - { - result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask)); - data += 3; - size -= 3; - } - // 11110xxx -> U+10000..U+10FFFF - else if ((unsigned)(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80) - { - result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask)); - data += 4; - size -= 4; - } - // 10xxxxxx or 11111xxx -> invalid - else - { - data += 1; - size -= 1; - } - } - - return result; - } - - static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result) - { - const uint16_t* end = data + size; - - while (data < end) - { - uint16_t lead = opt_swap::value ? 
endian_swap(*data) : *data; - - // U+0000..U+D7FF - if (lead < 0xD800) - { - result = Traits::low(result, lead); - data += 1; - } - // U+E000..U+FFFF - else if ((unsigned)(lead - 0xE000) < 0x2000) - { - result = Traits::low(result, lead); - data += 1; - } - // surrogate pair lead - else if ((unsigned)(lead - 0xD800) < 0x400 && data + 1 < end) - { - uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1]; - - if ((unsigned)(next - 0xDC00) < 0x400) - { - result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff)); - data += 2; - } - else - { - data += 1; - } - } - else - { - data += 1; - } - } - - return result; - } - - static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result) - { - const uint32_t* end = data + size; - - while (data < end) - { - uint32_t lead = opt_swap::value ? endian_swap(*data) : *data; - - // U+0000..U+FFFF - if (lead < 0x10000) - { - result = Traits::low(result, lead); - data += 1; - } - // U+10000..U+10FFFF - else - { - result = Traits::high(result, lead); - data += 1; - } - } - - return result; - } - }; - - template inline void convert_utf_endian_swap(T* result, const T* data, size_t length) - { - for (size_t i = 0; i < length; ++i) result[i] = endian_swap(data[i]); - } - - inline void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) - { - for (size_t i = 0; i < length; ++i) result[i] = static_cast(endian_swap(static_cast::type>(data[i]))); - } -} - -namespace -{ - using namespace pugi; - - enum chartype_t - { - ct_parse_pcdata = 1, // \0, &, \r, < - ct_parse_attr = 2, // \0, &, \r, ', " - ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab - ct_space = 8, // \r, \n, space, tab - ct_parse_cdata = 16, // \0, ], >, \r - ct_parse_comment = 32, // \0, -, >, \r - ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, . 
- ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, : - }; - - const unsigned char chartype_table[256] = - { - 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 - 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47 - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63 - 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79 - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95 - 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111 - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127 - - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+ - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 - }; - -#ifdef PUGIXML_WCHAR_MODE - #define IS_CHARTYPE(c, ct) ((static_cast(c) < 128 ? 
chartype_table[static_cast(c)] : chartype_table[128]) & (ct)) -#else - #define IS_CHARTYPE(c, ct) (chartype_table[static_cast(c)] & (ct)) -#endif - - enum output_chartype_t - { - oct_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, > - oct_special_attr = 2 // Any symbol >= 0 and < 32 (except \t), &, <, >, " - }; - - const unsigned char output_chartype_table[256] = - { - 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15 - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31 - 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32-47 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, // 48-63 - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 64-128 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 128+ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }; - -#ifdef PUGIXML_WCHAR_MODE - #define IS_OUTPUT_CHARTYPE(c, ct) ((static_cast(c) < 128 ? output_chartype_table[static_cast(c)] : output_chartype_table[128]) & (ct)) -#else - #define IS_OUTPUT_CHARTYPE(c, ct) (output_chartype_table[static_cast(c)] & (ct)) -#endif - - bool is_little_endian() - { - unsigned int ui = 1; - - return *reinterpret_cast(&ui) == 1; - } - - xml_encoding get_wchar_encoding() - { - STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); - - if (sizeof(wchar_t) == 2) - return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - else - return is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; - } - - xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size) - { - // replace wchar encoding with utf implementation - if (encoding == encoding_wchar) return get_wchar_encoding(); - - // replace utf16 encoding with utf16 with specific endianness - if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - // replace utf32 encoding with utf32 with specific endianness - if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - // only do autodetection if no explicit encoding is requested - if (encoding != encoding_auto) return encoding; - - // try to guess encoding (based on XML specification, Appendix F.1) - const uint8_t* data = static_cast(contents); - - // look for BOM in first few bytes - if (size > 4 && data[0] == 0 && data[1] == 0 && data[2] == 0xfe && data[3] == 0xff) return encoding_utf32_be; - if (size > 4 && data[0] == 0xff && data[1] == 0xfe && data[2] == 0 && data[3] == 0) return encoding_utf32_le; - if (size > 2 && data[0] == 0xfe && data[1] == 0xff) return encoding_utf16_be; - if (size > 2 && data[0] == 0xff && data[1] == 0xfe) return encoding_utf16_le; - if (size > 3 && data[0] == 0xef && data[1] == 0xbb && data[2] == 0xbf) return encoding_utf8; - - // look for <, 4 && data[0] == 0 && data[1] == 0 && data[2] == 0 && data[3] == 0x3c) return encoding_utf32_be; - if (size > 4 && data[0] == 0x3c && data[1] == 0 && data[2] == 0 && data[3] == 0) return encoding_utf32_le; - if (size > 4 && data[0] == 0 && data[1] == 0x3c && data[2] == 0 && data[3] == 0x3f) return encoding_utf16_be; - if (size > 4 && data[0] == 0x3c && data[1] == 0 && data[2] == 0x3f && data[3] == 0) return encoding_utf16_le; - if (size > 4 && data[0] == 0x3c && data[1] == 0x3f && data[2] == 0x78 && data[3] == 0x6d) return encoding_utf8; - - // look for utf16 < followed by node name (this may fail, but is better than utf8 since 
it's zero terminated so early) - if (size > 2 && data[0] == 0 && data[1] == 0x3c) return encoding_utf16_be; - if (size > 2 && data[0] == 0x3c && data[1] == 0) return encoding_utf16_le; - - // no known BOM detected, assume utf8 - return encoding_utf8; - } - - bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) - { - if (is_mutable) - { - out_buffer = static_cast(const_cast(contents)); - } - else - { - void* buffer = global_allocate(size > 0 ? size : 1); - if (!buffer) return false; - - memcpy(buffer, contents, size); - - out_buffer = static_cast(buffer); - } - - out_length = size / sizeof(char_t); - - return true; - } - -#ifdef PUGIXML_WCHAR_MODE - inline bool need_endian_swap_utf(xml_encoding le, xml_encoding re) - { - return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) || - (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be); - } - - bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) - { - const char_t* data = static_cast(contents); - - if (is_mutable) - { - out_buffer = const_cast(data); - } - else - { - out_buffer = static_cast(global_allocate(size > 0 ? size : 1)); - if (!out_buffer) return false; - } - - out_length = size / sizeof(char_t); - - convert_wchar_endian_swap(out_buffer, data, out_length); - - return true; - } - - bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size) - { - const uint8_t* data = static_cast(contents); - - // first pass: get length in wchar_t units - out_length = utf_decoder::decode_utf8_block(data, size, 0); - - // allocate buffer of suitable length - out_buffer = static_cast(global_allocate((out_length > 0 ? 
out_length : 1) * sizeof(char_t))); - if (!out_buffer) return false; - - // second pass: convert utf8 input to wchar_t - wchar_writer::value_type out_begin = reinterpret_cast(out_buffer); - wchar_writer::value_type out_end = utf_decoder::decode_utf8_block(data, size, out_begin); - - assert(out_end == out_begin + out_length); - (void)!out_end; - - return true; - } - - template bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) - { - const uint16_t* data = static_cast(contents); - size_t length = size / sizeof(uint16_t); - - // first pass: get length in wchar_t units - out_length = utf_decoder::decode_utf16_block(data, length, 0); - - // allocate buffer of suitable length - out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); - if (!out_buffer) return false; - - // second pass: convert utf16 input to wchar_t - wchar_writer::value_type out_begin = reinterpret_cast(out_buffer); - wchar_writer::value_type out_end = utf_decoder::decode_utf16_block(data, length, out_begin); - - assert(out_end == out_begin + out_length); - (void)!out_end; - - return true; - } - - template bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) - { - const uint32_t* data = static_cast(contents); - size_t length = size / sizeof(uint32_t); - - // first pass: get length in wchar_t units - out_length = utf_decoder::decode_utf32_block(data, length, 0); - - // allocate buffer of suitable length - out_buffer = static_cast(global_allocate((out_length > 0 ? 
out_length : 1) * sizeof(char_t))); - if (!out_buffer) return false; - - // second pass: convert utf32 input to wchar_t - wchar_writer::value_type out_begin = reinterpret_cast(out_buffer); - wchar_writer::value_type out_end = utf_decoder::decode_utf32_block(data, length, out_begin); - - assert(out_end == out_begin + out_length); - (void)!out_end; - - return true; - } - - bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) - { - // get native encoding - xml_encoding wchar_encoding = get_wchar_encoding(); - - // fast path: no conversion required - if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); - - // only endian-swapping is required - if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable); - - // source encoding is utf8 - if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size); - - // source encoding is utf16 - if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - return (native_encoding == encoding) ? - convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) : - convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true()); - } - - // source encoding is utf32 - if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - return (native_encoding == encoding) ? 
- convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) : - convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true()); - } - - // invalid encoding combination (this can't happen) - assert(false); - - return false; - } -#else - template bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) - { - const uint16_t* data = static_cast(contents); - size_t length = size / sizeof(uint16_t); - - // first pass: get length in utf8 units - out_length = utf_decoder::decode_utf16_block(data, length, 0); - - // allocate buffer of suitable length - out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); - if (!out_buffer) return false; - - // second pass: convert utf16 input to utf8 - uint8_t* out_begin = reinterpret_cast(out_buffer); - uint8_t* out_end = utf_decoder::decode_utf16_block(data, length, out_begin); - - assert(out_end == out_begin + out_length); - (void)!out_end; - - return true; - } - - template bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) - { - const uint32_t* data = static_cast(contents); - size_t length = size / sizeof(uint32_t); - - // first pass: get length in utf8 units - out_length = utf_decoder::decode_utf32_block(data, length, 0); - - // allocate buffer of suitable length - out_buffer = static_cast(global_allocate((out_length > 0 ? 
out_length : 1) * sizeof(char_t))); - if (!out_buffer) return false; - - // second pass: convert utf32 input to utf8 - uint8_t* out_begin = reinterpret_cast(out_buffer); - uint8_t* out_end = utf_decoder::decode_utf32_block(data, length, out_begin); - - assert(out_end == out_begin + out_length); - (void)!out_end; - - return true; - } - - bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) - { - // fast path: no conversion required - if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); - - // source encoding is utf16 - if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - return (native_encoding == encoding) ? - convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) : - convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true()); - } - - // source encoding is utf32 - if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - return (native_encoding == encoding) ? 
- convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) : - convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true()); - } - - // invalid encoding combination (this can't happen) - assert(false); - - return false; - } -#endif - - bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source) - { - size_t source_length = impl::strlen(source); - - if (dest && impl::strlen(dest) >= source_length) - { - memcpy(dest, source, (source_length + 1) * sizeof(char_t)); - - return true; - } - else - { - xml_allocator* alloc = reinterpret_cast(header & xml_memory_page_pointer_mask)->allocator; - - char_t* buf = alloc->allocate_string(source_length + 1); - if (!buf) return false; - - memcpy(buf, source, (source_length + 1) * sizeof(char_t)); - - if (header & header_mask) alloc->deallocate_string(dest); - - dest = buf; - header |= header_mask; - - return true; - } - } - - struct gap - { - char_t* end; - size_t size; - - gap(): end(0), size(0) - { - } - - // Push new gap, move s count bytes further (skipping the gap). - // Collapse previous gap. - void push(char_t*& s, size_t count) - { - if (end) // there was a gap already; collapse it - { - // Move [old_gap_end, new_gap_start) to [old_gap_start, ...) - memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end)); - } - - s += count; // end of current gap - - // "merge" two gaps - end = s; - size += count; - } - - // Collapse all gaps, return past-the-end pointer - char_t* flush(char_t* s) - { - if (end) - { - // Move [old_gap_end, current_pos) to [old_gap_start, ...) - memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end)); - - return s - size; - } - else return s; - } - }; - - char_t* strconv_escape(char_t* s, gap& g) - { - char_t* stre = s + 1; - - switch (*stre) - { - case '#': // &#... - { - unsigned int ucsc = 0; - - if (stre[1] == 'x') // &#x... 
(hex code) - { - stre += 2; - - char_t ch = *stre; - - if (ch == ';') return stre; - - for (;;) - { - if (static_cast(ch - '0') <= 9) - ucsc = 16 * ucsc + (ch - '0'); - else if (static_cast((ch | ' ') - 'a') <= 5) - ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10); - else if (ch == ';') - break; - else // cancel - return stre; - - ch = *++stre; - } - - ++stre; - } - else // &#... (dec code) - { - char_t ch = *++stre; - - if (ch == ';') return stre; - - for (;;) - { - if (static_cast(ch - '0') <= 9) - ucsc = 10 * ucsc + (ch - '0'); - else if (ch == ';') - break; - else // cancel - return stre; - - ch = *++stre; - } - - ++stre; - } - - #ifdef PUGIXML_WCHAR_MODE - s = reinterpret_cast(wchar_writer::any(reinterpret_cast(s), ucsc)); - #else - s = reinterpret_cast(utf8_writer::any(reinterpret_cast(s), ucsc)); - #endif - - g.push(s, stre - s); - return stre; - } - case 'a': // &a - { - ++stre; - - if (*stre == 'm') // &am - { - if (*++stre == 'p' && *++stre == ';') // & - { - *s++ = '&'; - ++stre; - - g.push(s, stre - s); - return stre; - } - } - else if (*stre == 'p') // &ap - { - if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // ' - { - *s++ = '\''; - ++stre; - - g.push(s, stre - s); - return stre; - } - } - break; - } - case 'g': // &g - { - if (*++stre == 't' && *++stre == ';') // > - { - *s++ = '>'; - ++stre; - - g.push(s, stre - s); - return stre; - } - break; - } - case 'l': // &l - { - if (*++stre == 't' && *++stre == ';') // < - { - *s++ = '<'; - ++stre; - - g.push(s, stre - s); - return stre; - } - break; - } - case 'q': // &q - { - if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // " - { - *s++ = '"'; - ++stre; - - g.push(s, stre - s); - return stre; - } - break; - } - } - - return stre; - } - - // Utility macro for last character handling - #define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e))) - - char_t* strconv_comment(char_t* s, char_t endch) - { - if (!*s) return 0; - - gap g; - - while (true) - { - while 
(!IS_CHARTYPE(*s, ct_parse_comment)) ++s; - - if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair - { - *s++ = '\n'; // replace first one with 0x0a - - if (*s == '\n') g.push(s, 1); - } - else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) // comment ends here - { - *g.flush(s) = 0; - - return s + (s[2] == '>' ? 3 : 2); - } - else if (*s == 0) - { - return 0; - } - else ++s; - } - } - - char_t* strconv_cdata(char_t* s, char_t endch) - { - if (!*s) return 0; - - gap g; - - while (true) - { - while (!IS_CHARTYPE(*s, ct_parse_cdata)) ++s; - - if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair - { - *s++ = '\n'; // replace first one with 0x0a - - if (*s == '\n') g.push(s, 1); - } - else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) // CDATA ends here - { - *g.flush(s) = 0; - - return s + 1; - } - else if (*s == 0) - { - return 0; - } - else ++s; - } - } - - typedef char_t* (*strconv_pcdata_t)(char_t*); - - template struct strconv_pcdata_impl - { - static char_t* parse(char_t* s) - { - gap g; - - while (true) - { - while (!IS_CHARTYPE(*s, ct_parse_pcdata)) ++s; - - if (*s == '<') // PCDATA ends here - { - *g.flush(s) = 0; - - return s + 1; - } - else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair - { - *s++ = '\n'; // replace first one with 0x0a - - if (*s == '\n') g.push(s, 1); - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (*s == 0) - { - return s; - } - else ++s; - } - } - }; - - strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) - { - STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20); - - switch ((optmask >> 4) & 3) // get bitmask for flags (eol escapes) - { - case 0: return strconv_pcdata_impl::parse; - case 1: return strconv_pcdata_impl::parse; - case 2: return strconv_pcdata_impl::parse; - case 3: return strconv_pcdata_impl::parse; - default: return 0; // should not get here - } - } - - typedef char_t* (*strconv_attribute_t)(char_t*, char_t); - 
- template struct strconv_attribute_impl - { - static char_t* parse_wnorm(char_t* s, char_t end_quote) - { - gap g; - - // trim leading whitespaces - if (IS_CHARTYPE(*s, ct_space)) - { - char_t* str = s; - - do ++str; - while (IS_CHARTYPE(*str, ct_space)); - - g.push(s, str - s); - } - - while (true) - { - while (!IS_CHARTYPE(*s, ct_parse_attr_ws | ct_space)) ++s; - - if (*s == end_quote) - { - char_t* str = g.flush(s); - - do *str-- = 0; - while (IS_CHARTYPE(*str, ct_space)); - - return s + 1; - } - else if (IS_CHARTYPE(*s, ct_space)) - { - *s++ = ' '; - - if (IS_CHARTYPE(*s, ct_space)) - { - char_t* str = s + 1; - while (IS_CHARTYPE(*str, ct_space)) ++str; - - g.push(s, str - s); - } - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (!*s) - { - return 0; - } - else ++s; - } - } - - static char_t* parse_wconv(char_t* s, char_t end_quote) - { - gap g; - - while (true) - { - while (!IS_CHARTYPE(*s, ct_parse_attr_ws)) ++s; - - if (*s == end_quote) - { - *g.flush(s) = 0; - - return s + 1; - } - else if (IS_CHARTYPE(*s, ct_space)) - { - if (*s == '\r') - { - *s++ = ' '; - - if (*s == '\n') g.push(s, 1); - } - else *s++ = ' '; - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (!*s) - { - return 0; - } - else ++s; - } - } - - static char_t* parse_eol(char_t* s, char_t end_quote) - { - gap g; - - while (true) - { - while (!IS_CHARTYPE(*s, ct_parse_attr)) ++s; - - if (*s == end_quote) - { - *g.flush(s) = 0; - - return s + 1; - } - else if (*s == '\r') - { - *s++ = '\n'; - - if (*s == '\n') g.push(s, 1); - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (!*s) - { - return 0; - } - else ++s; - } - } - - static char_t* parse_simple(char_t* s, char_t end_quote) - { - gap g; - - while (true) - { - while (!IS_CHARTYPE(*s, ct_parse_attr)) ++s; - - if (*s == end_quote) - { - *g.flush(s) = 0; - - return s + 1; - } - else if (opt_escape::value && *s == 
'&') - { - s = strconv_escape(s, g); - } - else if (!*s) - { - return 0; - } - else ++s; - } - } - }; - - strconv_attribute_t get_strconv_attribute(unsigned int optmask) - { - STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80); - - switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes) - { - case 0: return strconv_attribute_impl::parse_simple; - case 1: return strconv_attribute_impl::parse_simple; - case 2: return strconv_attribute_impl::parse_eol; - case 3: return strconv_attribute_impl::parse_eol; - case 4: return strconv_attribute_impl::parse_wconv; - case 5: return strconv_attribute_impl::parse_wconv; - case 6: return strconv_attribute_impl::parse_wconv; - case 7: return strconv_attribute_impl::parse_wconv; - case 8: return strconv_attribute_impl::parse_wnorm; - case 9: return strconv_attribute_impl::parse_wnorm; - case 10: return strconv_attribute_impl::parse_wnorm; - case 11: return strconv_attribute_impl::parse_wnorm; - case 12: return strconv_attribute_impl::parse_wnorm; - case 13: return strconv_attribute_impl::parse_wnorm; - case 14: return strconv_attribute_impl::parse_wnorm; - case 15: return strconv_attribute_impl::parse_wnorm; - default: return 0; // should not get here - } - } - - inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0) - { - xml_parse_result result = {status, offset, encoding_auto}; - return result; - } - - struct xml_parser - { - xml_allocator alloc; - char_t* error_offset; - jmp_buf error_handler; - - // Parser utilities. 
- #define SKIPWS() { while (IS_CHARTYPE(*s, ct_space)) ++s; } - #define OPTSET(OPT) ( optmsk & OPT ) - #define PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); if (!cursor) longjmp(error_handler, status_out_of_memory); } - #define POPNODE() { cursor = cursor->parent; } - #define SCANFOR(X) { while (*s != 0 && !(X)) ++s; } - #define SCANWHILE(X) { while ((X)) ++s; } - #define ENDSEG() { ch = *s; *s = 0; ++s; } - #define THROW_ERROR(err, m) error_offset = m, longjmp(error_handler, err) - #define CHECK_ERROR(err, m) { if (*s == 0) THROW_ERROR(err, m); } - - xml_parser(const xml_allocator& alloc): alloc(alloc), error_offset(0) - { - } - - // DOCTYPE consists of nested sections of the following possible types: - // , , "...", '...' - // - // - // First group can not contain nested groups - // Second group can contain nested groups of the same type - // Third group can contain all other groups - void parse_doctype_primitive(char_t*& s) - { - if (*s == '"' || *s == '\'') - { - // quoted string - char_t ch = *s++; - SCANFOR(*s == ch); - if (!*s) THROW_ERROR(status_bad_doctype, s); - - s++; - } - else if (s[0] == '<' && s[1] == '?') - { - // - s += 2; - SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype - if (!*s) THROW_ERROR(status_bad_doctype, s); - - s += 2; - } - else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') - { - s += 4; - SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype - if (!*s) THROW_ERROR(status_bad_doctype, s); - - s += 4; - } - else THROW_ERROR(status_bad_doctype, s); - } - - void parse_doctype_ignore(char_t*& s) - { - assert(s[0] == '<' && s[1] == '!' && s[2] == '['); - s++; - - while (*s) - { - if (s[0] == '<' && s[1] == '!' 
&& s[2] == '[') - { - // nested ignore section - parse_doctype_ignore(s); - } - else if (s[0] == ']' && s[1] == ']' && s[2] == '>') - { - // ignore section end - s += 3; - - return; - } - else s++; - } - - THROW_ERROR(status_bad_doctype, s); - } - - void parse_doctype(char_t*& s, char_t endch, bool toplevel) - { - assert(s[0] == '<' && s[1] == '!'); - s++; - - while (*s) - { - if (s[0] == '<' && s[1] == '!' && s[2] != '-') - { - if (s[2] == '[') - { - // ignore - parse_doctype_ignore(s); - } - else - { - // some control group - parse_doctype(s, endch, false); - } - } - else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') - { - // unknown tag (forbidden), or some primitive group - parse_doctype_primitive(s); - } - else if (*s == '>') - { - s++; - - return; - } - else s++; - } - - if (!toplevel || endch != '>') THROW_ERROR(status_bad_doctype, s); - } - - void parse_exclamation(char_t*& ref_s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) - { - // load into registers - char_t* s = ref_s; - - // parse node contents, starting with exclamation mark - ++s; - - if (*s == '-') // 'value = s; // Save the offset. - } - - if (OPTSET(parse_eol) && OPTSET(parse_comments)) - { - s = strconv_comment(s, endch); - - if (!s) THROW_ERROR(status_bad_comment, cursor->value); - } - else - { - // Scan for terminating '-->'. - SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')); - CHECK_ERROR(status_bad_comment, s); - - if (OPTSET(parse_comments)) - *s = 0; // Zero-terminate this segment at the first terminating '-'. - - s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'. - } - - if (OPTSET(parse_comments)) - { - POPNODE(); // Pop since this is a standalone. - } - } - else THROW_ERROR(status_bad_comment, s); - } - else if (*s == '[') - { - // 'value = s; // Save the offset. - - if (OPTSET(parse_eol)) - { - s = strconv_cdata(s, endch); - - if (!s) THROW_ERROR(status_bad_cdata, cursor->value); - } - else - { - // Scan for terminating ']]>'. 
- SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')); - CHECK_ERROR(status_bad_cdata, s); - - *s++ = 0; // Zero-terminate this segment. - } - - POPNODE(); // Pop since this is a standalone. - } - else // Flagged for discard, but we still have to scan for the terminator. - { - // Scan for terminating ']]>'. - SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')); - CHECK_ERROR(status_bad_cdata, s); - - ++s; - } - - s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'. - } - else THROW_ERROR(status_bad_cdata, s); - } - else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E')) - { - if (s[6] != 'E') THROW_ERROR(status_bad_doctype, s); - - s -= 2; - - parse_doctype(s, endch, true); - } - else if (*s == 0 && endch == '-') THROW_ERROR(status_bad_comment, s); - else if (*s == 0 && endch == '[') THROW_ERROR(status_bad_cdata, s); - else THROW_ERROR(status_unrecognized_tag, s); - - // store from registers - ref_s = s; - } - - void parse_question(char_t*& ref_s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) - { - // load into registers - char_t* s = ref_s; - xml_node_struct* cursor = ref_cursor; - char_t ch = 0; - - // parse node contents, starting with question mark - ++s; - - // read PI target - char_t* target = s; - - if (!IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_pi, s); - - SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); - CHECK_ERROR(status_bad_pi, s); - - // determine node type; stricmp / strcasecmp is not portable - bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s; - - if (declaration ? 
OPTSET(parse_declaration) : OPTSET(parse_pi)) - { - if (declaration) - { - // disallow non top-level declarations - if ((cursor->header & xml_memory_page_type_mask) != node_document) THROW_ERROR(status_bad_pi, s); - - PUSHNODE(node_declaration); - } - else - { - PUSHNODE(node_pi); - } - - cursor->name = target; - - ENDSEG(); - - // parse value/attributes - if (ch == '?') - { - // empty node - if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_pi, s); - s += (*s == '>'); - - POPNODE(); - } - else if (IS_CHARTYPE(ch, ct_space)) - { - SKIPWS(); - - // scan for tag end - char_t* value = s; - - SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); - CHECK_ERROR(status_bad_pi, s); - - if (declaration) - { - // replace ending ? with / so that 'element' terminates properly - *s = '/'; - - // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES - s = value; - } - else - { - // store value and step over > - cursor->value = value; - POPNODE(); - - ENDSEG(); - - s += (*s == '>'); - } - } - else THROW_ERROR(status_bad_pi, s); - } - else - { - // scan for tag end - SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); - CHECK_ERROR(status_bad_pi, s); - - s += (s[1] == '>' ? 2 : 1); - } - - // store from registers - ref_s = s; - ref_cursor = cursor; - } - - void parse(char_t* s, xml_node_struct* xmldoc, unsigned int optmsk, char_t endch) - { - strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk); - strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk); - - char_t ch = 0; - xml_node_struct* cursor = xmldoc; - char_t* mark = s; - - while (*s != 0) - { - if (*s == '<') - { - ++s; - - LOC_TAG: - if (IS_CHARTYPE(*s, ct_start_symbol)) // '<#...' - { - PUSHNODE(node_element); // Append a new node to the tree. - - cursor->name = s; - - SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator. - ENDSEG(); // Save char in 'ch', terminate & step over. 
- - if (ch == '>') - { - // end of tag - } - else if (IS_CHARTYPE(ch, ct_space)) - { - LOC_ATTRIBUTES: - while (true) - { - SKIPWS(); // Eat any whitespace. - - if (IS_CHARTYPE(*s, ct_start_symbol)) // <... #... - { - xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute. - if (!a) THROW_ERROR(status_out_of_memory, 0); - - a->name = s; // Save the offset. - - SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator. - CHECK_ERROR(status_bad_attribute, s); - - ENDSEG(); // Save char in 'ch', terminate & step over. - CHECK_ERROR(status_bad_attribute, s); - - if (IS_CHARTYPE(ch, ct_space)) - { - SKIPWS(); // Eat any whitespace. - CHECK_ERROR(status_bad_attribute, s); - - ch = *s; - ++s; - } - - if (ch == '=') // '<... #=...' - { - SKIPWS(); // Eat any whitespace. - - if (*s == '"' || *s == '\'') // '<... #="...' - { - ch = *s; // Save quote char to avoid breaking on "''" -or- '""'. - ++s; // Step over the quote. - a->value = s; // Save the offset. - - s = strconv_attribute(s, ch); - - if (!s) THROW_ERROR(status_bad_attribute, a->value); - - // After this line the loop continues from the start; - // Whitespaces, / and > are ok, symbols and EOF are wrong, - // everything else will be detected - if (IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_attribute, s); - } - else THROW_ERROR(status_bad_attribute, s); - } - else THROW_ERROR(status_bad_attribute, s); - } - else if (*s == '/') - { - ++s; - - if (*s == '>') - { - POPNODE(); - s++; - break; - } - else if (*s == 0 && endch == '>') - { - POPNODE(); - break; - } - else THROW_ERROR(status_bad_start_element, s); - } - else if (*s == '>') - { - ++s; - - break; - } - else if (*s == 0 && endch == '>') - { - break; - } - else THROW_ERROR(status_bad_start_element, s); - } - - // !!! - } - else if (ch == '/') // '<#.../' - { - if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_start_element, s); - - POPNODE(); // Pop. 
- - s += (*s == '>'); - } - else if (ch == 0) - { - // we stepped over null terminator, backtrack & handle closing tag - --s; - - if (endch != '>') THROW_ERROR(status_bad_start_element, s); - } - else THROW_ERROR(status_bad_start_element, s); - } - else if (*s == '/') - { - ++s; - - char_t* name = cursor->name; - if (!name) THROW_ERROR(status_end_element_mismatch, s); - - while (IS_CHARTYPE(*s, ct_symbol)) - { - if (*s++ != *name++) THROW_ERROR(status_end_element_mismatch, s); - } - - if (*name) - { - if (*s == 0 && name[0] == endch && name[1] == 0) THROW_ERROR(status_bad_end_element, s); - else THROW_ERROR(status_end_element_mismatch, s); - } - - POPNODE(); // Pop. - - SKIPWS(); - - if (*s == 0) - { - if (endch != '>') THROW_ERROR(status_bad_end_element, s); - } - else - { - if (*s != '>') THROW_ERROR(status_bad_end_element, s); - ++s; - } - } - else if (*s == '?') // 'header & xml_memory_page_type_mask) == node_declaration) goto LOC_ATTRIBUTES; - } - else if (*s == '!') // 'parent) - { - PUSHNODE(node_pcdata); // Append a new node on the tree. - cursor->value = s; // Save the offset. - - s = strconv_pcdata(s); - - POPNODE(); // Pop since this is a standalone. 
- - if (!*s) break; - } - else - { - SCANFOR(*s == '<'); // '...<' - if (!*s) break; - - ++s; - } - - // We're after '<' - goto LOC_TAG; - } - } - - // check that last tag is closed - if (cursor != xmldoc) THROW_ERROR(status_end_element_mismatch, s); - } - - static xml_parse_result parse(char_t* buffer, size_t length, xml_node_struct* xmldoc, unsigned int optmsk) - { - // store buffer for offset_debug - static_cast(xmldoc)->buffer = buffer; - - // early-out for empty documents - if (length == 0) return make_parse_result(status_ok); - - // create parser on stack - xml_allocator& alloc = static_cast(xmldoc)->allocator; - - xml_parser parser(alloc); - - // save last character and make buffer zero-terminated (speeds up parsing) - char_t endch = buffer[length - 1]; - buffer[length - 1] = 0; - - // perform actual parsing - int error = setjmp(parser.error_handler); - - if (error == 0) - { - parser.parse(buffer, xmldoc, optmsk, endch); - } - - xml_parse_result result = make_parse_result(static_cast(error), parser.error_offset ? parser.error_offset - buffer : 0); - - // update allocator state - alloc = parser.alloc; - - // since we removed last character, we have to handle the only possible false positive - if (result && endch == '<') - { - // there's no possible well-formed document with < at the end - return make_parse_result(status_unrecognized_tag, length); - } - - return result; - } - }; - - // Output facilities - xml_encoding get_write_native_encoding() - { - #ifdef PUGIXML_WCHAR_MODE - return get_wchar_encoding(); - #else - return encoding_utf8; - #endif - } - - xml_encoding get_write_encoding(xml_encoding encoding) - { - // replace wchar encoding with utf implementation - if (encoding == encoding_wchar) return get_wchar_encoding(); - - // replace utf16 encoding with utf16 with specific endianness - if (encoding == encoding_utf16) return is_little_endian() ? 
encoding_utf16_le : encoding_utf16_be; - - // replace utf32 encoding with utf32 with specific endianness - if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - // only do autodetection if no explicit encoding is requested - if (encoding != encoding_auto) return encoding; - - // assume utf8 encoding - return encoding_utf8; - } - -#ifdef PUGIXML_WCHAR_MODE - size_t get_valid_length(const char_t* data, size_t length) - { - assert(length > 0); - - // discard last character if it's the lead of a surrogate pair - return (sizeof(wchar_t) == 2 && (unsigned)(static_cast(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length; - } - - size_t convert_buffer(char* result, const char_t* data, size_t length, xml_encoding encoding) - { - // only endian-swapping is required - if (need_endian_swap_utf(encoding, get_wchar_encoding())) - { - convert_wchar_endian_swap(reinterpret_cast(result), data, length); - - return length * sizeof(char_t); - } - - // convert to utf8 - if (encoding == encoding_utf8) - { - uint8_t* dest = reinterpret_cast(result); - - uint8_t* end = sizeof(wchar_t) == 2 ? - utf_decoder::decode_utf16_block(reinterpret_cast(data), length, dest) : - utf_decoder::decode_utf32_block(reinterpret_cast(data), length, dest); - - return static_cast(end - dest); - } - - // convert to utf16 - if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) - { - uint16_t* dest = reinterpret_cast(result); - - // convert to native utf16 - uint16_t* end = utf_decoder::decode_utf32_block(reinterpret_cast(data), length, dest); - - // swap if necessary - xml_encoding native_encoding = is_little_endian() ? 
encoding_utf16_le : encoding_utf16_be; - - if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); - - return static_cast(end - dest) * sizeof(uint16_t); - } - - // convert to utf32 - if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) - { - uint32_t* dest = reinterpret_cast(result); - - // convert to native utf32 - uint32_t* end = utf_decoder::decode_utf16_block(reinterpret_cast(data), length, dest); - - // swap if necessary - xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); - - return static_cast(end - dest) * sizeof(uint32_t); - } - - // invalid encoding combination (this can't happen) - assert(false); - - return 0; - } -#else - size_t get_valid_length(const char_t* data, size_t length) - { - assert(length > 4); - - for (size_t i = 1; i <= 4; ++i) - { - uint8_t ch = static_cast(data[length - i]); - - // either a standalone character or a leading one - if ((ch & 0xc0) != 0x80) return length - i; - } - - // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk - return length; - } - - size_t convert_buffer(char* result, const char_t* data, size_t length, xml_encoding encoding) - { - if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) - { - uint16_t* dest = reinterpret_cast(result); - - // convert to native utf16 - uint16_t* end = utf_decoder::decode_utf8_block(reinterpret_cast(data), length, dest); - - // swap if necessary - xml_encoding native_encoding = is_little_endian() ? 
encoding_utf16_le : encoding_utf16_be; - - if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); - - return static_cast(end - dest) * sizeof(uint16_t); - } - - if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) - { - uint32_t* dest = reinterpret_cast(result); - - // convert to native utf32 - uint32_t* end = utf_decoder::decode_utf8_block(reinterpret_cast(data), length, dest); - - // swap if necessary - xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); - - return static_cast(end - dest) * sizeof(uint32_t); - } - - // invalid encoding combination (this can't happen) - assert(false); - - return 0; - } -#endif - - class xml_buffered_writer - { - xml_buffered_writer(const xml_buffered_writer&); - xml_buffered_writer& operator=(const xml_buffered_writer&); - - public: - xml_buffered_writer(xml_writer& writer, xml_encoding user_encoding): writer(writer), bufsize(0), encoding(get_write_encoding(user_encoding)) - { - } - - ~xml_buffered_writer() - { - flush(); - } - - void flush() - { - flush(buffer, bufsize); - bufsize = 0; - } - - void flush(const char_t* data, size_t size) - { - if (size == 0) return; - - // fast path, just write data - if (encoding == get_write_native_encoding()) - writer.write(data, size * sizeof(char_t)); - else - { - // convert chunk - size_t result = convert_buffer(scratch, data, size, encoding); - assert(result <= sizeof(scratch)); - - // write data - writer.write(scratch, result); - } - } - - void write(const char_t* data, size_t length) - { - if (bufsize + length > bufcapacity) - { - // flush the remaining buffer contents - flush(); - - // handle large chunks - if (length > bufcapacity) - { - if (encoding == get_write_native_encoding()) - { - // fast path, can just write data chunk - writer.write(data, length * sizeof(char_t)); - return; - } - - // 
need to convert in suitable chunks - while (length > bufcapacity) - { - // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer - // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary) - size_t chunk_size = get_valid_length(data, bufcapacity); - - // convert chunk and write - flush(data, chunk_size); - - // iterate - data += chunk_size; - length -= chunk_size; - } - - // small tail is copied below - bufsize = 0; - } - } - - memcpy(buffer + bufsize, data, length * sizeof(char_t)); - bufsize += length; - } - - void write(const char_t* data) - { - write(data, impl::strlen(data)); - } - - void write(char_t d0) - { - if (bufsize + 1 > bufcapacity) flush(); - - buffer[bufsize + 0] = d0; - bufsize += 1; - } - - void write(char_t d0, char_t d1) - { - if (bufsize + 2 > bufcapacity) flush(); - - buffer[bufsize + 0] = d0; - buffer[bufsize + 1] = d1; - bufsize += 2; - } - - void write(char_t d0, char_t d1, char_t d2) - { - if (bufsize + 3 > bufcapacity) flush(); - - buffer[bufsize + 0] = d0; - buffer[bufsize + 1] = d1; - buffer[bufsize + 2] = d2; - bufsize += 3; - } - - void write(char_t d0, char_t d1, char_t d2, char_t d3) - { - if (bufsize + 4 > bufcapacity) flush(); - - buffer[bufsize + 0] = d0; - buffer[bufsize + 1] = d1; - buffer[bufsize + 2] = d2; - buffer[bufsize + 3] = d3; - bufsize += 4; - } - - void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) - { - if (bufsize + 5 > bufcapacity) flush(); - - buffer[bufsize + 0] = d0; - buffer[bufsize + 1] = d1; - buffer[bufsize + 2] = d2; - buffer[bufsize + 3] = d3; - buffer[bufsize + 4] = d4; - bufsize += 5; - } - - void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) - { - if (bufsize + 6 > bufcapacity) flush(); - - buffer[bufsize + 0] = d0; - buffer[bufsize + 1] = d1; - buffer[bufsize + 2] = d2; - buffer[bufsize + 3] = d3; - buffer[bufsize + 4] = d4; - buffer[bufsize + 5] = d5; - bufsize += 6; - } - - // 
utf8 maximum expansion: x4 (-> utf32) - // utf16 maximum expansion: x2 (-> utf32) - // utf32 maximum expansion: x1 - enum { bufcapacity = 2048 }; - - char_t buffer[bufcapacity]; - char scratch[4 * bufcapacity]; - - xml_writer& writer; - size_t bufsize; - xml_encoding encoding; - }; - - void write_bom(xml_writer& writer, xml_encoding encoding) - { - switch (encoding) - { - case encoding_utf8: - writer.write("\xef\xbb\xbf", 3); - break; - - case encoding_utf16_be: - writer.write("\xfe\xff", 2); - break; - - case encoding_utf16_le: - writer.write("\xff\xfe", 2); - break; - - case encoding_utf32_be: - writer.write("\x00\x00\xfe\xff", 4); - break; - - case encoding_utf32_le: - writer.write("\xff\xfe\x00\x00", 4); - break; - - default: - // invalid encoding (this should not happen) - assert(false); - } - } - - void text_output_escaped(xml_buffered_writer& writer, const char_t* s, output_chartype_t type) - { - while (*s) - { - const char_t* prev = s; - - // While *s is a usual symbol - while (!IS_OUTPUT_CHARTYPE(*s, type)) ++s; - - writer.write(prev, static_cast(s - prev)); - - switch (*s) - { - case 0: break; - case '&': - writer.write('&', 'a', 'm', 'p', ';'); - ++s; - break; - case '<': - writer.write('&', 'l', 't', ';'); - ++s; - break; - case '>': - writer.write('&', 'g', 't', ';'); - ++s; - break; - case '"': - writer.write('&', 'q', 'u', 'o', 't', ';'); - ++s; - break; - default: // s is not a usual symbol - { - unsigned int ch = static_cast(*s++); - assert(ch < 32); - - writer.write('&', '#', static_cast((ch / 10) + '0'), static_cast((ch % 10) + '0'), ';'); - } - } - } - } - - void node_output_attributes(xml_buffered_writer& writer, const xml_node& node) - { - const char_t* default_name = PUGIXML_TEXT(":anonymous"); - - for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute()) - { - writer.write(' '); - writer.write(a.name()[0] ? 
a.name() : default_name); - writer.write('=', '"'); - - text_output_escaped(writer, a.value(), oct_special_attr); - - writer.write('"'); - } - } - - void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth) - { - const char_t* default_name = PUGIXML_TEXT(":anonymous"); - - if ((flags & format_indent) != 0 && (flags & format_raw) == 0) - for (unsigned int i = 0; i < depth; ++i) writer.write(indent); - - switch (node.type()) - { - case node_document: - { - for (xml_node n = node.first_child(); n; n = n.next_sibling()) - node_output(writer, n, indent, flags, depth); - break; - } - - case node_element: - { - const char_t* name = node.name()[0] ? node.name() : default_name; - - writer.write('<'); - writer.write(name); - - node_output_attributes(writer, node); - - if (flags & format_raw) - { - if (!node.first_child()) - writer.write(' ', '/', '>'); - else - { - writer.write('>'); - - for (xml_node n = node.first_child(); n; n = n.next_sibling()) - node_output(writer, n, indent, flags, depth + 1); - - writer.write('<', '/'); - writer.write(name); - writer.write('>'); - } - } - else if (!node.first_child()) - writer.write(' ', '/', '>', '\n'); - else if (node.first_child() == node.last_child() && node.first_child().type() == node_pcdata) - { - writer.write('>'); - - text_output_escaped(writer, node.first_child().value(), oct_special_pcdata); - - writer.write('<', '/'); - writer.write(name); - writer.write('>', '\n'); - } - else - { - writer.write('>', '\n'); - - for (xml_node n = node.first_child(); n; n = n.next_sibling()) - node_output(writer, n, indent, flags, depth + 1); - - if ((flags & format_indent) != 0 && (flags & format_raw) == 0) - for (unsigned int i = 0; i < depth; ++i) writer.write(indent); - - writer.write('<', '/'); - writer.write(name); - writer.write('>', '\n'); - } - - break; - } - - case node_pcdata: - text_output_escaped(writer, node.value(), oct_special_pcdata); - if ((flags & 
format_raw) == 0) writer.write('\n'); - break; - - case node_cdata: - writer.write('<', '!', '[', 'C', 'D'); - writer.write('A', 'T', 'A', '['); - writer.write(node.value()); - writer.write(']', ']', '>'); - if ((flags & format_raw) == 0) writer.write('\n'); - break; - - case node_comment: - writer.write('<', '!', '-', '-'); - writer.write(node.value()); - writer.write('-', '-', '>'); - if ((flags & format_raw) == 0) writer.write('\n'); - break; - - case node_pi: - case node_declaration: - writer.write('<', '?'); - writer.write(node.name()[0] ? node.name() : default_name); - - if (node.type() == node_declaration) - { - node_output_attributes(writer, node); - } - else if (node.value()[0]) - { - writer.write(' '); - writer.write(node.value()); - } - - writer.write('?', '>'); - if ((flags & format_raw) == 0) writer.write('\n'); - break; - - default: - assert(false); - } - } - - inline bool has_declaration(const xml_node& node) - { - for (xml_node child = node.first_child(); child; child = child.next_sibling()) - { - xml_node_type type = child.type(); - - if (type == node_declaration) return true; - if (type == node_element) return false; - } - - return false; - } - - inline bool allow_insert_child(xml_node_type parent, xml_node_type child) - { - if (parent != node_document && parent != node_element) return false; - if (child == node_document || child == node_null) return false; - if (parent != node_document && child == node_declaration) return false; - - return true; - } - - void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip) - { - assert(dest.type() == source.type()); - - switch (source.type()) - { - case node_element: - { - dest.set_name(source.name()); - - for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute()) - dest.append_attribute(a.name()).set_value(a.value()); - - for (xml_node c = source.first_child(); c; c = c.next_sibling()) - { - if (c == skip) continue; - - xml_node cc = dest.append_child(c.type()); 
- assert(cc); - - recursive_copy_skip(cc, c, skip); - } - - break; - } - - case node_pcdata: - case node_cdata: - case node_comment: - dest.set_value(source.value()); - break; - - case node_pi: - dest.set_name(source.name()); - dest.set_value(source.value()); - break; - - case node_declaration: - { - dest.set_name(source.name()); - - for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute()) - dest.append_attribute(a.name()).set_value(a.value()); - - break; - } - - default: - assert(false); - } - } - -#ifndef PUGIXML_NO_STL - template xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream >& stream, unsigned int options, xml_encoding encoding) - { - if (!stream.good()) return make_parse_result(status_io_error); - - // get length of remaining data in stream - std::streamoff pos = stream.tellg(); - stream.seekg(0, std::ios::end); - std::streamoff length = stream.tellg() - pos; - stream.seekg(pos, std::ios::beg); - - if (!stream.good() || pos < 0 || length < 0) return make_parse_result(status_io_error); - - // read stream data into memory - size_t read_length = static_cast(length); - - T* s = static_cast(global_allocate((read_length > 0 ? 
read_length : 1) * sizeof(T))); - if (!s) return make_parse_result(status_out_of_memory); - - stream.read(s, static_cast(read_length)); - - // check for errors - size_t actual_length = static_cast(stream.gcount()); - assert(actual_length <= read_length); - - if (read_length > 0 && actual_length == 0) - { - global_deallocate(s); - return make_parse_result(status_io_error); - } - - // load data from buffer - return doc.load_buffer_inplace_own(s, actual_length * sizeof(T), options, encoding); - } -#endif -} - -namespace pugi -{ - xml_writer_file::xml_writer_file(void* file): file(file) - { - } - - void xml_writer_file::write(const void* data, size_t size) - { - fwrite(data, size, 1, static_cast(file)); - } - -#ifndef PUGIXML_NO_STL - xml_writer_stream::xml_writer_stream(std::basic_ostream >& stream): narrow_stream(&stream), wide_stream(0) - { - } - - xml_writer_stream::xml_writer_stream(std::basic_ostream >& stream): narrow_stream(0), wide_stream(&stream) - { - } - - void xml_writer_stream::write(const void* data, size_t size) - { - if (narrow_stream) - { - assert(!wide_stream); - narrow_stream->write(reinterpret_cast(data), static_cast(size)); - } - else - { - assert(wide_stream); - assert(size % sizeof(wchar_t) == 0); - - wide_stream->write(reinterpret_cast(data), static_cast(size / sizeof(wchar_t))); - } - } -#endif - - xml_tree_walker::xml_tree_walker(): _depth(0) - { - } - - xml_tree_walker::~xml_tree_walker() - { - } - - int xml_tree_walker::depth() const - { - return _depth; - } - - bool xml_tree_walker::begin(xml_node&) - { - return true; - } - - bool xml_tree_walker::end(xml_node&) - { - return true; - } - - xml_attribute::xml_attribute(): _attr(0) - { - } - - xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr) - { - } - - xml_attribute::operator xml_attribute::unspecified_bool_type() const - { -#ifdef __MWERKS__ - return _attr ? &xml_attribute::empty : 0; -#else - return _attr ? 
&xml_attribute::_attr : 0; -#endif - } - - bool xml_attribute::operator!() const - { - return !_attr; - } - - bool xml_attribute::operator==(const xml_attribute& r) const - { - return (_attr == r._attr); - } - - bool xml_attribute::operator!=(const xml_attribute& r) const - { - return (_attr != r._attr); - } - - bool xml_attribute::operator<(const xml_attribute& r) const - { - return (_attr < r._attr); - } - - bool xml_attribute::operator>(const xml_attribute& r) const - { - return (_attr > r._attr); - } - - bool xml_attribute::operator<=(const xml_attribute& r) const - { - return (_attr <= r._attr); - } - - bool xml_attribute::operator>=(const xml_attribute& r) const - { - return (_attr >= r._attr); - } - - xml_attribute xml_attribute::next_attribute() const - { - return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute(); - } - - xml_attribute xml_attribute::previous_attribute() const - { - return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute(); - } - - int xml_attribute::as_int() const - { - if (!_attr || !_attr->value) return 0; - - #ifdef PUGIXML_WCHAR_MODE - return (int)wcstol(_attr->value, 0, 10); - #else - return (int)strtol(_attr->value, 0, 10); - #endif - } - - unsigned int xml_attribute::as_uint() const - { - if (!_attr || !_attr->value) return 0; - - #ifdef PUGIXML_WCHAR_MODE - return (unsigned int)wcstoul(_attr->value, 0, 10); - #else - return (unsigned int)strtoul(_attr->value, 0, 10); - #endif - } - - double xml_attribute::as_double() const - { - if (!_attr || !_attr->value) return 0; - - #ifdef PUGIXML_WCHAR_MODE - return wcstod(_attr->value, 0); - #else - return strtod(_attr->value, 0); - #endif - } - - float xml_attribute::as_float() const - { - if (!_attr || !_attr->value) return 0; - - #ifdef PUGIXML_WCHAR_MODE - return (float)wcstod(_attr->value, 0); - #else - return (float)strtod(_attr->value, 0); - #endif - } - - bool xml_attribute::as_bool() const - { - if (!_attr || 
!_attr->value) return false; - - // only look at first char - char_t first = *_attr->value; - - // 1*, t* (true), T* (True), y* (yes), Y* (YES) - return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y'); - } - - bool xml_attribute::empty() const - { - return !_attr; - } - - const char_t* xml_attribute::name() const - { - return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT(""); - } - - const char_t* xml_attribute::value() const - { - return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT(""); - } - - unsigned int xml_attribute::document_order() const - { - return 0; - } - - xml_attribute& xml_attribute::operator=(const char_t* rhs) - { - set_value(rhs); - return *this; - } - - xml_attribute& xml_attribute::operator=(int rhs) - { - set_value(rhs); - return *this; - } - - xml_attribute& xml_attribute::operator=(unsigned int rhs) - { - set_value(rhs); - return *this; - } - - xml_attribute& xml_attribute::operator=(double rhs) - { - set_value(rhs); - return *this; - } - - xml_attribute& xml_attribute::operator=(bool rhs) - { - set_value(rhs); - return *this; - } - - bool xml_attribute::set_name(const char_t* rhs) - { - if (!_attr) return false; - - return strcpy_insitu(_attr->name, _attr->header, xml_memory_page_name_allocated_mask, rhs); - } - - bool xml_attribute::set_value(const char_t* rhs) - { - if (!_attr) return false; - - return strcpy_insitu(_attr->value, _attr->header, xml_memory_page_value_allocated_mask, rhs); - } - - bool xml_attribute::set_value(int rhs) - { - char buf[128]; - sprintf(buf, "%d", rhs); - - #ifdef PUGIXML_WCHAR_MODE - char_t wbuf[128]; - impl::widen_ascii(wbuf, buf); - - return set_value(wbuf); - #else - return set_value(buf); - #endif - } - - bool xml_attribute::set_value(unsigned int rhs) - { - char buf[128]; - sprintf(buf, "%u", rhs); - - #ifdef PUGIXML_WCHAR_MODE - char_t wbuf[128]; - impl::widen_ascii(wbuf, buf); - - return set_value(wbuf); - #else - return set_value(buf); - #endif - } - - bool 
xml_attribute::set_value(double rhs) - { - char buf[128]; - sprintf(buf, "%g", rhs); - - #ifdef PUGIXML_WCHAR_MODE - char_t wbuf[128]; - impl::widen_ascii(wbuf, buf); - - return set_value(wbuf); - #else - return set_value(buf); - #endif - } - - bool xml_attribute::set_value(bool rhs) - { - return set_value(rhs ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); - } - -#ifdef __BORLANDC__ - bool operator&&(const xml_attribute& lhs, bool rhs) - { - return (bool)lhs && rhs; - } - - bool operator||(const xml_attribute& lhs, bool rhs) - { - return (bool)lhs || rhs; - } -#endif - - xml_node::xml_node(): _root(0) - { - } - - xml_node::xml_node(xml_node_struct* p): _root(p) - { - } - - xml_node::operator xml_node::unspecified_bool_type() const - { -#ifdef __MWERKS__ - return _root ? &xml_node::empty : 0; -#else - return _root ? &xml_node::_root : 0; -#endif - } - - bool xml_node::operator!() const - { - return !_root; - } - - xml_node::iterator xml_node::begin() const - { - return iterator(_root ? _root->first_child : 0, _root); - } - - xml_node::iterator xml_node::end() const - { - return iterator(0, _root); - } - - xml_node::attribute_iterator xml_node::attributes_begin() const - { - return attribute_iterator(_root ? 
_root->first_attribute : 0, _root); - } - - xml_node::attribute_iterator xml_node::attributes_end() const - { - return attribute_iterator(0, _root); - } - - bool xml_node::operator==(const xml_node& r) const - { - return (_root == r._root); - } - - bool xml_node::operator!=(const xml_node& r) const - { - return (_root != r._root); - } - - bool xml_node::operator<(const xml_node& r) const - { - return (_root < r._root); - } - - bool xml_node::operator>(const xml_node& r) const - { - return (_root > r._root); - } - - bool xml_node::operator<=(const xml_node& r) const - { - return (_root <= r._root); - } - - bool xml_node::operator>=(const xml_node& r) const - { - return (_root >= r._root); - } - - bool xml_node::empty() const - { - return !_root; - } - - const char_t* xml_node::name() const - { - return (_root && _root->name) ? _root->name : PUGIXML_TEXT(""); - } - - xml_node_type xml_node::type() const - { - return _root ? static_cast(_root->header & xml_memory_page_type_mask) : node_null; - } - - const char_t* xml_node::value() const - { - return (_root && _root->value) ? 
_root->value : PUGIXML_TEXT(""); - } - - xml_node xml_node::child(const char_t* name) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequal(name, i->name)) return xml_node(i); - - return xml_node(); - } - - xml_node xml_node::child_w(const char_t* name) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequalwild(name, i->name)) return xml_node(i); - - return xml_node(); - } - - xml_attribute xml_node::attribute(const char_t* name) const - { - if (!_root) return xml_attribute(); - - for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) - if (i->name && impl::strequal(name, i->name)) - return xml_attribute(i); - - return xml_attribute(); - } - - xml_attribute xml_node::attribute_w(const char_t* name) const - { - if (!_root) return xml_attribute(); - - for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) - if (i->name && impl::strequalwild(name, i->name)) - return xml_attribute(i); - - return xml_attribute(); - } - - xml_node xml_node::next_sibling(const char_t* name) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) - if (i->name && impl::strequal(name, i->name)) return xml_node(i); - - return xml_node(); - } - - xml_node xml_node::next_sibling_w(const char_t* name) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) - if (i->name && impl::strequalwild(name, i->name)) return xml_node(i); - - return xml_node(); - } - - xml_node xml_node::next_sibling() const - { - if (!_root) return xml_node(); - - if (_root->next_sibling) return xml_node(_root->next_sibling); - else return xml_node(); - } - - xml_node xml_node::previous_sibling(const char_t* name) const - { - if (!_root) return xml_node(); - - for 
(xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) - if (i->name && impl::strequal(name, i->name)) return xml_node(i); - - return xml_node(); - } - - xml_node xml_node::previous_sibling_w(const char_t* name) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) - if (i->name && impl::strequalwild(name, i->name)) return xml_node(i); - - return xml_node(); - } - - xml_node xml_node::previous_sibling() const - { - if (!_root) return xml_node(); - - if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c); - else return xml_node(); - } - - xml_node xml_node::parent() const - { - return _root ? xml_node(_root->parent) : xml_node(); - } - - xml_node xml_node::root() const - { - xml_node_struct* r = _root; - - while (r && r->parent) r = r->parent; - - return xml_node(r); - } - - const char_t* xml_node::child_value() const - { - if (!_root) return PUGIXML_TEXT(""); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - { - xml_node_type type = static_cast(i->header & xml_memory_page_type_mask); - - if (i->value && (type == node_pcdata || type == node_cdata)) - return i->value; - } - - return PUGIXML_TEXT(""); - } - - const char_t* xml_node::child_value(const char_t* name) const - { - return child(name).child_value(); - } - - const char_t* xml_node::child_value_w(const char_t* name) const - { - if (!_root) return PUGIXML_TEXT(""); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequalwild(name, i->name)) return xml_node(i).child_value(); - - return PUGIXML_TEXT(""); - } - - xml_attribute xml_node::first_attribute() const - { - return _root ? xml_attribute(_root->first_attribute) : xml_attribute(); - } - - xml_attribute xml_node::last_attribute() const - { - return _root && _root->first_attribute ? 
xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute(); - } - - xml_node xml_node::first_child() const - { - return _root ? xml_node(_root->first_child) : xml_node(); - } - - xml_node xml_node::last_child() const - { - return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node(); - } - - bool xml_node::set_name(const char_t* rhs) - { - switch (type()) - { - case node_pi: - case node_declaration: - case node_element: - return strcpy_insitu(_root->name, _root->header, xml_memory_page_name_allocated_mask, rhs); - - default: - return false; - } - } - - bool xml_node::set_value(const char_t* rhs) - { - switch (type()) - { - case node_pi: - case node_cdata: - case node_pcdata: - case node_comment: - return strcpy_insitu(_root->value, _root->header, xml_memory_page_value_allocated_mask, rhs); - - default: - return false; - } - } - - xml_attribute xml_node::append_attribute(const char_t* name) - { - if (type() != node_element && type() != node_declaration) return xml_attribute(); - - xml_attribute a(append_attribute_ll(_root, get_allocator(_root))); - if (!a) return xml_attribute(); - - a.set_name(name); - - return a; - } - - xml_attribute xml_node::insert_attribute_before(const char_t* name, const xml_attribute& attr) - { - if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute(); - - // check that attribute belongs to *this - xml_attribute_struct* cur = attr._attr; - - while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c; - - if (cur != _root->first_attribute) return xml_attribute(); - - xml_attribute a(allocate_attribute(get_allocator(_root))); - if (!a) return xml_attribute(); - - a.set_name(name); - - if (attr._attr->prev_attribute_c->next_attribute) - attr._attr->prev_attribute_c->next_attribute = a._attr; - else - _root->first_attribute = a._attr; - - a._attr->prev_attribute_c = attr._attr->prev_attribute_c; - a._attr->next_attribute = attr._attr; - 
attr._attr->prev_attribute_c = a._attr; - - return a; - } - - xml_attribute xml_node::insert_attribute_after(const char_t* name, const xml_attribute& attr) - { - if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute(); - - // check that attribute belongs to *this - xml_attribute_struct* cur = attr._attr; - - while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c; - - if (cur != _root->first_attribute) return xml_attribute(); - - xml_attribute a(allocate_attribute(get_allocator(_root))); - if (!a) return xml_attribute(); - - a.set_name(name); - - if (attr._attr->next_attribute) - attr._attr->next_attribute->prev_attribute_c = a._attr; - else - _root->first_attribute->prev_attribute_c = a._attr; - - a._attr->next_attribute = attr._attr->next_attribute; - a._attr->prev_attribute_c = attr._attr; - attr._attr->next_attribute = a._attr; - - return a; - } - - xml_attribute xml_node::append_copy(const xml_attribute& proto) - { - if (!proto) return xml_attribute(); - - xml_attribute result = append_attribute(proto.name()); - result.set_value(proto.value()); - - return result; - } - - xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr) - { - if (!proto) return xml_attribute(); - - xml_attribute result = insert_attribute_after(proto.name(), attr); - result.set_value(proto.value()); - - return result; - } - - xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr) - { - if (!proto) return xml_attribute(); - - xml_attribute result = insert_attribute_before(proto.name(), attr); - result.set_value(proto.value()); - - return result; - } - - xml_node xml_node::append_child(xml_node_type type) - { - if (!allow_insert_child(this->type(), type)) return xml_node(); - - xml_node n(append_node(_root, get_allocator(_root), type)); - if (!n) return xml_node(); - - if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); - - return n; - } 
- - xml_node xml_node::insert_child_before(xml_node_type type, const xml_node& node) - { - if (!allow_insert_child(this->type(), type)) return xml_node(); - if (!node._root || node._root->parent != _root) return xml_node(); - - xml_node n(allocate_node(get_allocator(_root), type)); - if (!n) return xml_node(); - - n._root->parent = _root; - - if (node._root->prev_sibling_c->next_sibling) - node._root->prev_sibling_c->next_sibling = n._root; - else - _root->first_child = n._root; - - n._root->prev_sibling_c = node._root->prev_sibling_c; - n._root->next_sibling = node._root; - node._root->prev_sibling_c = n._root; - - if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); - - return n; - } - - xml_node xml_node::insert_child_after(xml_node_type type, const xml_node& node) - { - if (!allow_insert_child(this->type(), type)) return xml_node(); - if (!node._root || node._root->parent != _root) return xml_node(); - - xml_node n(allocate_node(get_allocator(_root), type)); - if (!n) return xml_node(); - - n._root->parent = _root; - - if (node._root->next_sibling) - node._root->next_sibling->prev_sibling_c = n._root; - else - _root->first_child->prev_sibling_c = n._root; - - n._root->next_sibling = node._root->next_sibling; - n._root->prev_sibling_c = node._root; - node._root->next_sibling = n._root; - - if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); - - return n; - } - - xml_node xml_node::append_copy(const xml_node& proto) - { - xml_node result = append_child(proto.type()); - - if (result) recursive_copy_skip(result, proto, result); - - return result; - } - - xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node) - { - xml_node result = insert_child_after(proto.type(), node); - - if (result) recursive_copy_skip(result, proto, result); - - return result; - } - - xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node) - { - xml_node result = insert_child_before(proto.type(), node); - - if (result) 
recursive_copy_skip(result, proto, result); - - return result; - } - - bool xml_node::remove_attribute(const char_t* name) - { - return remove_attribute(attribute(name)); - } - - bool xml_node::remove_attribute(const xml_attribute& a) - { - if (!_root || !a._attr) return false; - - // check that attribute belongs to *this - xml_attribute_struct* attr = a._attr; - - while (attr->prev_attribute_c->next_attribute) attr = attr->prev_attribute_c; - - if (attr != _root->first_attribute) return false; - - if (a._attr->next_attribute) a._attr->next_attribute->prev_attribute_c = a._attr->prev_attribute_c; - else if (_root->first_attribute) _root->first_attribute->prev_attribute_c = a._attr->prev_attribute_c; - - if (a._attr->prev_attribute_c->next_attribute) a._attr->prev_attribute_c->next_attribute = a._attr->next_attribute; - else _root->first_attribute = a._attr->next_attribute; - - destroy_attribute(a._attr, get_allocator(_root)); - - return true; - } - - bool xml_node::remove_child(const char_t* name) - { - return remove_child(child(name)); - } - - bool xml_node::remove_child(const xml_node& n) - { - if (!_root || !n._root || n._root->parent != _root) return false; - - if (n._root->next_sibling) n._root->next_sibling->prev_sibling_c = n._root->prev_sibling_c; - else if (_root->first_child) _root->first_child->prev_sibling_c = n._root->prev_sibling_c; - - if (n._root->prev_sibling_c->next_sibling) n._root->prev_sibling_c->next_sibling = n._root->next_sibling; - else _root->first_child = n._root->next_sibling; - - destroy_node(n._root, get_allocator(_root)); - - return true; - } - - xml_node xml_node::find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequal(name, i->name)) - { - for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) - if (impl::strequal(attr_name, 
a->name) && impl::strequal(attr_value, a->value)) - return xml_node(i); - } - - return xml_node(); - } - - xml_node xml_node::find_child_by_attribute_w(const char_t* name, const char_t* attr_name, const char_t* attr_value) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequalwild(name, i->name)) - { - for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) - if (impl::strequalwild(attr_name, a->name) && impl::strequalwild(attr_value, a->value)) - return xml_node(i); - } - - return xml_node(); - } - - xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) - if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value)) - return xml_node(i); - - return xml_node(); - } - - xml_node xml_node::find_child_by_attribute_w(const char_t* attr_name, const char_t* attr_value) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) - if (impl::strequalwild(attr_name, a->name) && impl::strequalwild(attr_value, a->value)) - return xml_node(i); - - return xml_node(); - } - -#ifndef PUGIXML_NO_STL - string_t xml_node::path(char_t delimiter) const - { - string_t path; - - xml_node cursor = *this; // Make a copy. - - path = cursor.name(); - - while (cursor.parent()) - { - cursor = cursor.parent(); - - string_t temp = cursor.name(); - temp += delimiter; - temp += path; - path.swap(temp); - } - - return path; - } -#endif - - xml_node xml_node::first_element_by_path(const char_t* path, char_t delimiter) const - { - xml_node found = *this; // Current search context. 
- - if (!_root || !path || !path[0]) return found; - - if (path[0] == delimiter) - { - // Absolute path; e.g. '/foo/bar' - while (found.parent()) found = found.parent(); - ++path; - } - - const char_t* path_segment = path; - - while (*path_segment == delimiter) ++path_segment; - - const char_t* path_segment_end = path_segment; - - while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end; - - if (path_segment == path_segment_end) return found; - - const char_t* next_segment = path_segment_end; - - while (*next_segment == delimiter) ++next_segment; - - if (*path_segment == '.' && path_segment + 1 == path_segment_end) - return found.first_element_by_path(next_segment, delimiter); - else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end) - return found.parent().first_element_by_path(next_segment, delimiter); - else - { - for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling) - { - if (j->name && impl::strequalrange(j->name, path_segment, static_cast(path_segment_end - path_segment))) - { - xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); - - if (subsearch) return subsearch; - } - } - - return xml_node(); - } - } - - bool xml_node::traverse(xml_tree_walker& walker) - { - walker._depth = -1; - - xml_node arg_begin = *this; - if (!walker.begin(arg_begin)) return false; - - xml_node cur = first_child(); - - if (cur) - { - ++walker._depth; - - do - { - xml_node arg_for_each = cur; - if (!walker.for_each(arg_for_each)) - return false; - - if (cur.first_child()) - { - ++walker._depth; - cur = cur.first_child(); - } - else if (cur.next_sibling()) - cur = cur.next_sibling(); - else - { - // Borland C++ workaround - while (!cur.next_sibling() && cur != *this && (bool)cur.parent()) - { - --walker._depth; - cur = cur.parent(); - } - - if (cur != *this) - cur = cur.next_sibling(); - } - } - while (cur && cur != *this); - } - - assert(walker._depth == -1); - - 
xml_node arg_end = *this; - return walker.end(arg_end); - } - - unsigned int xml_node::document_order() const - { - return 0; - } - - void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const - { - if (!_root) return; - - xml_buffered_writer buffered_writer(writer, encoding); - - node_output(buffered_writer, *this, indent, flags, depth); - } - -#ifndef PUGIXML_NO_STL - void xml_node::print(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const - { - if (!_root) return; - - xml_writer_stream writer(stream); - - print(writer, indent, flags, encoding, depth); - } - - void xml_node::print(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const - { - if (!_root) return; - - xml_writer_stream writer(stream); - - print(writer, indent, flags, encoding_wchar, depth); - } -#endif - - ptrdiff_t xml_node::offset_debug() const - { - xml_node_struct* r = root()._root; - - if (!r) return -1; - - const char_t* buffer = static_cast(r)->buffer; - - if (!buffer) return -1; - - switch (type()) - { - case node_document: - return 0; - - case node_element: - case node_declaration: - case node_pi: - return (_root->header & xml_memory_page_name_allocated_mask) ? -1 : _root->name - buffer; - - case node_pcdata: - case node_cdata: - case node_comment: - return (_root->header & xml_memory_page_value_allocated_mask) ? 
-1 : _root->value - buffer; - - default: - return -1; - } - } - -#ifdef __BORLANDC__ - bool operator&&(const xml_node& lhs, bool rhs) - { - return (bool)lhs && rhs; - } - - bool operator||(const xml_node& lhs, bool rhs) - { - return (bool)lhs || rhs; - } -#endif - - xml_node_iterator::xml_node_iterator() - { - } - - xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent()) - { - } - - xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) - { - } - - bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const - { - return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; - } - - bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const - { - return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; - } - - xml_node& xml_node_iterator::operator*() - { - assert(_wrap._root); - return _wrap; - } - - xml_node* xml_node_iterator::operator->() - { - assert(_wrap._root); - return &_wrap; - } - - const xml_node_iterator& xml_node_iterator::operator++() - { - assert(_wrap._root); - _wrap._root = _wrap._root->next_sibling; - return *this; - } - - xml_node_iterator xml_node_iterator::operator++(int) - { - xml_node_iterator temp = *this; - ++*this; - return temp; - } - - const xml_node_iterator& xml_node_iterator::operator--() - { - if (_wrap._root) _wrap = _wrap.previous_sibling(); - else _wrap = _parent.last_child(); - return *this; - } - - xml_node_iterator xml_node_iterator::operator--(int) - { - xml_node_iterator temp = *this; - --*this; - return temp; - } - - xml_attribute_iterator::xml_attribute_iterator() - { - } - - xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent) - { - } - - xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) - { - } - - bool 
xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const - { - return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root; - } - - bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const - { - return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root; - } - - xml_attribute& xml_attribute_iterator::operator*() - { - assert(_wrap._attr); - return _wrap; - } - - xml_attribute* xml_attribute_iterator::operator->() - { - assert(_wrap._attr); - return &_wrap; - } - - const xml_attribute_iterator& xml_attribute_iterator::operator++() - { - assert(_wrap._attr); - _wrap._attr = _wrap._attr->next_attribute; - return *this; - } - - xml_attribute_iterator xml_attribute_iterator::operator++(int) - { - xml_attribute_iterator temp = *this; - ++*this; - return temp; - } - - const xml_attribute_iterator& xml_attribute_iterator::operator--() - { - if (_wrap._attr) _wrap = _wrap.previous_attribute(); - else _wrap = _parent.last_attribute(); - return *this; - } - - xml_attribute_iterator xml_attribute_iterator::operator--(int) - { - xml_attribute_iterator temp = *this; - --*this; - return temp; - } - - const char* xml_parse_result::description() const - { - switch (status) - { - case status_ok: return "No error"; - - case status_file_not_found: return "File was not found"; - case status_io_error: return "Error reading from file/stream"; - case status_out_of_memory: return "Could not allocate memory"; - case status_internal_error: return "Internal error occurred"; - - case status_unrecognized_tag: return "Could not determine tag type"; - - case status_bad_pi: return "Error parsing document declaration/processing instruction"; - case status_bad_comment: return "Error parsing comment"; - case status_bad_cdata: return "Error parsing CDATA section"; - case status_bad_doctype: return "Error parsing document type declaration"; - case status_bad_pcdata: return "Error parsing PCDATA section"; - case 
status_bad_start_element: return "Error parsing start element tag"; - case status_bad_attribute: return "Error parsing element attribute"; - case status_bad_end_element: return "Error parsing end element tag"; - case status_end_element_mismatch: return "Start-end tags mismatch"; - - default: return "Unknown error"; - } - } - - xml_document::xml_document(): _buffer(0) - { - create(); - } - - xml_document::~xml_document() - { - destroy(); - } - - void xml_document::create() - { - destroy(); - - // initialize sentinel page - STATIC_ASSERT(offsetof(xml_memory_page, data) + sizeof(xml_document_struct) + xml_memory_page_alignment <= sizeof(_memory)); - - // align upwards to page boundary - void* page_memory = reinterpret_cast((reinterpret_cast(_memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1)); - - // prepare page structure - xml_memory_page* page = xml_memory_page::construct(page_memory); - - page->busy_size = xml_memory_page_size; - - // allocate new root - _root = new (page->data) xml_document_struct(page); - _root->prev_sibling_c = _root; - - // setup allocator - xml_allocator& a = static_cast(_root)->allocator; - a = xml_allocator(page); - - // setup sentinel page - page->allocator = &a; - } - - void xml_document::destroy() - { - // destroy static storage - if (_buffer) - { - global_deallocate(_buffer); - _buffer = 0; - } - - // destroy dynamic storage, leave sentinel page (it's in static memory) - if (_root) - { - xml_memory_page* root_page = reinterpret_cast(_root->header & xml_memory_page_pointer_mask); - assert(root_page && !root_page->prev && !root_page->memory); - - // destroy all pages - for (xml_memory_page* page = root_page->next; page; ) - { - xml_memory_page* next = page->next; - - xml_allocator::deallocate_page(page); - - page = next; - } - - // cleanup root page - root_page->allocator = 0; - root_page->next = 0; - root_page->busy_size = root_page->freed_size = 0; - - _root = 0; - } - } - -#ifndef PUGIXML_NO_STL - 
xml_parse_result xml_document::load(std::basic_istream >& stream, unsigned int options, xml_encoding encoding) - { - create(); - - return load_stream_impl(*this, stream, options, encoding); - } - - xml_parse_result xml_document::load(std::basic_istream >& stream, unsigned int options) - { - create(); - - return load_stream_impl(*this, stream, options, encoding_wchar); - } -#endif - - xml_parse_result xml_document::load(const char_t* contents, unsigned int options) - { - create(); - - // Force native encoding (skip autodetection) - #ifdef PUGIXML_WCHAR_MODE - xml_encoding encoding = encoding_wchar; - #else - xml_encoding encoding = encoding_utf8; - #endif - - return load_buffer(contents, impl::strlen(contents) * sizeof(char_t), options, encoding); - } - - xml_parse_result xml_document::parse(char* xmlstr, unsigned int options) - { - return load_buffer_inplace(xmlstr, strlen(xmlstr), options, encoding_utf8); - } - - xml_parse_result xml_document::parse(const transfer_ownership_tag&, char* xmlstr, unsigned int options) - { - return load_buffer_inplace_own(xmlstr, strlen(xmlstr), options, encoding_utf8); - } - - xml_parse_result xml_document::load_file(const char* path, unsigned int options, xml_encoding encoding) - { - create(); - - FILE* file = fopen(path, "rb"); - if (!file) return make_parse_result(status_file_not_found); - - fseek(file, 0, SEEK_END); - long length = ftell(file); - fseek(file, 0, SEEK_SET); - - if (length < 0) - { - fclose(file); - return make_parse_result(status_io_error); - } - - char* s = static_cast(global_allocate(length > 0 ? 
length : 1)); - - if (!s) - { - fclose(file); - return make_parse_result(status_out_of_memory); - } - - size_t read = fread(s, 1, (size_t)length, file); - fclose(file); - - if (read != (size_t)length) - { - global_deallocate(s); - return make_parse_result(status_io_error); - } - - return load_buffer_inplace_own(s, length, options, encoding); - } - - xml_parse_result xml_document::load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own) - { - create(); - - // get actual encoding - xml_encoding buffer_encoding = get_buffer_encoding(encoding, contents, size); - - // get private buffer - char_t* buffer = 0; - size_t length = 0; - - if (!convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return make_parse_result(status_out_of_memory); - - // delete original buffer if we performed a conversion - if (own && buffer != contents) global_deallocate(contents); - - // parse - xml_parse_result res = xml_parser::parse(buffer, length, _root, options); - - // remember encoding - res.encoding = buffer_encoding; - - // grab onto buffer if it's our buffer, user is responsible for deallocating contens himself - if (own || buffer != contents) _buffer = buffer; - - return res; - } - - xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) - { - return load_buffer_impl(const_cast(contents), size, options, encoding, false, false); - } - - xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding) - { - return load_buffer_impl(contents, size, options, encoding, true, false); - } - - xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding) - { - return load_buffer_impl(contents, size, options, encoding, true, true); - } - - void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int 
flags, xml_encoding encoding) const - { - if (flags & format_write_bom) write_bom(writer, get_write_encoding(encoding)); - - xml_buffered_writer buffered_writer(writer, encoding); - - if (!(flags & format_no_declaration) && !has_declaration(*this)) - { - buffered_writer.write(PUGIXML_TEXT("")); - if (!(flags & format_raw)) buffered_writer.write('\n'); - } - - node_output(buffered_writer, *this, indent, flags, 0); - } - -#ifndef PUGIXML_NO_STL - void xml_document::save(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const - { - xml_writer_stream writer(stream); - - save(writer, indent, flags, encoding); - } - - void xml_document::save(std::basic_ostream >& stream, const char_t* indent, unsigned int flags) const - { - xml_writer_stream writer(stream); - - save(writer, indent, flags, encoding_wchar); - } -#endif - - bool xml_document::save_file(const char* path, const char_t* indent, unsigned int flags, xml_encoding encoding) const - { - FILE* file = fopen(path, "wb"); - if (!file) return false; - - xml_writer_file writer(file); - save(writer, indent, flags, encoding); - - fclose(file); - - return true; - } - - void xml_document::precompute_document_order() - { - } - -#ifndef PUGIXML_NO_STL - std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str) - { - assert(str); - - STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); - - size_t length = wcslen(str); - - // first pass: get length in utf8 characters - size_t size = sizeof(wchar_t) == 2 ? - utf_decoder::decode_utf16_block(reinterpret_cast(str), length, 0) : - utf_decoder::decode_utf32_block(reinterpret_cast(str), length, 0); - - // allocate resulting string - std::string result; - result.resize(size); - - // second pass: convert to utf8 - if (size > 0) - { - uint8_t* begin = reinterpret_cast(&result[0]); - uint8_t* end = sizeof(wchar_t) == 2 ? 
- utf_decoder::decode_utf16_block(reinterpret_cast(str), length, begin) : - utf_decoder::decode_utf32_block(reinterpret_cast(str), length, begin); - - // truncate invalid output - assert(begin <= end && static_cast(end - begin) <= result.size()); - result.resize(static_cast(end - begin)); - } - - return result; - } - - std::wstring PUGIXML_FUNCTION as_utf16(const char* str) - { - return as_wide(str); - } - - std::wstring PUGIXML_FUNCTION as_wide(const char* str) - { - assert(str); - - const uint8_t* data = reinterpret_cast(str); - size_t size = strlen(str); - - // first pass: get length in wchar_t - size_t length = utf_decoder::decode_utf8_block(data, size, 0); - - // allocate resulting string - std::wstring result; - result.resize(length); - - // second pass: convert to wchar_t - if (length > 0) - { - wchar_writer::value_type begin = reinterpret_cast(&result[0]); - wchar_writer::value_type end = utf_decoder::decode_utf8_block(data, size, begin); - - // truncate invalid output - assert(begin <= end && static_cast(end - begin) <= result.size()); - result.resize(static_cast(end - begin)); - } - - return result; - } -#endif - - void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate) - { - global_allocate = allocate; - global_deallocate = deallocate; - } - - allocation_function PUGIXML_FUNCTION get_memory_allocation_function() - { - return global_allocate; - } - - deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function() - { - return global_deallocate; - } -} - -#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) -namespace std -{ - // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) - std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&) - { - return std::bidirectional_iterator_tag(); - } - - std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&) - { - return 
std::bidirectional_iterator_tag(); - } -} -#endif - -#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) -namespace std -{ - // Workarounds for (non-standard) iterator category detection - std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&) - { - return std::bidirectional_iterator_tag(); - } - - std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&) - { - return std::bidirectional_iterator_tag(); - } -} -#endif - -/** - * Copyright (c) 2006-2010 Arseny Kapoulkine - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ +/** + * pugixml parser - version 0.9 + * -------------------------------------------------------- + * Copyright (C) 2006-2010, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Report bugs and download new versions at http://code.google.com/p/pugixml/ + * + * This library is distributed under the MIT License. 
See notice at the end + * of this file. + * + * This work is based on the pugxml parser, which is: + * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) + */ + +#include "pugixml.hpp" + +#include +#include +#include +#include +#include +#include + +#ifndef PUGIXML_NO_STL +# include +# include +# include +#endif + +// For placement new +#include + +#ifdef _MSC_VER +# pragma warning(disable: 4127) // conditional expression is constant +# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable +# pragma warning(disable: 4324) // structure was padded due to __declspec(align()) +# pragma warning(disable: 4996) // this function or variable may be unsafe +#endif + +#ifdef __INTEL_COMPILER +# pragma warning(disable: 177) // function was declared but never referenced +# pragma warning(disable: 1478 1786) // function was declared "deprecated" +#endif + +#ifdef __BORLANDC__ +# pragma warn -8008 // condition is always false +# pragma warn -8066 // unreachable code +#endif + +#ifdef __SNC__ +# pragma diag_suppress=178 // function waS declared but never referenced +#endif + +// uintptr_t +#if !defined(_MSC_VER) || _MSC_VER >= 1600 +# include +#else +# if _MSC_VER < 1300 +// No native uintptr_t in MSVC6 +typedef size_t uintptr_t; +# endif +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; +#endif + +// Inlining controls +#if defined(_MSC_VER) && _MSC_VER >= 1300 +# define PUGIXML_NO_INLINE __declspec(noinline) +#elif defined(__GNUC__) +# define PUGIXML_NO_INLINE __attribute__((noinline)) +#else +# define PUGIXML_NO_INLINE +#endif + +// Simple static assertion +#define STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 
1 : -1] = {0}; (void)condition_failed[0]; } + +// Memory allocation +namespace +{ + void* default_allocate(size_t size) + { + return malloc(size); + } + + void default_deallocate(void* ptr) + { + free(ptr); + } + + pugi::allocation_function global_allocate = default_allocate; + pugi::deallocation_function global_deallocate = default_deallocate; +} + +// String utilities prototypes +namespace pugi +{ + namespace impl + { + size_t strlen(const char_t* s); + bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count); + void widen_ascii(wchar_t* dest, const char* source); + } +} + +// String utilities +namespace pugi +{ + namespace impl + { + // Get string length + size_t strlen(const char_t* s) + { + #ifdef PUGIXML_WCHAR_MODE + return wcslen(s); + #else + return ::strlen(s); + #endif + } + + // Compare two strings + bool PUGIXML_FUNCTION strequal(const char_t* src, const char_t* dst) + { + #ifdef PUGIXML_WCHAR_MODE + return wcscmp(src, dst) == 0; + #else + return strcmp(src, dst) == 0; + #endif + } + + // Compare lhs with [rhs_begin, rhs_end) + bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count) + { + for (size_t i = 0; i < count; ++i) + if (lhs[i] != rhs[i]) + return false; + + return lhs[count] == 0; + } + + // Character set pattern match. + static bool strequalwild_cset(const char_t** src, const char_t** dst) + { + int find = 0, excl = 0, star = 0; + + if (**src == '!') + { + excl = 1; + ++(*src); + } + + while (**src != ']' || star == 1) + { + if (find == 0) + { + if (**src == '-' && *(*src-1) < *(*src+1) && *(*src+1) != ']' && star == 0) + { + if (**dst >= *(*src-1) && **dst <= *(*src+1)) + { + find = 1; + ++(*src); + } + } + else if (**src == **dst) find = 1; + } + ++(*src); + star = 0; + } + + if (excl == 1) find = (1 - find); + if (find == 1) ++(*dst); + + return find == 0; + } + + // Wildcard pattern match. 
+ static bool strequalwild_astr(const char_t** src, const char_t** dst) + { + int find = 1; + ++(*src); + while ((**dst != 0 && **src == '?') || **src == '*') + { + if(**src == '?') ++(*dst); + ++(*src); + } + while (**src == '*') ++(*src); + if (**dst == 0 && **src != 0) return 0; + if (**dst == 0 && **src == 0) return 1; + else + { + if (!impl::strequalwild(*src,*dst)) + { + do + { + ++(*dst); + while(**src != **dst && **src != '[' && **dst != 0) + ++(*dst); + } + while ((**dst != 0) ? !impl::strequalwild(*src,*dst) : 0 != (find=0)); + } + if (**dst == 0 && **src == 0) find = 1; + return find == 0; + } + } + + // Compare two strings, with globbing, and character sets. + bool PUGIXML_FUNCTION strequalwild(const char_t* src, const char_t* dst) + { + int find = 1; + for(; *src != 0 && find == 1 && *dst != 0; ++src) + { + switch (*src) + { + case '?': ++dst; break; + case '[': ++src; find = !strequalwild_cset(&src,&dst); break; + case '*': find = !strequalwild_astr(&src,&dst); --src; break; + default : find = (int) (*src == *dst); ++dst; + } + } + while (*src == '*' && find == 1) ++src; + return (find == 1 && *dst == 0 && *src == 0); + } + +#ifdef PUGIXML_WCHAR_MODE + // Convert string to wide string, assuming all symbols are ASCII + void widen_ascii(wchar_t* dest, const char* source) + { + for (const char* i = source; *i; ++i) *dest++ = *i; + *dest = 0; + } +#endif + } +} + +namespace pugi +{ + static const size_t xml_memory_page_size = 32768; + + static const uintptr_t xml_memory_page_alignment = 32; + static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1); + static const uintptr_t xml_memory_page_name_allocated_mask = 16; + static const uintptr_t xml_memory_page_value_allocated_mask = 8; + static const uintptr_t xml_memory_page_type_mask = 7; + + struct xml_allocator; + + struct xml_memory_page + { + static xml_memory_page* construct(void* memory) + { + if (!memory) return 0; + + xml_memory_page* result = static_cast(memory); + + 
result->allocator = 0; + result->memory = 0; + result->prev = 0; + result->next = 0; + result->busy_size = 0; + result->freed_size = 0; + + return result; + } + + xml_allocator* allocator; + + void* memory; + + xml_memory_page* prev; + xml_memory_page* next; + + size_t busy_size; + size_t freed_size; + + char data[1]; + }; + + struct xml_memory_string_header + { + xml_memory_page* page; + size_t full_size; + }; + + struct xml_allocator + { + xml_allocator(xml_memory_page* root): _root(root), _busy_size(root ? root->busy_size : 0) + { + } + + xml_memory_page* allocate_page(size_t data_size) + { + size_t size = offsetof(xml_memory_page, data) + data_size; + + // allocate block with some alignment, leaving memory for worst-case padding + void* memory = global_allocate(size + xml_memory_page_alignment); + if (!memory) return 0; + + // align upwards to page boundary + void* page_memory = reinterpret_cast((reinterpret_cast(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1)); + + // prepare page structure + xml_memory_page* page = xml_memory_page::construct(page_memory); + + page->memory = memory; + page->allocator = _root->allocator; + + return page; + } + + static void deallocate_page(xml_memory_page* page) + { + global_deallocate(page->memory); + } + + void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); + + void* allocate_memory(size_t size, xml_memory_page*& out_page) + { + if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page); + + void* buf = _root->data + _busy_size; + + _busy_size += size; + + out_page = _root; + + return buf; + } + + void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) + { + assert(ptr >= page->data && ptr < page->data + xml_memory_page_size); + (void)!ptr; + + if (page == _root) page->busy_size = _busy_size; + + page->freed_size += size; + assert(page->freed_size <= page->busy_size); + + if (page->freed_size == page->busy_size) + { + if (page->next == 
0) + { + assert(_root == page); + + // top page freed, just reset sizes + page->busy_size = page->freed_size = 0; + _busy_size = 0; + } + else + { + assert(_root != page); + assert(page->prev); + + // remove from the list + page->prev->next = page->next; + page->next->prev = page->prev; + + // deallocate + deallocate_page(page); + } + } + } + + char_t* allocate_string(size_t length) + { + // get actual size, rounded up to pointer alignment boundary + size_t size = ((length * sizeof(char_t)) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1); + + // allocate memory for string and header block + size_t full_size = sizeof(xml_memory_string_header) + size; + + xml_memory_page* page; + xml_memory_string_header* header = static_cast(allocate_memory(full_size, page)); + + if (!header) return 0; + + // setup header + header->page = page; + header->full_size = full_size; + + return reinterpret_cast(header + 1); + } + + void deallocate_string(char_t* string) + { + // get header + xml_memory_string_header* header = reinterpret_cast(string) - 1; + + // deallocate + deallocate_memory(header, header->full_size, header->page); + } + + xml_memory_page* _root; + size_t _busy_size; + }; + + PUGIXML_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) + { + const size_t large_allocation_threshold = xml_memory_page_size / 4; + + xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? 
xml_memory_page_size : size); + if (!page) return 0; + + if (size <= large_allocation_threshold) + { + _root->busy_size = _busy_size; + + // insert page at the end of linked list + page->prev = _root; + _root->next = page; + _root = page; + + _busy_size = size; + } + else + { + // insert page before the end of linked list + assert(_root->prev); + + page->prev = _root->prev; + page->next = _root; + + _root->prev->next = page; + _root->prev = page; + } + + // allocate inside page + page->busy_size = size; + + out_page = page; + return page->data; + } + + /// A 'name=value' XML attribute structure. + struct xml_attribute_struct + { + /// Default ctor + xml_attribute_struct(xml_memory_page* page): header(reinterpret_cast(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0) + { + } + + uintptr_t header; + + char_t* name; ///< Pointer to attribute name. + char_t* value; ///< Pointer to attribute value. + + xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list) + xml_attribute_struct* next_attribute; ///< Next attribute + }; + + /// An XML document tree node. + struct xml_node_struct + { + /// Default ctor + /// \param type - node type + xml_node_struct(xml_memory_page* page, xml_node_type type): header(reinterpret_cast(page) | type), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) + { + } + + uintptr_t header; + + xml_node_struct* parent; ///< Pointer to parent + + char_t* name; ///< Pointer to element name. + char_t* value; ///< Pointer to any associated string data. 
+ + xml_node_struct* first_child; ///< First child + + xml_node_struct* prev_sibling_c; ///< Left brother (cyclic list) + xml_node_struct* next_sibling; ///< Right brother + + xml_attribute_struct* first_attribute; ///< First attribute + }; + + struct xml_document_struct: public xml_node_struct + { + xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), allocator(0), buffer(0) + { + } + + xml_allocator allocator; + const char_t* buffer; + }; + + inline xml_allocator& get_allocator(const xml_node_struct* node) + { + assert(node); + + return *reinterpret_cast(node->header & xml_memory_page_pointer_mask)->allocator; + } +} + +// Low-level DOM operations +namespace +{ + using namespace pugi; + + inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) + { + xml_memory_page* page; + void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page); + + return new (memory) xml_attribute_struct(page); + } + + inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type) + { + xml_memory_page* page; + void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page); + + return new (memory) xml_node_struct(page, type); + } + + inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) + { + uintptr_t header = a->header; + + if (header & xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name); + if (header & xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value); + + alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast(header & xml_memory_page_pointer_mask)); + } + + inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) + { + uintptr_t header = n->header; + + if (header & xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name); + if (header & xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value); + + for (xml_attribute_struct* attr = n->first_attribute; attr; ) + { + xml_attribute_struct* next 
= attr->next_attribute; + + destroy_attribute(attr, alloc); + + attr = next; + } + + for (xml_node_struct* child = n->first_child; child; ) + { + xml_node_struct* next = child->next_sibling; + + destroy_node(child, alloc); + + child = next; + } + + alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast(header & xml_memory_page_pointer_mask)); + } + + PUGIXML_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) + { + xml_node_struct* child = allocate_node(alloc, type); + if (!child) return 0; + + child->parent = node; + + xml_node_struct* first_child = node->first_child; + + if (first_child) + { + xml_node_struct* last_child = first_child->prev_sibling_c; + + last_child->next_sibling = child; + child->prev_sibling_c = last_child; + first_child->prev_sibling_c = child; + } + else + { + node->first_child = child; + child->prev_sibling_c = child; + } + + return child; + } + + PUGIXML_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc) + { + xml_attribute_struct* a = allocate_attribute(alloc); + if (!a) return 0; + + xml_attribute_struct* first_attribute = node->first_attribute; + + if (first_attribute) + { + xml_attribute_struct* last_attribute = first_attribute->prev_attribute_c; + + last_attribute->next_attribute = a; + a->prev_attribute_c = last_attribute; + first_attribute->prev_attribute_c = a; + } + else + { + node->first_attribute = a; + a->prev_attribute_c = a; + } + + return a; + } +} + +// Helper classes for code generation +namespace +{ + struct opt_false + { + enum { value = 0 }; + }; + + struct opt_true + { + enum { value = 1 }; + }; +} + +// Unicode utilities +namespace +{ + inline uint16_t endian_swap(uint16_t value) + { + return static_cast(((value & 0xff) << 8) | (value >> 8)); + } + + inline uint32_t endian_swap(uint32_t value) + { + return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 
24); + } + + struct utf8_counter + { + typedef size_t value_type; + + static value_type low(value_type result, uint32_t ch) + { + // U+0000..U+007F + if (ch < 0x80) return result + 1; + // U+0080..U+07FF + else if (ch < 0x800) return result + 2; + // U+0800..U+FFFF + else return result + 3; + } + + static value_type high(value_type result, uint32_t) + { + // U+10000..U+10FFFF + return result + 4; + } + }; + + struct utf8_writer + { + typedef uint8_t* value_type; + + static value_type low(value_type result, uint32_t ch) + { + // U+0000..U+007F + if (ch < 0x80) + { + *result = static_cast(ch); + return result + 1; + } + // U+0080..U+07FF + else if (ch < 0x800) + { + result[0] = static_cast(0xC0 | (ch >> 6)); + result[1] = static_cast(0x80 | (ch & 0x3F)); + return result + 2; + } + // U+0800..U+FFFF + else + { + result[0] = static_cast(0xE0 | (ch >> 12)); + result[1] = static_cast(0x80 | ((ch >> 6) & 0x3F)); + result[2] = static_cast(0x80 | (ch & 0x3F)); + return result + 3; + } + } + + static value_type high(value_type result, uint32_t ch) + { + // U+10000..U+10FFFF + result[0] = static_cast(0xF0 | (ch >> 18)); + result[1] = static_cast(0x80 | ((ch >> 12) & 0x3F)); + result[2] = static_cast(0x80 | ((ch >> 6) & 0x3F)); + result[3] = static_cast(0x80 | (ch & 0x3F)); + return result + 4; + } + + static value_type any(value_type result, uint32_t ch) + { + return (ch < 0x10000) ? 
low(result, ch) : high(result, ch); + } + }; + + struct utf16_counter + { + typedef size_t value_type; + + static value_type low(value_type result, uint32_t) + { + return result + 1; + } + + static value_type high(value_type result, uint32_t) + { + return result + 2; + } + }; + + struct utf16_writer + { + typedef uint16_t* value_type; + + static value_type low(value_type result, uint32_t ch) + { + *result = static_cast(ch); + + return result + 1; + } + + static value_type high(value_type result, uint32_t ch) + { + uint32_t msh = (uint32_t)(ch - 0x10000) >> 10; + uint32_t lsh = (uint32_t)(ch - 0x10000) & 0x3ff; + + result[0] = static_cast(0xD800 + msh); + result[1] = static_cast(0xDC00 + lsh); + + return result + 2; + } + + static value_type any(value_type result, uint32_t ch) + { + return (ch < 0x10000) ? low(result, ch) : high(result, ch); + } + }; + + struct utf32_counter + { + typedef size_t value_type; + + static value_type low(value_type result, uint32_t) + { + return result + 1; + } + + static value_type high(value_type result, uint32_t) + { + return result + 1; + } + }; + + struct utf32_writer + { + typedef uint32_t* value_type; + + static value_type low(value_type result, uint32_t ch) + { + *result = ch; + + return result + 1; + } + + static value_type high(value_type result, uint32_t ch) + { + *result = ch; + + return result + 1; + } + + static value_type any(value_type result, uint32_t ch) + { + *result = ch; + + return result + 1; + } + }; + + template struct wchar_selector; + + template <> struct wchar_selector<2> + { + typedef uint16_t type; + typedef utf16_counter counter; + typedef utf16_writer writer; + }; + + template <> struct wchar_selector<4> + { + typedef uint32_t type; + typedef utf32_counter counter; + typedef utf32_writer writer; + }; + + typedef wchar_selector::counter wchar_counter; + typedef wchar_selector::writer wchar_writer; + + template struct utf_decoder + { + static inline typename Traits::value_type decode_utf8_block(const uint8_t* 
data, size_t size, typename Traits::value_type result) + { + const uint8_t utf8_byte_mask = 0x3f; + + while (size) + { + uint8_t lead = *data; + + // 0xxxxxxx -> U+0000..U+007F + if (lead < 0x80) + { + result = Traits::low(result, lead); + data += 1; + size -= 1; + + // process aligned single-byte (ascii) blocks + if ((reinterpret_cast(data) & 3) == 0) + { + while (size >= 4 && (*reinterpret_cast(data) & 0x80808080) == 0) + { + result = Traits::low(result, data[0]); + result = Traits::low(result, data[1]); + result = Traits::low(result, data[2]); + result = Traits::low(result, data[3]); + data += 4; + size -= 4; + } + } + } + // 110xxxxx -> U+0080..U+07FF + else if ((unsigned)(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80) + { + result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask)); + data += 2; + size -= 2; + } + // 1110xxxx -> U+0800-U+FFFF + else if ((unsigned)(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80) + { + result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask)); + data += 3; + size -= 3; + } + // 11110xxx -> U+10000..U+10FFFF + else if ((unsigned)(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80) + { + result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask)); + data += 4; + size -= 4; + } + // 10xxxxxx or 11111xxx -> invalid + else + { + data += 1; + size -= 1; + } + } + + return result; + } + + static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result) + { + const uint16_t* end = data + size; + + while (data < end) + { + uint16_t lead = opt_swap::value ? 
endian_swap(*data) : *data; + + // U+0000..U+D7FF + if (lead < 0xD800) + { + result = Traits::low(result, lead); + data += 1; + } + // U+E000..U+FFFF + else if ((unsigned)(lead - 0xE000) < 0x2000) + { + result = Traits::low(result, lead); + data += 1; + } + // surrogate pair lead + else if ((unsigned)(lead - 0xD800) < 0x400 && data + 1 < end) + { + uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1]; + + if ((unsigned)(next - 0xDC00) < 0x400) + { + result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff)); + data += 2; + } + else + { + data += 1; + } + } + else + { + data += 1; + } + } + + return result; + } + + static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result) + { + const uint32_t* end = data + size; + + while (data < end) + { + uint32_t lead = opt_swap::value ? endian_swap(*data) : *data; + + // U+0000..U+FFFF + if (lead < 0x10000) + { + result = Traits::low(result, lead); + data += 1; + } + // U+10000..U+10FFFF + else + { + result = Traits::high(result, lead); + data += 1; + } + } + + return result; + } + }; + + template inline void convert_utf_endian_swap(T* result, const T* data, size_t length) + { + for (size_t i = 0; i < length; ++i) result[i] = endian_swap(data[i]); + } + + inline void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) + { + for (size_t i = 0; i < length; ++i) result[i] = static_cast(endian_swap(static_cast::type>(data[i]))); + } +} + +namespace +{ + using namespace pugi; + + enum chartype_t + { + ct_parse_pcdata = 1, // \0, &, \r, < + ct_parse_attr = 2, // \0, &, \r, ', " + ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab + ct_space = 8, // \r, \n, space, tab + ct_parse_cdata = 16, // \0, ], >, \r + ct_parse_comment = 32, // \0, -, >, \r + ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, . 
+ ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, : + }; + + const unsigned char chartype_table[256] = + { + 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 + 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47 + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63 + 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79 + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95 + 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111 + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127 + + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+ + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 + }; + +#ifdef PUGIXML_WCHAR_MODE + #define IS_CHARTYPE(c, ct) ((static_cast(c) < 128 ? 
chartype_table[static_cast(c)] : chartype_table[128]) & (ct)) +#else + #define IS_CHARTYPE(c, ct) (chartype_table[static_cast(c)] & (ct)) +#endif + + enum output_chartype_t + { + oct_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, > + oct_special_attr = 2 // Any symbol >= 0 and < 32 (except \t), &, <, >, " + }; + + const unsigned char output_chartype_table[256] = + { + 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15 + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31 + 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32-47 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, // 48-63 + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 64-128 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 128+ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + +#ifdef PUGIXML_WCHAR_MODE + #define IS_OUTPUT_CHARTYPE(c, ct) ((static_cast(c) < 128 ? output_chartype_table[static_cast(c)] : output_chartype_table[128]) & (ct)) +#else + #define IS_OUTPUT_CHARTYPE(c, ct) (output_chartype_table[static_cast(c)] & (ct)) +#endif + + bool is_little_endian() + { + unsigned int ui = 1; + + return *reinterpret_cast(&ui) == 1; + } + + xml_encoding get_wchar_encoding() + { + STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); + + if (sizeof(wchar_t) == 2) + return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + else + return is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; + } + + xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size) + { + // replace wchar encoding with utf implementation + if (encoding == encoding_wchar) return get_wchar_encoding(); + + // replace utf16 encoding with utf16 with specific endianness + if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + + // replace utf32 encoding with utf32 with specific endianness + if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + // only do autodetection if no explicit encoding is requested + if (encoding != encoding_auto) return encoding; + + // try to guess encoding (based on XML specification, Appendix F.1) + const uint8_t* data = static_cast(contents); + + // look for BOM in first few bytes + if (size > 4 && data[0] == 0 && data[1] == 0 && data[2] == 0xfe && data[3] == 0xff) return encoding_utf32_be; + if (size > 4 && data[0] == 0xff && data[1] == 0xfe && data[2] == 0 && data[3] == 0) return encoding_utf32_le; + if (size > 2 && data[0] == 0xfe && data[1] == 0xff) return encoding_utf16_be; + if (size > 2 && data[0] == 0xff && data[1] == 0xfe) return encoding_utf16_le; + if (size > 3 && data[0] == 0xef && data[1] == 0xbb && data[2] == 0xbf) return encoding_utf8; + + // look for <, 4 && data[0] == 0 && data[1] == 0 && data[2] == 0 && data[3] == 0x3c) return encoding_utf32_be; + if (size > 4 && data[0] == 0x3c && data[1] == 0 && data[2] == 0 && data[3] == 0) return encoding_utf32_le; + if (size > 4 && data[0] == 0 && data[1] == 0x3c && data[2] == 0 && data[3] == 0x3f) return encoding_utf16_be; + if (size > 4 && data[0] == 0x3c && data[1] == 0 && data[2] == 0x3f && data[3] == 0) return encoding_utf16_le; + if (size > 4 && data[0] == 0x3c && data[1] == 0x3f && data[2] == 0x78 && data[3] == 0x6d) return encoding_utf8; + + // look for utf16 < followed by node name (this may fail, but is better than utf8 since 
it's zero terminated so early) + if (size > 2 && data[0] == 0 && data[1] == 0x3c) return encoding_utf16_be; + if (size > 2 && data[0] == 0x3c && data[1] == 0) return encoding_utf16_le; + + // no known BOM detected, assume utf8 + return encoding_utf8; + } + + bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) + { + if (is_mutable) + { + out_buffer = static_cast(const_cast(contents)); + } + else + { + void* buffer = global_allocate(size > 0 ? size : 1); + if (!buffer) return false; + + memcpy(buffer, contents, size); + + out_buffer = static_cast(buffer); + } + + out_length = size / sizeof(char_t); + + return true; + } + +#ifdef PUGIXML_WCHAR_MODE + inline bool need_endian_swap_utf(xml_encoding le, xml_encoding re) + { + return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) || + (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be); + } + + bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) + { + const char_t* data = static_cast(contents); + + if (is_mutable) + { + out_buffer = const_cast(data); + } + else + { + out_buffer = static_cast(global_allocate(size > 0 ? size : 1)); + if (!out_buffer) return false; + } + + out_length = size / sizeof(char_t); + + convert_wchar_endian_swap(out_buffer, data, out_length); + + return true; + } + + bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size) + { + const uint8_t* data = static_cast(contents); + + // first pass: get length in wchar_t units + out_length = utf_decoder::decode_utf8_block(data, size, 0); + + // allocate buffer of suitable length + out_buffer = static_cast(global_allocate((out_length > 0 ? 
out_length : 1) * sizeof(char_t))); + if (!out_buffer) return false; + + // second pass: convert utf8 input to wchar_t + wchar_writer::value_type out_begin = reinterpret_cast(out_buffer); + wchar_writer::value_type out_end = utf_decoder::decode_utf8_block(data, size, out_begin); + + assert(out_end == out_begin + out_length); + (void)!out_end; + + return true; + } + + template bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) + { + const uint16_t* data = static_cast(contents); + size_t length = size / sizeof(uint16_t); + + // first pass: get length in wchar_t units + out_length = utf_decoder::decode_utf16_block(data, length, 0); + + // allocate buffer of suitable length + out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); + if (!out_buffer) return false; + + // second pass: convert utf16 input to wchar_t + wchar_writer::value_type out_begin = reinterpret_cast(out_buffer); + wchar_writer::value_type out_end = utf_decoder::decode_utf16_block(data, length, out_begin); + + assert(out_end == out_begin + out_length); + (void)!out_end; + + return true; + } + + template bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) + { + const uint32_t* data = static_cast(contents); + size_t length = size / sizeof(uint32_t); + + // first pass: get length in wchar_t units + out_length = utf_decoder::decode_utf32_block(data, length, 0); + + // allocate buffer of suitable length + out_buffer = static_cast(global_allocate((out_length > 0 ? 
out_length : 1) * sizeof(char_t))); + if (!out_buffer) return false; + + // second pass: convert utf32 input to wchar_t + wchar_writer::value_type out_begin = reinterpret_cast(out_buffer); + wchar_writer::value_type out_end = utf_decoder::decode_utf32_block(data, length, out_begin); + + assert(out_end == out_begin + out_length); + (void)!out_end; + + return true; + } + + bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) + { + // get native encoding + xml_encoding wchar_encoding = get_wchar_encoding(); + + // fast path: no conversion required + if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); + + // only endian-swapping is required + if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable); + + // source encoding is utf8 + if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size); + + // source encoding is utf16 + if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) + { + xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + + return (native_encoding == encoding) ? + convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) : + convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true()); + } + + // source encoding is utf32 + if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) + { + xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + return (native_encoding == encoding) ? 
+ convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) : + convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true()); + } + + // invalid encoding combination (this can't happen) + assert(false); + + return false; + } +#else + template bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) + { + const uint16_t* data = static_cast(contents); + size_t length = size / sizeof(uint16_t); + + // first pass: get length in utf8 units + out_length = utf_decoder::decode_utf16_block(data, length, 0); + + // allocate buffer of suitable length + out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); + if (!out_buffer) return false; + + // second pass: convert utf16 input to utf8 + uint8_t* out_begin = reinterpret_cast(out_buffer); + uint8_t* out_end = utf_decoder::decode_utf16_block(data, length, out_begin); + + assert(out_end == out_begin + out_length); + (void)!out_end; + + return true; + } + + template bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) + { + const uint32_t* data = static_cast(contents); + size_t length = size / sizeof(uint32_t); + + // first pass: get length in utf8 units + out_length = utf_decoder::decode_utf32_block(data, length, 0); + + // allocate buffer of suitable length + out_buffer = static_cast(global_allocate((out_length > 0 ? 
out_length : 1) * sizeof(char_t))); + if (!out_buffer) return false; + + // second pass: convert utf32 input to utf8 + uint8_t* out_begin = reinterpret_cast(out_buffer); + uint8_t* out_end = utf_decoder::decode_utf32_block(data, length, out_begin); + + assert(out_end == out_begin + out_length); + (void)!out_end; + + return true; + } + + bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) + { + // fast path: no conversion required + if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); + + // source encoding is utf16 + if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) + { + xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + + return (native_encoding == encoding) ? + convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) : + convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true()); + } + + // source encoding is utf32 + if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) + { + xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + return (native_encoding == encoding) ? 
+ convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) : + convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true()); + } + + // invalid encoding combination (this can't happen) + assert(false); + + return false; + } +#endif + + bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source) + { + size_t source_length = impl::strlen(source); + + if (dest && impl::strlen(dest) >= source_length) + { + memcpy(dest, source, (source_length + 1) * sizeof(char_t)); + + return true; + } + else + { + xml_allocator* alloc = reinterpret_cast(header & xml_memory_page_pointer_mask)->allocator; + + char_t* buf = alloc->allocate_string(source_length + 1); + if (!buf) return false; + + memcpy(buf, source, (source_length + 1) * sizeof(char_t)); + + if (header & header_mask) alloc->deallocate_string(dest); + + dest = buf; + header |= header_mask; + + return true; + } + } + + struct gap + { + char_t* end; + size_t size; + + gap(): end(0), size(0) + { + } + + // Push new gap, move s count bytes further (skipping the gap). + // Collapse previous gap. + void push(char_t*& s, size_t count) + { + if (end) // there was a gap already; collapse it + { + // Move [old_gap_end, new_gap_start) to [old_gap_start, ...) + memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end)); + } + + s += count; // end of current gap + + // "merge" two gaps + end = s; + size += count; + } + + // Collapse all gaps, return past-the-end pointer + char_t* flush(char_t* s) + { + if (end) + { + // Move [old_gap_end, current_pos) to [old_gap_start, ...) + memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end)); + + return s - size; + } + else return s; + } + }; + + char_t* strconv_escape(char_t* s, gap& g) + { + char_t* stre = s + 1; + + switch (*stre) + { + case '#': // &#... + { + unsigned int ucsc = 0; + + if (stre[1] == 'x') // &#x... 
(hex code) + { + stre += 2; + + char_t ch = *stre; + + if (ch == ';') return stre; + + for (;;) + { + if (static_cast(ch - '0') <= 9) + ucsc = 16 * ucsc + (ch - '0'); + else if (static_cast((ch | ' ') - 'a') <= 5) + ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10); + else if (ch == ';') + break; + else // cancel + return stre; + + ch = *++stre; + } + + ++stre; + } + else // &#... (dec code) + { + char_t ch = *++stre; + + if (ch == ';') return stre; + + for (;;) + { + if (static_cast(ch - '0') <= 9) + ucsc = 10 * ucsc + (ch - '0'); + else if (ch == ';') + break; + else // cancel + return stre; + + ch = *++stre; + } + + ++stre; + } + + #ifdef PUGIXML_WCHAR_MODE + s = reinterpret_cast(wchar_writer::any(reinterpret_cast(s), ucsc)); + #else + s = reinterpret_cast(utf8_writer::any(reinterpret_cast(s), ucsc)); + #endif + + g.push(s, stre - s); + return stre; + } + case 'a': // &a + { + ++stre; + + if (*stre == 'm') // &am + { + if (*++stre == 'p' && *++stre == ';') // & + { + *s++ = '&'; + ++stre; + + g.push(s, stre - s); + return stre; + } + } + else if (*stre == 'p') // &ap + { + if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // ' + { + *s++ = '\''; + ++stre; + + g.push(s, stre - s); + return stre; + } + } + break; + } + case 'g': // &g + { + if (*++stre == 't' && *++stre == ';') // > + { + *s++ = '>'; + ++stre; + + g.push(s, stre - s); + return stre; + } + break; + } + case 'l': // &l + { + if (*++stre == 't' && *++stre == ';') // < + { + *s++ = '<'; + ++stre; + + g.push(s, stre - s); + return stre; + } + break; + } + case 'q': // &q + { + if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // " + { + *s++ = '"'; + ++stre; + + g.push(s, stre - s); + return stre; + } + break; + } + } + + return stre; + } + + // Utility macro for last character handling + #define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e))) + + char_t* strconv_comment(char_t* s, char_t endch) + { + if (!*s) return 0; + + gap g; + + while (true) + { + while 
(!IS_CHARTYPE(*s, ct_parse_comment)) ++s; + + if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair + { + *s++ = '\n'; // replace first one with 0x0a + + if (*s == '\n') g.push(s, 1); + } + else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) // comment ends here + { + *g.flush(s) = 0; + + return s + (s[2] == '>' ? 3 : 2); + } + else if (*s == 0) + { + return 0; + } + else ++s; + } + } + + char_t* strconv_cdata(char_t* s, char_t endch) + { + if (!*s) return 0; + + gap g; + + while (true) + { + while (!IS_CHARTYPE(*s, ct_parse_cdata)) ++s; + + if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair + { + *s++ = '\n'; // replace first one with 0x0a + + if (*s == '\n') g.push(s, 1); + } + else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) // CDATA ends here + { + *g.flush(s) = 0; + + return s + 1; + } + else if (*s == 0) + { + return 0; + } + else ++s; + } + } + + typedef char_t* (*strconv_pcdata_t)(char_t*); + + template struct strconv_pcdata_impl + { + static char_t* parse(char_t* s) + { + gap g; + + while (true) + { + while (!IS_CHARTYPE(*s, ct_parse_pcdata)) ++s; + + if (*s == '<') // PCDATA ends here + { + *g.flush(s) = 0; + + return s + 1; + } + else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair + { + *s++ = '\n'; // replace first one with 0x0a + + if (*s == '\n') g.push(s, 1); + } + else if (opt_escape::value && *s == '&') + { + s = strconv_escape(s, g); + } + else if (*s == 0) + { + return s; + } + else ++s; + } + } + }; + + strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) + { + STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20); + + switch ((optmask >> 4) & 3) // get bitmask for flags (eol escapes) + { + case 0: return strconv_pcdata_impl::parse; + case 1: return strconv_pcdata_impl::parse; + case 2: return strconv_pcdata_impl::parse; + case 3: return strconv_pcdata_impl::parse; + default: return 0; // should not get here + } + } + + typedef char_t* (*strconv_attribute_t)(char_t*, char_t); + 
+ template struct strconv_attribute_impl + { + static char_t* parse_wnorm(char_t* s, char_t end_quote) + { + gap g; + + // trim leading whitespaces + if (IS_CHARTYPE(*s, ct_space)) + { + char_t* str = s; + + do ++str; + while (IS_CHARTYPE(*str, ct_space)); + + g.push(s, str - s); + } + + while (true) + { + while (!IS_CHARTYPE(*s, ct_parse_attr_ws | ct_space)) ++s; + + if (*s == end_quote) + { + char_t* str = g.flush(s); + + do *str-- = 0; + while (IS_CHARTYPE(*str, ct_space)); + + return s + 1; + } + else if (IS_CHARTYPE(*s, ct_space)) + { + *s++ = ' '; + + if (IS_CHARTYPE(*s, ct_space)) + { + char_t* str = s + 1; + while (IS_CHARTYPE(*str, ct_space)) ++str; + + g.push(s, str - s); + } + } + else if (opt_escape::value && *s == '&') + { + s = strconv_escape(s, g); + } + else if (!*s) + { + return 0; + } + else ++s; + } + } + + static char_t* parse_wconv(char_t* s, char_t end_quote) + { + gap g; + + while (true) + { + while (!IS_CHARTYPE(*s, ct_parse_attr_ws)) ++s; + + if (*s == end_quote) + { + *g.flush(s) = 0; + + return s + 1; + } + else if (IS_CHARTYPE(*s, ct_space)) + { + if (*s == '\r') + { + *s++ = ' '; + + if (*s == '\n') g.push(s, 1); + } + else *s++ = ' '; + } + else if (opt_escape::value && *s == '&') + { + s = strconv_escape(s, g); + } + else if (!*s) + { + return 0; + } + else ++s; + } + } + + static char_t* parse_eol(char_t* s, char_t end_quote) + { + gap g; + + while (true) + { + while (!IS_CHARTYPE(*s, ct_parse_attr)) ++s; + + if (*s == end_quote) + { + *g.flush(s) = 0; + + return s + 1; + } + else if (*s == '\r') + { + *s++ = '\n'; + + if (*s == '\n') g.push(s, 1); + } + else if (opt_escape::value && *s == '&') + { + s = strconv_escape(s, g); + } + else if (!*s) + { + return 0; + } + else ++s; + } + } + + static char_t* parse_simple(char_t* s, char_t end_quote) + { + gap g; + + while (true) + { + while (!IS_CHARTYPE(*s, ct_parse_attr)) ++s; + + if (*s == end_quote) + { + *g.flush(s) = 0; + + return s + 1; + } + else if (opt_escape::value && *s == 
'&') + { + s = strconv_escape(s, g); + } + else if (!*s) + { + return 0; + } + else ++s; + } + } + }; + + strconv_attribute_t get_strconv_attribute(unsigned int optmask) + { + STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80); + + switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes) + { + case 0: return strconv_attribute_impl::parse_simple; + case 1: return strconv_attribute_impl::parse_simple; + case 2: return strconv_attribute_impl::parse_eol; + case 3: return strconv_attribute_impl::parse_eol; + case 4: return strconv_attribute_impl::parse_wconv; + case 5: return strconv_attribute_impl::parse_wconv; + case 6: return strconv_attribute_impl::parse_wconv; + case 7: return strconv_attribute_impl::parse_wconv; + case 8: return strconv_attribute_impl::parse_wnorm; + case 9: return strconv_attribute_impl::parse_wnorm; + case 10: return strconv_attribute_impl::parse_wnorm; + case 11: return strconv_attribute_impl::parse_wnorm; + case 12: return strconv_attribute_impl::parse_wnorm; + case 13: return strconv_attribute_impl::parse_wnorm; + case 14: return strconv_attribute_impl::parse_wnorm; + case 15: return strconv_attribute_impl::parse_wnorm; + default: return 0; // should not get here + } + } + + inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0) + { + xml_parse_result result = {status, offset, encoding_auto}; + return result; + } + + struct xml_parser + { + xml_allocator alloc; + char_t* error_offset; + jmp_buf error_handler; + + // Parser utilities. 
+ #define SKIPWS() { while (IS_CHARTYPE(*s, ct_space)) ++s; } + #define OPTSET(OPT) ( optmsk & OPT ) + #define PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); if (!cursor) longjmp(error_handler, status_out_of_memory); } + #define POPNODE() { cursor = cursor->parent; } + #define SCANFOR(X) { while (*s != 0 && !(X)) ++s; } + #define SCANWHILE(X) { while ((X)) ++s; } + #define ENDSEG() { ch = *s; *s = 0; ++s; } + #define THROW_ERROR(err, m) error_offset = m, longjmp(error_handler, err) + #define CHECK_ERROR(err, m) { if (*s == 0) THROW_ERROR(err, m); } + + xml_parser(const xml_allocator& alloc): alloc(alloc), error_offset(0) + { + } + + // DOCTYPE consists of nested sections of the following possible types: + // , , "...", '...' + // + // + // First group can not contain nested groups + // Second group can contain nested groups of the same type + // Third group can contain all other groups + void parse_doctype_primitive(char_t*& s) + { + if (*s == '"' || *s == '\'') + { + // quoted string + char_t ch = *s++; + SCANFOR(*s == ch); + if (!*s) THROW_ERROR(status_bad_doctype, s); + + s++; + } + else if (s[0] == '<' && s[1] == '?') + { + // + s += 2; + SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype + if (!*s) THROW_ERROR(status_bad_doctype, s); + + s += 2; + } + else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') + { + s += 4; + SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype + if (!*s) THROW_ERROR(status_bad_doctype, s); + + s += 4; + } + else THROW_ERROR(status_bad_doctype, s); + } + + void parse_doctype_ignore(char_t*& s) + { + assert(s[0] == '<' && s[1] == '!' && s[2] == '['); + s++; + + while (*s) + { + if (s[0] == '<' && s[1] == '!' 
&& s[2] == '[') + { + // nested ignore section + parse_doctype_ignore(s); + } + else if (s[0] == ']' && s[1] == ']' && s[2] == '>') + { + // ignore section end + s += 3; + + return; + } + else s++; + } + + THROW_ERROR(status_bad_doctype, s); + } + + void parse_doctype(char_t*& s, char_t endch, bool toplevel) + { + assert(s[0] == '<' && s[1] == '!'); + s++; + + while (*s) + { + if (s[0] == '<' && s[1] == '!' && s[2] != '-') + { + if (s[2] == '[') + { + // ignore + parse_doctype_ignore(s); + } + else + { + // some control group + parse_doctype(s, endch, false); + } + } + else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') + { + // unknown tag (forbidden), or some primitive group + parse_doctype_primitive(s); + } + else if (*s == '>') + { + s++; + + return; + } + else s++; + } + + if (!toplevel || endch != '>') THROW_ERROR(status_bad_doctype, s); + } + + void parse_exclamation(char_t*& ref_s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) + { + // load into registers + char_t* s = ref_s; + + // parse node contents, starting with exclamation mark + ++s; + + if (*s == '-') // 'value = s; // Save the offset. + } + + if (OPTSET(parse_eol) && OPTSET(parse_comments)) + { + s = strconv_comment(s, endch); + + if (!s) THROW_ERROR(status_bad_comment, cursor->value); + } + else + { + // Scan for terminating '-->'. + SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')); + CHECK_ERROR(status_bad_comment, s); + + if (OPTSET(parse_comments)) + *s = 0; // Zero-terminate this segment at the first terminating '-'. + + s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'. + } + + if (OPTSET(parse_comments)) + { + POPNODE(); // Pop since this is a standalone. + } + } + else THROW_ERROR(status_bad_comment, s); + } + else if (*s == '[') + { + // 'value = s; // Save the offset. + + if (OPTSET(parse_eol)) + { + s = strconv_cdata(s, endch); + + if (!s) THROW_ERROR(status_bad_cdata, cursor->value); + } + else + { + // Scan for terminating ']]>'. 
+ SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')); + CHECK_ERROR(status_bad_cdata, s); + + *s++ = 0; // Zero-terminate this segment. + } + + POPNODE(); // Pop since this is a standalone. + } + else // Flagged for discard, but we still have to scan for the terminator. + { + // Scan for terminating ']]>'. + SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')); + CHECK_ERROR(status_bad_cdata, s); + + ++s; + } + + s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'. + } + else THROW_ERROR(status_bad_cdata, s); + } + else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E')) + { + if (s[6] != 'E') THROW_ERROR(status_bad_doctype, s); + + s -= 2; + + parse_doctype(s, endch, true); + } + else if (*s == 0 && endch == '-') THROW_ERROR(status_bad_comment, s); + else if (*s == 0 && endch == '[') THROW_ERROR(status_bad_cdata, s); + else THROW_ERROR(status_unrecognized_tag, s); + + // store from registers + ref_s = s; + } + + void parse_question(char_t*& ref_s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) + { + // load into registers + char_t* s = ref_s; + xml_node_struct* cursor = ref_cursor; + char_t ch = 0; + + // parse node contents, starting with question mark + ++s; + + // read PI target + char_t* target = s; + + if (!IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_pi, s); + + SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); + CHECK_ERROR(status_bad_pi, s); + + // determine node type; stricmp / strcasecmp is not portable + bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s; + + if (declaration ? 
OPTSET(parse_declaration) : OPTSET(parse_pi)) + { + if (declaration) + { + // disallow non top-level declarations + if ((cursor->header & xml_memory_page_type_mask) != node_document) THROW_ERROR(status_bad_pi, s); + + PUSHNODE(node_declaration); + } + else + { + PUSHNODE(node_pi); + } + + cursor->name = target; + + ENDSEG(); + + // parse value/attributes + if (ch == '?') + { + // empty node + if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_pi, s); + s += (*s == '>'); + + POPNODE(); + } + else if (IS_CHARTYPE(ch, ct_space)) + { + SKIPWS(); + + // scan for tag end + char_t* value = s; + + SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); + CHECK_ERROR(status_bad_pi, s); + + if (declaration) + { + // replace ending ? with / so that 'element' terminates properly + *s = '/'; + + // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES + s = value; + } + else + { + // store value and step over > + cursor->value = value; + POPNODE(); + + ENDSEG(); + + s += (*s == '>'); + } + } + else THROW_ERROR(status_bad_pi, s); + } + else + { + // scan for tag end + SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); + CHECK_ERROR(status_bad_pi, s); + + s += (s[1] == '>' ? 2 : 1); + } + + // store from registers + ref_s = s; + ref_cursor = cursor; + } + + void parse(char_t* s, xml_node_struct* xmldoc, unsigned int optmsk, char_t endch) + { + strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk); + strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk); + + char_t ch = 0; + xml_node_struct* cursor = xmldoc; + char_t* mark = s; + + while (*s != 0) + { + if (*s == '<') + { + ++s; + + LOC_TAG: + if (IS_CHARTYPE(*s, ct_start_symbol)) // '<#...' + { + PUSHNODE(node_element); // Append a new node to the tree. + + cursor->name = s; + + SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator. + ENDSEG(); // Save char in 'ch', terminate & step over. 
+ + if (ch == '>') + { + // end of tag + } + else if (IS_CHARTYPE(ch, ct_space)) + { + LOC_ATTRIBUTES: + while (true) + { + SKIPWS(); // Eat any whitespace. + + if (IS_CHARTYPE(*s, ct_start_symbol)) // <... #... + { + xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute. + if (!a) THROW_ERROR(status_out_of_memory, 0); + + a->name = s; // Save the offset. + + SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator. + CHECK_ERROR(status_bad_attribute, s); + + ENDSEG(); // Save char in 'ch', terminate & step over. + CHECK_ERROR(status_bad_attribute, s); + + if (IS_CHARTYPE(ch, ct_space)) + { + SKIPWS(); // Eat any whitespace. + CHECK_ERROR(status_bad_attribute, s); + + ch = *s; + ++s; + } + + if (ch == '=') // '<... #=...' + { + SKIPWS(); // Eat any whitespace. + + if (*s == '"' || *s == '\'') // '<... #="...' + { + ch = *s; // Save quote char to avoid breaking on "''" -or- '""'. + ++s; // Step over the quote. + a->value = s; // Save the offset. + + s = strconv_attribute(s, ch); + + if (!s) THROW_ERROR(status_bad_attribute, a->value); + + // After this line the loop continues from the start; + // Whitespaces, / and > are ok, symbols and EOF are wrong, + // everything else will be detected + if (IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_attribute, s); + } + else THROW_ERROR(status_bad_attribute, s); + } + else THROW_ERROR(status_bad_attribute, s); + } + else if (*s == '/') + { + ++s; + + if (*s == '>') + { + POPNODE(); + s++; + break; + } + else if (*s == 0 && endch == '>') + { + POPNODE(); + break; + } + else THROW_ERROR(status_bad_start_element, s); + } + else if (*s == '>') + { + ++s; + + break; + } + else if (*s == 0 && endch == '>') + { + break; + } + else THROW_ERROR(status_bad_start_element, s); + } + + // !!! + } + else if (ch == '/') // '<#.../' + { + if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_start_element, s); + + POPNODE(); // Pop. 
+ + s += (*s == '>'); + } + else if (ch == 0) + { + // we stepped over null terminator, backtrack & handle closing tag + --s; + + if (endch != '>') THROW_ERROR(status_bad_start_element, s); + } + else THROW_ERROR(status_bad_start_element, s); + } + else if (*s == '/') + { + ++s; + + char_t* name = cursor->name; + if (!name) THROW_ERROR(status_end_element_mismatch, s); + + while (IS_CHARTYPE(*s, ct_symbol)) + { + if (*s++ != *name++) THROW_ERROR(status_end_element_mismatch, s); + } + + if (*name) + { + if (*s == 0 && name[0] == endch && name[1] == 0) THROW_ERROR(status_bad_end_element, s); + else THROW_ERROR(status_end_element_mismatch, s); + } + + POPNODE(); // Pop. + + SKIPWS(); + + if (*s == 0) + { + if (endch != '>') THROW_ERROR(status_bad_end_element, s); + } + else + { + if (*s != '>') THROW_ERROR(status_bad_end_element, s); + ++s; + } + } + else if (*s == '?') // 'header & xml_memory_page_type_mask) == node_declaration) goto LOC_ATTRIBUTES; + } + else if (*s == '!') // 'parent) + { + PUSHNODE(node_pcdata); // Append a new node on the tree. + cursor->value = s; // Save the offset. + + s = strconv_pcdata(s); + + POPNODE(); // Pop since this is a standalone. 
+ + if (!*s) break; + } + else + { + SCANFOR(*s == '<'); // '...<' + if (!*s) break; + + ++s; + } + + // We're after '<' + goto LOC_TAG; + } + } + + // check that last tag is closed + if (cursor != xmldoc) THROW_ERROR(status_end_element_mismatch, s); + } + + static xml_parse_result parse(char_t* buffer, size_t length, xml_node_struct* xmldoc, unsigned int optmsk) + { + // store buffer for offset_debug + static_cast(xmldoc)->buffer = buffer; + + // early-out for empty documents + if (length == 0) return make_parse_result(status_ok); + + // create parser on stack + xml_allocator& alloc = static_cast(xmldoc)->allocator; + + xml_parser parser(alloc); + + // save last character and make buffer zero-terminated (speeds up parsing) + char_t endch = buffer[length - 1]; + buffer[length - 1] = 0; + + // perform actual parsing + int error = setjmp(parser.error_handler); + + if (error == 0) + { + parser.parse(buffer, xmldoc, optmsk, endch); + } + + xml_parse_result result = make_parse_result(static_cast(error), parser.error_offset ? parser.error_offset - buffer : 0); + + // update allocator state + alloc = parser.alloc; + + // since we removed last character, we have to handle the only possible false positive + if (result && endch == '<') + { + // there's no possible well-formed document with < at the end + return make_parse_result(status_unrecognized_tag, length); + } + + return result; + } + }; + + // Output facilities + xml_encoding get_write_native_encoding() + { + #ifdef PUGIXML_WCHAR_MODE + return get_wchar_encoding(); + #else + return encoding_utf8; + #endif + } + + xml_encoding get_write_encoding(xml_encoding encoding) + { + // replace wchar encoding with utf implementation + if (encoding == encoding_wchar) return get_wchar_encoding(); + + // replace utf16 encoding with utf16 with specific endianness + if (encoding == encoding_utf16) return is_little_endian() ? 
encoding_utf16_le : encoding_utf16_be; + + // replace utf32 encoding with utf32 with specific endianness + if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + // only do autodetection if no explicit encoding is requested + if (encoding != encoding_auto) return encoding; + + // assume utf8 encoding + return encoding_utf8; + } + +#ifdef PUGIXML_WCHAR_MODE + size_t get_valid_length(const char_t* data, size_t length) + { + assert(length > 0); + + // discard last character if it's the lead of a surrogate pair + return (sizeof(wchar_t) == 2 && (unsigned)(static_cast(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length; + } + + size_t convert_buffer(char* result, const char_t* data, size_t length, xml_encoding encoding) + { + // only endian-swapping is required + if (need_endian_swap_utf(encoding, get_wchar_encoding())) + { + convert_wchar_endian_swap(reinterpret_cast(result), data, length); + + return length * sizeof(char_t); + } + + // convert to utf8 + if (encoding == encoding_utf8) + { + uint8_t* dest = reinterpret_cast(result); + + uint8_t* end = sizeof(wchar_t) == 2 ? + utf_decoder::decode_utf16_block(reinterpret_cast(data), length, dest) : + utf_decoder::decode_utf32_block(reinterpret_cast(data), length, dest); + + return static_cast(end - dest); + } + + // convert to utf16 + if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) + { + uint16_t* dest = reinterpret_cast(result); + + // convert to native utf16 + uint16_t* end = utf_decoder::decode_utf32_block(reinterpret_cast(data), length, dest); + + // swap if necessary + xml_encoding native_encoding = is_little_endian() ? 
encoding_utf16_le : encoding_utf16_be; + + if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); + + return static_cast(end - dest) * sizeof(uint16_t); + } + + // convert to utf32 + if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) + { + uint32_t* dest = reinterpret_cast(result); + + // convert to native utf32 + uint32_t* end = utf_decoder::decode_utf16_block(reinterpret_cast(data), length, dest); + + // swap if necessary + xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); + + return static_cast(end - dest) * sizeof(uint32_t); + } + + // invalid encoding combination (this can't happen) + assert(false); + + return 0; + } +#else + size_t get_valid_length(const char_t* data, size_t length) + { + assert(length > 4); + + for (size_t i = 1; i <= 4; ++i) + { + uint8_t ch = static_cast(data[length - i]); + + // either a standalone character or a leading one + if ((ch & 0xc0) != 0x80) return length - i; + } + + // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk + return length; + } + + size_t convert_buffer(char* result, const char_t* data, size_t length, xml_encoding encoding) + { + if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) + { + uint16_t* dest = reinterpret_cast(result); + + // convert to native utf16 + uint16_t* end = utf_decoder::decode_utf8_block(reinterpret_cast(data), length, dest); + + // swap if necessary + xml_encoding native_encoding = is_little_endian() ? 
encoding_utf16_le : encoding_utf16_be; + + if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); + + return static_cast(end - dest) * sizeof(uint16_t); + } + + if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) + { + uint32_t* dest = reinterpret_cast(result); + + // convert to native utf32 + uint32_t* end = utf_decoder::decode_utf8_block(reinterpret_cast(data), length, dest); + + // swap if necessary + xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); + + return static_cast(end - dest) * sizeof(uint32_t); + } + + // invalid encoding combination (this can't happen) + assert(false); + + return 0; + } +#endif + + class xml_buffered_writer + { + xml_buffered_writer(const xml_buffered_writer&); + xml_buffered_writer& operator=(const xml_buffered_writer&); + + public: + xml_buffered_writer(xml_writer& writer, xml_encoding user_encoding): writer(writer), bufsize(0), encoding(get_write_encoding(user_encoding)) + { + } + + ~xml_buffered_writer() + { + flush(); + } + + void flush() + { + flush(buffer, bufsize); + bufsize = 0; + } + + void flush(const char_t* data, size_t size) + { + if (size == 0) return; + + // fast path, just write data + if (encoding == get_write_native_encoding()) + writer.write(data, size * sizeof(char_t)); + else + { + // convert chunk + size_t result = convert_buffer(scratch, data, size, encoding); + assert(result <= sizeof(scratch)); + + // write data + writer.write(scratch, result); + } + } + + void write(const char_t* data, size_t length) + { + if (bufsize + length > bufcapacity) + { + // flush the remaining buffer contents + flush(); + + // handle large chunks + if (length > bufcapacity) + { + if (encoding == get_write_native_encoding()) + { + // fast path, can just write data chunk + writer.write(data, length * sizeof(char_t)); + return; + } + + // 
need to convert in suitable chunks + while (length > bufcapacity) + { + // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer + // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary) + size_t chunk_size = get_valid_length(data, bufcapacity); + + // convert chunk and write + flush(data, chunk_size); + + // iterate + data += chunk_size; + length -= chunk_size; + } + + // small tail is copied below + bufsize = 0; + } + } + + memcpy(buffer + bufsize, data, length * sizeof(char_t)); + bufsize += length; + } + + void write(const char_t* data) + { + write(data, impl::strlen(data)); + } + + void write(char_t d0) + { + if (bufsize + 1 > bufcapacity) flush(); + + buffer[bufsize + 0] = d0; + bufsize += 1; + } + + void write(char_t d0, char_t d1) + { + if (bufsize + 2 > bufcapacity) flush(); + + buffer[bufsize + 0] = d0; + buffer[bufsize + 1] = d1; + bufsize += 2; + } + + void write(char_t d0, char_t d1, char_t d2) + { + if (bufsize + 3 > bufcapacity) flush(); + + buffer[bufsize + 0] = d0; + buffer[bufsize + 1] = d1; + buffer[bufsize + 2] = d2; + bufsize += 3; + } + + void write(char_t d0, char_t d1, char_t d2, char_t d3) + { + if (bufsize + 4 > bufcapacity) flush(); + + buffer[bufsize + 0] = d0; + buffer[bufsize + 1] = d1; + buffer[bufsize + 2] = d2; + buffer[bufsize + 3] = d3; + bufsize += 4; + } + + void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) + { + if (bufsize + 5 > bufcapacity) flush(); + + buffer[bufsize + 0] = d0; + buffer[bufsize + 1] = d1; + buffer[bufsize + 2] = d2; + buffer[bufsize + 3] = d3; + buffer[bufsize + 4] = d4; + bufsize += 5; + } + + void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) + { + if (bufsize + 6 > bufcapacity) flush(); + + buffer[bufsize + 0] = d0; + buffer[bufsize + 1] = d1; + buffer[bufsize + 2] = d2; + buffer[bufsize + 3] = d3; + buffer[bufsize + 4] = d4; + buffer[bufsize + 5] = d5; + bufsize += 6; + } + + // 
utf8 maximum expansion: x4 (-> utf32) + // utf16 maximum expansion: x2 (-> utf32) + // utf32 maximum expansion: x1 + enum { bufcapacity = 2048 }; + + char_t buffer[bufcapacity]; + char scratch[4 * bufcapacity]; + + xml_writer& writer; + size_t bufsize; + xml_encoding encoding; + }; + + void write_bom(xml_writer& writer, xml_encoding encoding) + { + switch (encoding) + { + case encoding_utf8: + writer.write("\xef\xbb\xbf", 3); + break; + + case encoding_utf16_be: + writer.write("\xfe\xff", 2); + break; + + case encoding_utf16_le: + writer.write("\xff\xfe", 2); + break; + + case encoding_utf32_be: + writer.write("\x00\x00\xfe\xff", 4); + break; + + case encoding_utf32_le: + writer.write("\xff\xfe\x00\x00", 4); + break; + + default: + // invalid encoding (this should not happen) + assert(false); + } + } + + void text_output_escaped(xml_buffered_writer& writer, const char_t* s, output_chartype_t type) + { + while (*s) + { + const char_t* prev = s; + + // While *s is a usual symbol + while (!IS_OUTPUT_CHARTYPE(*s, type)) ++s; + + writer.write(prev, static_cast(s - prev)); + + switch (*s) + { + case 0: break; + case '&': + writer.write('&', 'a', 'm', 'p', ';'); + ++s; + break; + case '<': + writer.write('&', 'l', 't', ';'); + ++s; + break; + case '>': + writer.write('&', 'g', 't', ';'); + ++s; + break; + case '"': + writer.write('&', 'q', 'u', 'o', 't', ';'); + ++s; + break; + default: // s is not a usual symbol + { + unsigned int ch = static_cast(*s++); + assert(ch < 32); + + writer.write('&', '#', static_cast((ch / 10) + '0'), static_cast((ch % 10) + '0'), ';'); + } + } + } + } + + void node_output_attributes(xml_buffered_writer& writer, const xml_node& node) + { + const char_t* default_name = PUGIXML_TEXT(":anonymous"); + + for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute()) + { + writer.write(' '); + writer.write(a.name()[0] ? 
a.name() : default_name); + writer.write('=', '"'); + + text_output_escaped(writer, a.value(), oct_special_attr); + + writer.write('"'); + } + } + + void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth) + { + const char_t* default_name = PUGIXML_TEXT(":anonymous"); + + if ((flags & format_indent) != 0 && (flags & format_raw) == 0) + for (unsigned int i = 0; i < depth; ++i) writer.write(indent); + + switch (node.type()) + { + case node_document: + { + for (xml_node n = node.first_child(); n; n = n.next_sibling()) + node_output(writer, n, indent, flags, depth); + break; + } + + case node_element: + { + const char_t* name = node.name()[0] ? node.name() : default_name; + + writer.write('<'); + writer.write(name); + + node_output_attributes(writer, node); + + if (flags & format_raw) + { + if (!node.first_child()) + writer.write(' ', '/', '>'); + else + { + writer.write('>'); + + for (xml_node n = node.first_child(); n; n = n.next_sibling()) + node_output(writer, n, indent, flags, depth + 1); + + writer.write('<', '/'); + writer.write(name); + writer.write('>'); + } + } + else if (!node.first_child()) + writer.write(' ', '/', '>', '\n'); + else if (node.first_child() == node.last_child() && node.first_child().type() == node_pcdata) + { + writer.write('>'); + + text_output_escaped(writer, node.first_child().value(), oct_special_pcdata); + + writer.write('<', '/'); + writer.write(name); + writer.write('>', '\n'); + } + else + { + writer.write('>', '\n'); + + for (xml_node n = node.first_child(); n; n = n.next_sibling()) + node_output(writer, n, indent, flags, depth + 1); + + if ((flags & format_indent) != 0 && (flags & format_raw) == 0) + for (unsigned int i = 0; i < depth; ++i) writer.write(indent); + + writer.write('<', '/'); + writer.write(name); + writer.write('>', '\n'); + } + + break; + } + + case node_pcdata: + text_output_escaped(writer, node.value(), oct_special_pcdata); + if ((flags & 
format_raw) == 0) writer.write('\n'); + break; + + case node_cdata: + writer.write('<', '!', '[', 'C', 'D'); + writer.write('A', 'T', 'A', '['); + writer.write(node.value()); + writer.write(']', ']', '>'); + if ((flags & format_raw) == 0) writer.write('\n'); + break; + + case node_comment: + writer.write('<', '!', '-', '-'); + writer.write(node.value()); + writer.write('-', '-', '>'); + if ((flags & format_raw) == 0) writer.write('\n'); + break; + + case node_pi: + case node_declaration: + writer.write('<', '?'); + writer.write(node.name()[0] ? node.name() : default_name); + + if (node.type() == node_declaration) + { + node_output_attributes(writer, node); + } + else if (node.value()[0]) + { + writer.write(' '); + writer.write(node.value()); + } + + writer.write('?', '>'); + if ((flags & format_raw) == 0) writer.write('\n'); + break; + + default: + assert(false); + } + } + + inline bool has_declaration(const xml_node& node) + { + for (xml_node child = node.first_child(); child; child = child.next_sibling()) + { + xml_node_type type = child.type(); + + if (type == node_declaration) return true; + if (type == node_element) return false; + } + + return false; + } + + inline bool allow_insert_child(xml_node_type parent, xml_node_type child) + { + if (parent != node_document && parent != node_element) return false; + if (child == node_document || child == node_null) return false; + if (parent != node_document && child == node_declaration) return false; + + return true; + } + + void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip) + { + assert(dest.type() == source.type()); + + switch (source.type()) + { + case node_element: + { + dest.set_name(source.name()); + + for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute()) + dest.append_attribute(a.name()).set_value(a.value()); + + for (xml_node c = source.first_child(); c; c = c.next_sibling()) + { + if (c == skip) continue; + + xml_node cc = dest.append_child(c.type()); 
+ assert(cc); + + recursive_copy_skip(cc, c, skip); + } + + break; + } + + case node_pcdata: + case node_cdata: + case node_comment: + dest.set_value(source.value()); + break; + + case node_pi: + dest.set_name(source.name()); + dest.set_value(source.value()); + break; + + case node_declaration: + { + dest.set_name(source.name()); + + for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute()) + dest.append_attribute(a.name()).set_value(a.value()); + + break; + } + + default: + assert(false); + } + } + +#ifndef PUGIXML_NO_STL + template xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream >& stream, unsigned int options, xml_encoding encoding) + { + if (!stream.good()) return make_parse_result(status_io_error); + + // get length of remaining data in stream + std::streamoff pos = stream.tellg(); + stream.seekg(0, std::ios::end); + std::streamoff length = stream.tellg() - pos; + stream.seekg(pos, std::ios::beg); + + if (!stream.good() || pos < 0 || length < 0) return make_parse_result(status_io_error); + + // read stream data into memory + size_t read_length = static_cast(length); + + T* s = static_cast(global_allocate((read_length > 0 ? 
read_length : 1) * sizeof(T))); + if (!s) return make_parse_result(status_out_of_memory); + + stream.read(s, static_cast(read_length)); + + // check for errors + size_t actual_length = static_cast(stream.gcount()); + assert(actual_length <= read_length); + + if (read_length > 0 && actual_length == 0) + { + global_deallocate(s); + return make_parse_result(status_io_error); + } + + // load data from buffer + return doc.load_buffer_inplace_own(s, actual_length * sizeof(T), options, encoding); + } +#endif +} + +namespace pugi +{ + xml_writer_file::xml_writer_file(void* file): file(file) + { + } + + void xml_writer_file::write(const void* data, size_t size) + { + fwrite(data, size, 1, static_cast(file)); + } + +#ifndef PUGIXML_NO_STL + xml_writer_stream::xml_writer_stream(std::basic_ostream >& stream): narrow_stream(&stream), wide_stream(0) + { + } + + xml_writer_stream::xml_writer_stream(std::basic_ostream >& stream): narrow_stream(0), wide_stream(&stream) + { + } + + void xml_writer_stream::write(const void* data, size_t size) + { + if (narrow_stream) + { + assert(!wide_stream); + narrow_stream->write(reinterpret_cast(data), static_cast(size)); + } + else + { + assert(wide_stream); + assert(size % sizeof(wchar_t) == 0); + + wide_stream->write(reinterpret_cast(data), static_cast(size / sizeof(wchar_t))); + } + } +#endif + + xml_tree_walker::xml_tree_walker(): _depth(0) + { + } + + xml_tree_walker::~xml_tree_walker() + { + } + + int xml_tree_walker::depth() const + { + return _depth; + } + + bool xml_tree_walker::begin(xml_node&) + { + return true; + } + + bool xml_tree_walker::end(xml_node&) + { + return true; + } + + xml_attribute::xml_attribute(): _attr(0) + { + } + + xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr) + { + } + + xml_attribute::operator xml_attribute::unspecified_bool_type() const + { +#ifdef __MWERKS__ + return _attr ? &xml_attribute::empty : 0; +#else + return _attr ? 
&xml_attribute::_attr : 0; +#endif + } + + bool xml_attribute::operator!() const + { + return !_attr; + } + + bool xml_attribute::operator==(const xml_attribute& r) const + { + return (_attr == r._attr); + } + + bool xml_attribute::operator!=(const xml_attribute& r) const + { + return (_attr != r._attr); + } + + bool xml_attribute::operator<(const xml_attribute& r) const + { + return (_attr < r._attr); + } + + bool xml_attribute::operator>(const xml_attribute& r) const + { + return (_attr > r._attr); + } + + bool xml_attribute::operator<=(const xml_attribute& r) const + { + return (_attr <= r._attr); + } + + bool xml_attribute::operator>=(const xml_attribute& r) const + { + return (_attr >= r._attr); + } + + xml_attribute xml_attribute::next_attribute() const + { + return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute(); + } + + xml_attribute xml_attribute::previous_attribute() const + { + return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute(); + } + + int xml_attribute::as_int() const + { + if (!_attr || !_attr->value) return 0; + + #ifdef PUGIXML_WCHAR_MODE + return (int)wcstol(_attr->value, 0, 10); + #else + return (int)strtol(_attr->value, 0, 10); + #endif + } + + unsigned int xml_attribute::as_uint() const + { + if (!_attr || !_attr->value) return 0; + + #ifdef PUGIXML_WCHAR_MODE + return (unsigned int)wcstoul(_attr->value, 0, 10); + #else + return (unsigned int)strtoul(_attr->value, 0, 10); + #endif + } + + double xml_attribute::as_double() const + { + if (!_attr || !_attr->value) return 0; + + #ifdef PUGIXML_WCHAR_MODE + return wcstod(_attr->value, 0); + #else + return strtod(_attr->value, 0); + #endif + } + + float xml_attribute::as_float() const + { + if (!_attr || !_attr->value) return 0; + + #ifdef PUGIXML_WCHAR_MODE + return (float)wcstod(_attr->value, 0); + #else + return (float)strtod(_attr->value, 0); + #endif + } + + bool xml_attribute::as_bool() const + { + if (!_attr || 
!_attr->value) return false; + + // only look at first char + char_t first = *_attr->value; + + // 1*, t* (true), T* (True), y* (yes), Y* (YES) + return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y'); + } + + bool xml_attribute::empty() const + { + return !_attr; + } + + const char_t* xml_attribute::name() const + { + return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT(""); + } + + const char_t* xml_attribute::value() const + { + return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT(""); + } + + unsigned int xml_attribute::document_order() const + { + return 0; + } + + xml_attribute& xml_attribute::operator=(const char_t* rhs) + { + set_value(rhs); + return *this; + } + + xml_attribute& xml_attribute::operator=(int rhs) + { + set_value(rhs); + return *this; + } + + xml_attribute& xml_attribute::operator=(unsigned int rhs) + { + set_value(rhs); + return *this; + } + + xml_attribute& xml_attribute::operator=(double rhs) + { + set_value(rhs); + return *this; + } + + xml_attribute& xml_attribute::operator=(bool rhs) + { + set_value(rhs); + return *this; + } + + bool xml_attribute::set_name(const char_t* rhs) + { + if (!_attr) return false; + + return strcpy_insitu(_attr->name, _attr->header, xml_memory_page_name_allocated_mask, rhs); + } + + bool xml_attribute::set_value(const char_t* rhs) + { + if (!_attr) return false; + + return strcpy_insitu(_attr->value, _attr->header, xml_memory_page_value_allocated_mask, rhs); + } + + bool xml_attribute::set_value(int rhs) + { + char buf[128]; + sprintf(buf, "%d", rhs); + + #ifdef PUGIXML_WCHAR_MODE + char_t wbuf[128]; + impl::widen_ascii(wbuf, buf); + + return set_value(wbuf); + #else + return set_value(buf); + #endif + } + + bool xml_attribute::set_value(unsigned int rhs) + { + char buf[128]; + sprintf(buf, "%u", rhs); + + #ifdef PUGIXML_WCHAR_MODE + char_t wbuf[128]; + impl::widen_ascii(wbuf, buf); + + return set_value(wbuf); + #else + return set_value(buf); + #endif + } + + bool 
xml_attribute::set_value(double rhs) + { + char buf[128]; + sprintf(buf, "%g", rhs); + + #ifdef PUGIXML_WCHAR_MODE + char_t wbuf[128]; + impl::widen_ascii(wbuf, buf); + + return set_value(wbuf); + #else + return set_value(buf); + #endif + } + + bool xml_attribute::set_value(bool rhs) + { + return set_value(rhs ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); + } + +#ifdef __BORLANDC__ + bool operator&&(const xml_attribute& lhs, bool rhs) + { + return (bool)lhs && rhs; + } + + bool operator||(const xml_attribute& lhs, bool rhs) + { + return (bool)lhs || rhs; + } +#endif + + xml_node::xml_node(): _root(0) + { + } + + xml_node::xml_node(xml_node_struct* p): _root(p) + { + } + + xml_node::operator xml_node::unspecified_bool_type() const + { +#ifdef __MWERKS__ + return _root ? &xml_node::empty : 0; +#else + return _root ? &xml_node::_root : 0; +#endif + } + + bool xml_node::operator!() const + { + return !_root; + } + + xml_node::iterator xml_node::begin() const + { + return iterator(_root ? _root->first_child : 0, _root); + } + + xml_node::iterator xml_node::end() const + { + return iterator(0, _root); + } + + xml_node::attribute_iterator xml_node::attributes_begin() const + { + return attribute_iterator(_root ? 
_root->first_attribute : 0, _root); + } + + xml_node::attribute_iterator xml_node::attributes_end() const + { + return attribute_iterator(0, _root); + } + + bool xml_node::operator==(const xml_node& r) const + { + return (_root == r._root); + } + + bool xml_node::operator!=(const xml_node& r) const + { + return (_root != r._root); + } + + bool xml_node::operator<(const xml_node& r) const + { + return (_root < r._root); + } + + bool xml_node::operator>(const xml_node& r) const + { + return (_root > r._root); + } + + bool xml_node::operator<=(const xml_node& r) const + { + return (_root <= r._root); + } + + bool xml_node::operator>=(const xml_node& r) const + { + return (_root >= r._root); + } + + bool xml_node::empty() const + { + return !_root; + } + + const char_t* xml_node::name() const + { + return (_root && _root->name) ? _root->name : PUGIXML_TEXT(""); + } + + xml_node_type xml_node::type() const + { + return _root ? static_cast(_root->header & xml_memory_page_type_mask) : node_null; + } + + const char_t* xml_node::value() const + { + return (_root && _root->value) ? 
_root->value : PUGIXML_TEXT(""); + } + + xml_node xml_node::child(const char_t* name) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + if (i->name && impl::strequal(name, i->name)) return xml_node(i); + + return xml_node(); + } + + xml_node xml_node::child_w(const char_t* name) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + if (i->name && impl::strequalwild(name, i->name)) return xml_node(i); + + return xml_node(); + } + + xml_attribute xml_node::attribute(const char_t* name) const + { + if (!_root) return xml_attribute(); + + for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) + if (i->name && impl::strequal(name, i->name)) + return xml_attribute(i); + + return xml_attribute(); + } + + xml_attribute xml_node::attribute_w(const char_t* name) const + { + if (!_root) return xml_attribute(); + + for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) + if (i->name && impl::strequalwild(name, i->name)) + return xml_attribute(i); + + return xml_attribute(); + } + + xml_node xml_node::next_sibling(const char_t* name) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) + if (i->name && impl::strequal(name, i->name)) return xml_node(i); + + return xml_node(); + } + + xml_node xml_node::next_sibling_w(const char_t* name) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) + if (i->name && impl::strequalwild(name, i->name)) return xml_node(i); + + return xml_node(); + } + + xml_node xml_node::next_sibling() const + { + if (!_root) return xml_node(); + + if (_root->next_sibling) return xml_node(_root->next_sibling); + else return xml_node(); + } + + xml_node xml_node::previous_sibling(const char_t* name) const + { + if (!_root) return xml_node(); + + for 
(xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) + if (i->name && impl::strequal(name, i->name)) return xml_node(i); + + return xml_node(); + } + + xml_node xml_node::previous_sibling_w(const char_t* name) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) + if (i->name && impl::strequalwild(name, i->name)) return xml_node(i); + + return xml_node(); + } + + xml_node xml_node::previous_sibling() const + { + if (!_root) return xml_node(); + + if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c); + else return xml_node(); + } + + xml_node xml_node::parent() const + { + return _root ? xml_node(_root->parent) : xml_node(); + } + + xml_node xml_node::root() const + { + xml_node_struct* r = _root; + + while (r && r->parent) r = r->parent; + + return xml_node(r); + } + + const char_t* xml_node::child_value() const + { + if (!_root) return PUGIXML_TEXT(""); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + { + xml_node_type type = static_cast(i->header & xml_memory_page_type_mask); + + if (i->value && (type == node_pcdata || type == node_cdata)) + return i->value; + } + + return PUGIXML_TEXT(""); + } + + const char_t* xml_node::child_value(const char_t* name) const + { + return child(name).child_value(); + } + + const char_t* xml_node::child_value_w(const char_t* name) const + { + if (!_root) return PUGIXML_TEXT(""); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + if (i->name && impl::strequalwild(name, i->name)) return xml_node(i).child_value(); + + return PUGIXML_TEXT(""); + } + + xml_attribute xml_node::first_attribute() const + { + return _root ? xml_attribute(_root->first_attribute) : xml_attribute(); + } + + xml_attribute xml_node::last_attribute() const + { + return _root && _root->first_attribute ? 
xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute(); + } + + xml_node xml_node::first_child() const + { + return _root ? xml_node(_root->first_child) : xml_node(); + } + + xml_node xml_node::last_child() const + { + return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node(); + } + + bool xml_node::set_name(const char_t* rhs) + { + switch (type()) + { + case node_pi: + case node_declaration: + case node_element: + return strcpy_insitu(_root->name, _root->header, xml_memory_page_name_allocated_mask, rhs); + + default: + return false; + } + } + + bool xml_node::set_value(const char_t* rhs) + { + switch (type()) + { + case node_pi: + case node_cdata: + case node_pcdata: + case node_comment: + return strcpy_insitu(_root->value, _root->header, xml_memory_page_value_allocated_mask, rhs); + + default: + return false; + } + } + + xml_attribute xml_node::append_attribute(const char_t* name) + { + if (type() != node_element && type() != node_declaration) return xml_attribute(); + + xml_attribute a(append_attribute_ll(_root, get_allocator(_root))); + if (!a) return xml_attribute(); + + a.set_name(name); + + return a; + } + + xml_attribute xml_node::insert_attribute_before(const char_t* name, const xml_attribute& attr) + { + if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute(); + + // check that attribute belongs to *this + xml_attribute_struct* cur = attr._attr; + + while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c; + + if (cur != _root->first_attribute) return xml_attribute(); + + xml_attribute a(allocate_attribute(get_allocator(_root))); + if (!a) return xml_attribute(); + + a.set_name(name); + + if (attr._attr->prev_attribute_c->next_attribute) + attr._attr->prev_attribute_c->next_attribute = a._attr; + else + _root->first_attribute = a._attr; + + a._attr->prev_attribute_c = attr._attr->prev_attribute_c; + a._attr->next_attribute = attr._attr; + 
attr._attr->prev_attribute_c = a._attr; + + return a; + } + + xml_attribute xml_node::insert_attribute_after(const char_t* name, const xml_attribute& attr) + { + if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute(); + + // check that attribute belongs to *this + xml_attribute_struct* cur = attr._attr; + + while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c; + + if (cur != _root->first_attribute) return xml_attribute(); + + xml_attribute a(allocate_attribute(get_allocator(_root))); + if (!a) return xml_attribute(); + + a.set_name(name); + + if (attr._attr->next_attribute) + attr._attr->next_attribute->prev_attribute_c = a._attr; + else + _root->first_attribute->prev_attribute_c = a._attr; + + a._attr->next_attribute = attr._attr->next_attribute; + a._attr->prev_attribute_c = attr._attr; + attr._attr->next_attribute = a._attr; + + return a; + } + + xml_attribute xml_node::append_copy(const xml_attribute& proto) + { + if (!proto) return xml_attribute(); + + xml_attribute result = append_attribute(proto.name()); + result.set_value(proto.value()); + + return result; + } + + xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr) + { + if (!proto) return xml_attribute(); + + xml_attribute result = insert_attribute_after(proto.name(), attr); + result.set_value(proto.value()); + + return result; + } + + xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr) + { + if (!proto) return xml_attribute(); + + xml_attribute result = insert_attribute_before(proto.name(), attr); + result.set_value(proto.value()); + + return result; + } + + xml_node xml_node::append_child(xml_node_type type) + { + if (!allow_insert_child(this->type(), type)) return xml_node(); + + xml_node n(append_node(_root, get_allocator(_root), type)); + if (!n) return xml_node(); + + if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); + + return n; + } 
+ + xml_node xml_node::insert_child_before(xml_node_type type, const xml_node& node) + { + if (!allow_insert_child(this->type(), type)) return xml_node(); + if (!node._root || node._root->parent != _root) return xml_node(); + + xml_node n(allocate_node(get_allocator(_root), type)); + if (!n) return xml_node(); + + n._root->parent = _root; + + if (node._root->prev_sibling_c->next_sibling) + node._root->prev_sibling_c->next_sibling = n._root; + else + _root->first_child = n._root; + + n._root->prev_sibling_c = node._root->prev_sibling_c; + n._root->next_sibling = node._root; + node._root->prev_sibling_c = n._root; + + if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); + + return n; + } + + xml_node xml_node::insert_child_after(xml_node_type type, const xml_node& node) + { + if (!allow_insert_child(this->type(), type)) return xml_node(); + if (!node._root || node._root->parent != _root) return xml_node(); + + xml_node n(allocate_node(get_allocator(_root), type)); + if (!n) return xml_node(); + + n._root->parent = _root; + + if (node._root->next_sibling) + node._root->next_sibling->prev_sibling_c = n._root; + else + _root->first_child->prev_sibling_c = n._root; + + n._root->next_sibling = node._root->next_sibling; + n._root->prev_sibling_c = node._root; + node._root->next_sibling = n._root; + + if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); + + return n; + } + + xml_node xml_node::append_copy(const xml_node& proto) + { + xml_node result = append_child(proto.type()); + + if (result) recursive_copy_skip(result, proto, result); + + return result; + } + + xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node) + { + xml_node result = insert_child_after(proto.type(), node); + + if (result) recursive_copy_skip(result, proto, result); + + return result; + } + + xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node) + { + xml_node result = insert_child_before(proto.type(), node); + + if (result) 
recursive_copy_skip(result, proto, result); + + return result; + } + + bool xml_node::remove_attribute(const char_t* name) + { + return remove_attribute(attribute(name)); + } + + bool xml_node::remove_attribute(const xml_attribute& a) + { + if (!_root || !a._attr) return false; + + // check that attribute belongs to *this + xml_attribute_struct* attr = a._attr; + + while (attr->prev_attribute_c->next_attribute) attr = attr->prev_attribute_c; + + if (attr != _root->first_attribute) return false; + + if (a._attr->next_attribute) a._attr->next_attribute->prev_attribute_c = a._attr->prev_attribute_c; + else if (_root->first_attribute) _root->first_attribute->prev_attribute_c = a._attr->prev_attribute_c; + + if (a._attr->prev_attribute_c->next_attribute) a._attr->prev_attribute_c->next_attribute = a._attr->next_attribute; + else _root->first_attribute = a._attr->next_attribute; + + destroy_attribute(a._attr, get_allocator(_root)); + + return true; + } + + bool xml_node::remove_child(const char_t* name) + { + return remove_child(child(name)); + } + + bool xml_node::remove_child(const xml_node& n) + { + if (!_root || !n._root || n._root->parent != _root) return false; + + if (n._root->next_sibling) n._root->next_sibling->prev_sibling_c = n._root->prev_sibling_c; + else if (_root->first_child) _root->first_child->prev_sibling_c = n._root->prev_sibling_c; + + if (n._root->prev_sibling_c->next_sibling) n._root->prev_sibling_c->next_sibling = n._root->next_sibling; + else _root->first_child = n._root->next_sibling; + + destroy_node(n._root, get_allocator(_root)); + + return true; + } + + xml_node xml_node::find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + if (i->name && impl::strequal(name, i->name)) + { + for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) + if (impl::strequal(attr_name, 
a->name) && impl::strequal(attr_value, a->value)) + return xml_node(i); + } + + return xml_node(); + } + + xml_node xml_node::find_child_by_attribute_w(const char_t* name, const char_t* attr_name, const char_t* attr_value) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + if (i->name && impl::strequalwild(name, i->name)) + { + for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) + if (impl::strequalwild(attr_name, a->name) && impl::strequalwild(attr_value, a->value)) + return xml_node(i); + } + + return xml_node(); + } + + xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) + if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value)) + return xml_node(i); + + return xml_node(); + } + + xml_node xml_node::find_child_by_attribute_w(const char_t* attr_name, const char_t* attr_value) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) + if (impl::strequalwild(attr_name, a->name) && impl::strequalwild(attr_value, a->value)) + return xml_node(i); + + return xml_node(); + } + +#ifndef PUGIXML_NO_STL + string_t xml_node::path(char_t delimiter) const + { + string_t path; + + xml_node cursor = *this; // Make a copy. + + path = cursor.name(); + + while (cursor.parent()) + { + cursor = cursor.parent(); + + string_t temp = cursor.name(); + temp += delimiter; + temp += path; + path.swap(temp); + } + + return path; + } +#endif + + xml_node xml_node::first_element_by_path(const char_t* path, char_t delimiter) const + { + xml_node found = *this; // Current search context. 
+ + if (!_root || !path || !path[0]) return found; + + if (path[0] == delimiter) + { + // Absolute path; e.g. '/foo/bar' + while (found.parent()) found = found.parent(); + ++path; + } + + const char_t* path_segment = path; + + while (*path_segment == delimiter) ++path_segment; + + const char_t* path_segment_end = path_segment; + + while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end; + + if (path_segment == path_segment_end) return found; + + const char_t* next_segment = path_segment_end; + + while (*next_segment == delimiter) ++next_segment; + + if (*path_segment == '.' && path_segment + 1 == path_segment_end) + return found.first_element_by_path(next_segment, delimiter); + else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end) + return found.parent().first_element_by_path(next_segment, delimiter); + else + { + for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling) + { + if (j->name && impl::strequalrange(j->name, path_segment, static_cast(path_segment_end - path_segment))) + { + xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); + + if (subsearch) return subsearch; + } + } + + return xml_node(); + } + } + + bool xml_node::traverse(xml_tree_walker& walker) + { + walker._depth = -1; + + xml_node arg_begin = *this; + if (!walker.begin(arg_begin)) return false; + + xml_node cur = first_child(); + + if (cur) + { + ++walker._depth; + + do + { + xml_node arg_for_each = cur; + if (!walker.for_each(arg_for_each)) + return false; + + if (cur.first_child()) + { + ++walker._depth; + cur = cur.first_child(); + } + else if (cur.next_sibling()) + cur = cur.next_sibling(); + else + { + // Borland C++ workaround + while (!cur.next_sibling() && cur != *this && (bool)cur.parent()) + { + --walker._depth; + cur = cur.parent(); + } + + if (cur != *this) + cur = cur.next_sibling(); + } + } + while (cur && cur != *this); + } + + assert(walker._depth == -1); + + 
xml_node arg_end = *this; + return walker.end(arg_end); + } + + unsigned int xml_node::document_order() const + { + return 0; + } + + void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const + { + if (!_root) return; + + xml_buffered_writer buffered_writer(writer, encoding); + + node_output(buffered_writer, *this, indent, flags, depth); + } + +#ifndef PUGIXML_NO_STL + void xml_node::print(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const + { + if (!_root) return; + + xml_writer_stream writer(stream); + + print(writer, indent, flags, encoding, depth); + } + + void xml_node::print(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const + { + if (!_root) return; + + xml_writer_stream writer(stream); + + print(writer, indent, flags, encoding_wchar, depth); + } +#endif + + ptrdiff_t xml_node::offset_debug() const + { + xml_node_struct* r = root()._root; + + if (!r) return -1; + + const char_t* buffer = static_cast(r)->buffer; + + if (!buffer) return -1; + + switch (type()) + { + case node_document: + return 0; + + case node_element: + case node_declaration: + case node_pi: + return (_root->header & xml_memory_page_name_allocated_mask) ? -1 : _root->name - buffer; + + case node_pcdata: + case node_cdata: + case node_comment: + return (_root->header & xml_memory_page_value_allocated_mask) ? 
-1 : _root->value - buffer; + + default: + return -1; + } + } + +#ifdef __BORLANDC__ + bool operator&&(const xml_node& lhs, bool rhs) + { + return (bool)lhs && rhs; + } + + bool operator||(const xml_node& lhs, bool rhs) + { + return (bool)lhs || rhs; + } +#endif + + xml_node_iterator::xml_node_iterator() + { + } + + xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent()) + { + } + + xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) + { + } + + bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const + { + return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; + } + + bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const + { + return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; + } + + xml_node& xml_node_iterator::operator*() + { + assert(_wrap._root); + return _wrap; + } + + xml_node* xml_node_iterator::operator->() + { + assert(_wrap._root); + return &_wrap; + } + + const xml_node_iterator& xml_node_iterator::operator++() + { + assert(_wrap._root); + _wrap._root = _wrap._root->next_sibling; + return *this; + } + + xml_node_iterator xml_node_iterator::operator++(int) + { + xml_node_iterator temp = *this; + ++*this; + return temp; + } + + const xml_node_iterator& xml_node_iterator::operator--() + { + if (_wrap._root) _wrap = _wrap.previous_sibling(); + else _wrap = _parent.last_child(); + return *this; + } + + xml_node_iterator xml_node_iterator::operator--(int) + { + xml_node_iterator temp = *this; + --*this; + return temp; + } + + xml_attribute_iterator::xml_attribute_iterator() + { + } + + xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent) + { + } + + xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) + { + } + + bool 
xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const + { + return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root; + } + + bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const + { + return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root; + } + + xml_attribute& xml_attribute_iterator::operator*() + { + assert(_wrap._attr); + return _wrap; + } + + xml_attribute* xml_attribute_iterator::operator->() + { + assert(_wrap._attr); + return &_wrap; + } + + const xml_attribute_iterator& xml_attribute_iterator::operator++() + { + assert(_wrap._attr); + _wrap._attr = _wrap._attr->next_attribute; + return *this; + } + + xml_attribute_iterator xml_attribute_iterator::operator++(int) + { + xml_attribute_iterator temp = *this; + ++*this; + return temp; + } + + const xml_attribute_iterator& xml_attribute_iterator::operator--() + { + if (_wrap._attr) _wrap = _wrap.previous_attribute(); + else _wrap = _parent.last_attribute(); + return *this; + } + + xml_attribute_iterator xml_attribute_iterator::operator--(int) + { + xml_attribute_iterator temp = *this; + --*this; + return temp; + } + + const char* xml_parse_result::description() const + { + switch (status) + { + case status_ok: return "No error"; + + case status_file_not_found: return "File was not found"; + case status_io_error: return "Error reading from file/stream"; + case status_out_of_memory: return "Could not allocate memory"; + case status_internal_error: return "Internal error occurred"; + + case status_unrecognized_tag: return "Could not determine tag type"; + + case status_bad_pi: return "Error parsing document declaration/processing instruction"; + case status_bad_comment: return "Error parsing comment"; + case status_bad_cdata: return "Error parsing CDATA section"; + case status_bad_doctype: return "Error parsing document type declaration"; + case status_bad_pcdata: return "Error parsing PCDATA section"; + case 
status_bad_start_element: return "Error parsing start element tag"; + case status_bad_attribute: return "Error parsing element attribute"; + case status_bad_end_element: return "Error parsing end element tag"; + case status_end_element_mismatch: return "Start-end tags mismatch"; + + default: return "Unknown error"; + } + } + + xml_document::xml_document(): _buffer(0) + { + create(); + } + + xml_document::~xml_document() + { + destroy(); + } + + void xml_document::create() + { + destroy(); + + // initialize sentinel page + STATIC_ASSERT(offsetof(xml_memory_page, data) + sizeof(xml_document_struct) + xml_memory_page_alignment <= sizeof(_memory)); + + // align upwards to page boundary + void* page_memory = reinterpret_cast((reinterpret_cast(_memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1)); + + // prepare page structure + xml_memory_page* page = xml_memory_page::construct(page_memory); + + page->busy_size = xml_memory_page_size; + + // allocate new root + _root = new (page->data) xml_document_struct(page); + _root->prev_sibling_c = _root; + + // setup allocator + xml_allocator& a = static_cast(_root)->allocator; + a = xml_allocator(page); + + // setup sentinel page + page->allocator = &a; + } + + void xml_document::destroy() + { + // destroy static storage + if (_buffer) + { + global_deallocate(_buffer); + _buffer = 0; + } + + // destroy dynamic storage, leave sentinel page (it's in static memory) + if (_root) + { + xml_memory_page* root_page = reinterpret_cast(_root->header & xml_memory_page_pointer_mask); + assert(root_page && !root_page->prev && !root_page->memory); + + // destroy all pages + for (xml_memory_page* page = root_page->next; page; ) + { + xml_memory_page* next = page->next; + + xml_allocator::deallocate_page(page); + + page = next; + } + + // cleanup root page + root_page->allocator = 0; + root_page->next = 0; + root_page->busy_size = root_page->freed_size = 0; + + _root = 0; + } + } + +#ifndef PUGIXML_NO_STL + 
xml_parse_result xml_document::load(std::basic_istream >& stream, unsigned int options, xml_encoding encoding) + { + create(); + + return load_stream_impl(*this, stream, options, encoding); + } + + xml_parse_result xml_document::load(std::basic_istream >& stream, unsigned int options) + { + create(); + + return load_stream_impl(*this, stream, options, encoding_wchar); + } +#endif + + xml_parse_result xml_document::load(const char_t* contents, unsigned int options) + { + create(); + + // Force native encoding (skip autodetection) + #ifdef PUGIXML_WCHAR_MODE + xml_encoding encoding = encoding_wchar; + #else + xml_encoding encoding = encoding_utf8; + #endif + + return load_buffer(contents, impl::strlen(contents) * sizeof(char_t), options, encoding); + } + + xml_parse_result xml_document::parse(char* xmlstr, unsigned int options) + { + return load_buffer_inplace(xmlstr, strlen(xmlstr), options, encoding_utf8); + } + + xml_parse_result xml_document::parse(const transfer_ownership_tag&, char* xmlstr, unsigned int options) + { + return load_buffer_inplace_own(xmlstr, strlen(xmlstr), options, encoding_utf8); + } + + xml_parse_result xml_document::load_file(const char* path, unsigned int options, xml_encoding encoding) + { + create(); + + FILE* file = fopen(path, "rb"); + if (!file) return make_parse_result(status_file_not_found); + + fseek(file, 0, SEEK_END); + long length = ftell(file); + fseek(file, 0, SEEK_SET); + + if (length < 0) + { + fclose(file); + return make_parse_result(status_io_error); + } + + char* s = static_cast(global_allocate(length > 0 ? 
length : 1)); + + if (!s) + { + fclose(file); + return make_parse_result(status_out_of_memory); + } + + size_t read = fread(s, 1, (size_t)length, file); + fclose(file); + + if (read != (size_t)length) + { + global_deallocate(s); + return make_parse_result(status_io_error); + } + + return load_buffer_inplace_own(s, length, options, encoding); + } + + xml_parse_result xml_document::load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own) + { + create(); + + // get actual encoding + xml_encoding buffer_encoding = get_buffer_encoding(encoding, contents, size); + + // get private buffer + char_t* buffer = 0; + size_t length = 0; + + if (!convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return make_parse_result(status_out_of_memory); + + // delete original buffer if we performed a conversion + if (own && buffer != contents) global_deallocate(contents); + + // parse + xml_parse_result res = xml_parser::parse(buffer, length, _root, options); + + // remember encoding + res.encoding = buffer_encoding; + + // grab onto buffer if it's our buffer, user is responsible for deallocating contens himself + if (own || buffer != contents) _buffer = buffer; + + return res; + } + + xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) + { + return load_buffer_impl(const_cast(contents), size, options, encoding, false, false); + } + + xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding) + { + return load_buffer_impl(contents, size, options, encoding, true, false); + } + + xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding) + { + return load_buffer_impl(contents, size, options, encoding, true, true); + } + + void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int 
flags, xml_encoding encoding) const + { + if (flags & format_write_bom) write_bom(writer, get_write_encoding(encoding)); + + xml_buffered_writer buffered_writer(writer, encoding); + + if (!(flags & format_no_declaration) && !has_declaration(*this)) + { + buffered_writer.write(PUGIXML_TEXT("")); + if (!(flags & format_raw)) buffered_writer.write('\n'); + } + + node_output(buffered_writer, *this, indent, flags, 0); + } + +#ifndef PUGIXML_NO_STL + void xml_document::save(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const + { + xml_writer_stream writer(stream); + + save(writer, indent, flags, encoding); + } + + void xml_document::save(std::basic_ostream >& stream, const char_t* indent, unsigned int flags) const + { + xml_writer_stream writer(stream); + + save(writer, indent, flags, encoding_wchar); + } +#endif + + bool xml_document::save_file(const char* path, const char_t* indent, unsigned int flags, xml_encoding encoding) const + { + FILE* file = fopen(path, "wb"); + if (!file) return false; + + xml_writer_file writer(file); + save(writer, indent, flags, encoding); + + fclose(file); + + return true; + } + + void xml_document::precompute_document_order() + { + } + +#ifndef PUGIXML_NO_STL + std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str) + { + assert(str); + + STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); + + size_t length = wcslen(str); + + // first pass: get length in utf8 characters + size_t size = sizeof(wchar_t) == 2 ? + utf_decoder::decode_utf16_block(reinterpret_cast(str), length, 0) : + utf_decoder::decode_utf32_block(reinterpret_cast(str), length, 0); + + // allocate resulting string + std::string result; + result.resize(size); + + // second pass: convert to utf8 + if (size > 0) + { + uint8_t* begin = reinterpret_cast(&result[0]); + uint8_t* end = sizeof(wchar_t) == 2 ? 
+ utf_decoder::decode_utf16_block(reinterpret_cast(str), length, begin) : + utf_decoder::decode_utf32_block(reinterpret_cast(str), length, begin); + + // truncate invalid output + assert(begin <= end && static_cast(end - begin) <= result.size()); + result.resize(static_cast(end - begin)); + } + + return result; + } + + std::wstring PUGIXML_FUNCTION as_utf16(const char* str) + { + return as_wide(str); + } + + std::wstring PUGIXML_FUNCTION as_wide(const char* str) + { + assert(str); + + const uint8_t* data = reinterpret_cast(str); + size_t size = strlen(str); + + // first pass: get length in wchar_t + size_t length = utf_decoder::decode_utf8_block(data, size, 0); + + // allocate resulting string + std::wstring result; + result.resize(length); + + // second pass: convert to wchar_t + if (length > 0) + { + wchar_writer::value_type begin = reinterpret_cast(&result[0]); + wchar_writer::value_type end = utf_decoder::decode_utf8_block(data, size, begin); + + // truncate invalid output + assert(begin <= end && static_cast(end - begin) <= result.size()); + result.resize(static_cast(end - begin)); + } + + return result; + } +#endif + + void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate) + { + global_allocate = allocate; + global_deallocate = deallocate; + } + + allocation_function PUGIXML_FUNCTION get_memory_allocation_function() + { + return global_allocate; + } + + deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function() + { + return global_deallocate; + } +} + +#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) +namespace std +{ + // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) + std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&) + { + return std::bidirectional_iterator_tag(); + } + + std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&) + { + return 
std::bidirectional_iterator_tag(); + } +} +#endif + +#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) +namespace std +{ + // Workarounds for (non-standard) iterator category detection + std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&) + { + return std::bidirectional_iterator_tag(); + } + + std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&) + { + return std::bidirectional_iterator_tag(); + } +} +#endif + +/** + * Copyright (c) 2006-2010 Arseny Kapoulkine + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ diff --git a/src/pugixml.hpp b/src/pugixml.hpp index 8207f16..281a0ea 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -1,2378 +1,2378 @@ -/** - * pugixml parser - version 0.9 - * -------------------------------------------------------- - * Copyright (C) 2006-2010, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) - * Report bugs and download new versions at http://code.google.com/p/pugixml/ - * - * This library is distributed under the MIT License. See notice at the end - * of this file. - * - * This work is based on the pugxml parser, which is: - * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) - */ - -#ifndef HEADER_PUGIXML_HPP -#define HEADER_PUGIXML_HPP - -#include "pugiconfig.hpp" - -#ifndef PUGIXML_NO_STL -namespace std -{ - struct bidirectional_iterator_tag; - -#ifdef __SUNPRO_CC - // Sun C++ compiler has a bug which forces template argument names in forward declarations to be the same as in actual definitions - template class allocator; - template struct char_traits; - template class basic_istream; - template class basic_ostream; - template class basic_string; -#else - // Borland C++ compiler has a bug which forces template argument names in forward declarations to be the same as in actual definitions - template class allocator; - template struct char_traits; - template class basic_istream; - template class basic_ostream; - template class basic_string; -#endif - - // Digital Mars compiler has a bug which requires a forward declaration for explicit instantiation (otherwise type selection is messed up later, producing link errors) - // Also note that we have to declare char_traits as a class here, since it's defined that way -#ifdef __DMC__ - template <> class char_traits; -#endif -} -#endif - -// Macro for deprecated features -#ifndef PUGIXML_DEPRECATED -# if defined(__GNUC__) -# define PUGIXML_DEPRECATED __attribute__((deprecated)) -# elif defined(_MSC_VER) && _MSC_VER >= 1300 -# define PUGIXML_DEPRECATED __declspec(deprecated) -# else 
-# define PUGIXML_DEPRECATED -# endif -#endif - -// No XPath without STL or exceptions -#if (defined(PUGIXML_NO_STL) || defined(PUGIXML_NO_EXCEPTIONS)) && !defined(PUGIXML_NO_XPATH) -# define PUGIXML_NO_XPATH -#endif - -// Include exception header for XPath -#ifndef PUGIXML_NO_XPATH -# include -#endif - -// If no API is defined, assume default -#ifndef PUGIXML_API -# define PUGIXML_API -#endif - -// If no API for classes is defined, assume default -#ifndef PUGIXML_CLASS -# define PUGIXML_CLASS PUGIXML_API -#endif - -// If no API for functions is defined, assume default -#ifndef PUGIXML_FUNCTION -# define PUGIXML_FUNCTION PUGIXML_API -#endif - -#include - -// Character interface macros -#ifdef PUGIXML_WCHAR_MODE -# define PUGIXML_TEXT(t) L ## t - -namespace pugi -{ - /// Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE - typedef wchar_t char_t; - -#ifndef PUGIXML_NO_STL - /// String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE - typedef std::basic_string, std::allocator > string_t; -#endif -} -#else -# define PUGIXML_TEXT(t) t - -namespace pugi -{ - /// Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE - typedef char char_t; - -# ifndef PUGIXML_NO_STL - // GCC 3.4 has a bug which prevents string_t instantiation using char_t, so we have to use char type explicitly - /// String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE - typedef std::basic_string, std::allocator > string_t; -# endif -} -#endif - -// Helpers for inline implementation -namespace pugi -{ - namespace impl - { - bool PUGIXML_FUNCTION strequal(const char_t*, const char_t*); - bool PUGIXML_FUNCTION strequalwild(const char_t*, const char_t*); - } -} - -/// The PugiXML Parser namespace. -namespace pugi -{ - /// Tree node classification. 
- enum xml_node_type - { - node_null, ///< Undifferentiated entity - node_document, ///< A document tree's absolute root. - node_element, ///< E.g. '<...>' - node_pcdata, ///< E.g. '>...<' - node_cdata, ///< E.g. '' - node_comment, ///< E.g. '' - node_pi, ///< E.g. '' - node_declaration ///< E.g. '' - }; - - // Parsing options - - /** - * Minimal parsing mode. Equivalent to turning all other flags off. This set of flags means - * that pugixml does not add pi/cdata sections or comments to DOM tree and does not perform - * any conversions for input data, meaning fastest parsing. - */ - const unsigned int parse_minimal = 0x0000; - - /** - * This flag determines if processing instructions (nodes with type node_pi; such nodes have the - * form of or in XML) are to be put in DOM tree. If this flag is off, - * they are not put in the tree, but are still parsed and checked for correctness. - * - * The corresponding node in DOM tree will have type node_pi, name "target" and value "content", - * if any. - * - * Note that (document declaration) is not considered to be a PI. - * - * This flag is off by default. - */ - const unsigned int parse_pi = 0x0001; - - /** - * This flag determines if comments (nodes with type node_comment; such nodes have the form of - * in XML) are to be put in DOM tree. If this flag is off, they are not put in - * the tree, but are still parsed and checked for correctness. - * - * The corresponding node in DOM tree will have type node_comment, empty name and value "content". - * - * This flag is off by default. - */ - const unsigned int parse_comments = 0x0002; - - /** - * This flag determines if CDATA sections (nodes with type node_cdata; such nodes have the form - * of in XML) are to be put in DOM tree. If this flag is off, they are not - * put in the tree, but are still parsed and checked for correctness. - * - * The corresponding node in DOM tree will have type node_cdata, empty name and value "content". - * - * This flag is on by default. 
- */ - const unsigned int parse_cdata = 0x0004; - - /** - * This flag determines if nodes with PCDATA (regular text) that consist only of whitespace - * characters are to be put in DOM tree. Often whitespace-only data is not significant for the - * application, and the cost of allocating and storing such nodes (both memory and speed-wise) - * can be significant. For example, after parsing XML string " ", element - * will have 3 children when parse_ws_pcdata is set (child with type node_pcdata and value=" ", - * child with type node_element and name "a", and another child with type node_pcdata and - * value=" "), and only 1 child when parse_ws_pcdata is not set. - * - * This flag is off by default. - */ - const unsigned int parse_ws_pcdata = 0x0008; - - /** - * This flag determines if character and entity references are to be expanded during the parsing - * process. Character references are &#...; or &#x...; (... is Unicode numeric representation of - * character in either decimal (&#...;) or hexadecimal (&#x...;) form), entity references are &...; - * Note that as pugixml does not handle DTD, the only allowed entities are predefined ones - - * &lt;, &gt;, &amp;, &apos; and &quot;. If character/entity reference can not be expanded, it is - * leaved as is, so you can do additional processing later. - * Reference expansion is performed in attribute values and PCDATA content. - * - * This flag is on by default. - */ - const unsigned int parse_escapes = 0x0010; - - /** - * This flag determines if EOL handling (that is, replacing sequences 0x0d 0x0a by a single 0x0a - * character, and replacing all standalone 0x0d characters by 0x0a) is to be performed on input - * data (that is, comments contents, PCDATA/CDATA contents and attribute values). - * - * This flag is on by default. - */ - const unsigned int parse_eol = 0x0020; - - /** - * This flag determines if attribute value normalization should be performed for all attributes. - * This means, that: - * 1. 
Whitespace characters (new line, tab and space) are replaced with space (' ') - * 2. Afterwards sequences of spaces are replaced with a single space - * 3. Leading/trailing whitespace characters are trimmed - * - * This flag is off by default. - */ - const unsigned int parse_wnorm_attribute = 0x0080; - - /** - * This flag determines if attribute value normalization should be performed for all attributes. - * This means, that whitespace characters (new line, tab and space) are replaced with space (' '). - * Note, that the actions performed while this flag is on are also performed if parse_wnorm_attribute - * is on, so this flag has no effect if parse_wnorm_attribute flag is set. - * New line characters are always treated as if parse_eol is set, i.e. \r\n is converted to single space. - * - * This flag is on by default. - */ - const unsigned int parse_wconv_attribute = 0x0040; - - /** - * This flag determines if XML document declaration (this node has the form of in XML) - * are to be put in DOM tree. If this flag is off, it is not put in the tree, but is still parsed - * and checked for correctness. - * - * The corresponding node in DOM tree will have type node_declaration, name "xml" and attributes, - * if any. - * - * This flag is off by default. - */ - const unsigned int parse_declaration = 0x0100; - - /** - * This is the default set of flags. It includes parsing CDATA sections (comments/PIs are not - * parsed), performing character and entity reference expansion, replacing whitespace characters - * with spaces in attribute values and performing EOL handling. Note, that PCDATA sections - * consisting only of whitespace characters are not parsed (by default) for performance reasons. - */ - const unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol; - - /** - * These flags determine the encoding of input data for XML document. 
Default mode is encoding_auto, - * which means that document encoding is auto-detected from BOM and necessary encoding conversions are - * applied. You can override this mode by using any of the specific encodings. - */ - enum xml_encoding - { - encoding_auto, //!< Auto-detect input encoding using BOM or < / 800 - PUGIXML_DEPRECATED -#endif - const unsigned int format_write_bom_utf8 = format_write_bom; - - /** - * If this flag is on, no indentation is performed and no line breaks are written to output file. - * This means that the data is written to output stream as is. - * - * This flag is off by default. - */ - const unsigned int format_raw = 0x04; - - /** - * If this flag is on, no default XML declaration is written to output file. - * This means that there will be no XML declaration in output stream unless there was one in XML document - * (i.e. if it was parsed with parse_declaration flag). - * - * This flag is off by default. - */ - const unsigned int format_no_declaration = 0x08; - - /** - * This is the default set of formatting flags. It includes indenting nodes depending on their - * depth in DOM tree. 
- */ - const unsigned int format_default = format_indent; - - // Forward declarations - struct xml_attribute_struct; - struct xml_node_struct; - - class xml_node_iterator; - class xml_attribute_iterator; - - class xml_tree_walker; - - class xml_node; - - #ifndef PUGIXML_NO_XPATH - class xpath_node; - class xpath_node_set; - class xpath_ast_node; - class xpath_allocator; - - /// XPath query return type classification - enum xpath_value_type - { - xpath_type_none, ///< Unknown type (query failed to compile) - xpath_type_node_set, ///< Node set (xpath_node_set) - xpath_type_number, ///< Number - xpath_type_string, ///< String - xpath_type_boolean ///< Boolean - }; - - /// XPath query return type classification - /// \deprecated This type is deprecated and will be removed in future versions; use xpath_value_type instead - typedef xpath_value_type xpath_type_t; - - /** - * A class that holds compiled XPath query and allows to evaluate query result - */ - class PUGIXML_CLASS xpath_query - { - private: - // Non-copyable semantics - xpath_query(const xpath_query&); - xpath_query& operator=(const xpath_query&); - - xpath_allocator* m_alloc; - xpath_ast_node* m_root; - - void compile(const char_t* query); - - public: - /** - * Constructor from string with XPath expression. - * Throws xpath_exception on compilation error, std::bad_alloc on out of memory error. - * - * \param query - string with XPath expression - */ - explicit xpath_query(const char_t* query); - - /** - * Destructor - */ - ~xpath_query(); - - /** - * Get query expression return type - * - * \return expression return type - **/ - xpath_value_type return_type() const; - - /** - * Evaluate expression as boolean value for the context node \a n. - * If expression does not directly evaluate to boolean, the expression result is converted - * as through boolean() XPath function call. - * Throws std::bad_alloc on out of memory error. 
- * - * \param n - context node - * \return evaluation result - */ - bool evaluate_boolean(const xml_node& n) const; - - /** - * Evaluate expression as double value for the context node \a n. - * If expression does not directly evaluate to double, the expression result is converted - * as through number() XPath function call. - * Throws std::bad_alloc on out of memory error. - * - * \param n - context node - * \return evaluation result - */ - double evaluate_number(const xml_node& n) const; - - /** - * Evaluate expression as string value for the context node \a n. - * If expression does not directly evaluate to string, the expression result is converted - * as through string() XPath function call. - * Throws std::bad_alloc on out of memory error. - * - * \param n - context node - * \return evaluation result - */ - string_t evaluate_string(const xml_node& n) const; - - /** - * Evaluate expression as node set for the context node \a n. - * If expression does not directly evaluate to node set, throws xpath_exception. - * Throws std::bad_alloc on out of memory error. 
- * - * \param n - context node - * \return evaluation result - */ - xpath_node_set evaluate_node_set(const xml_node& n) const; - }; - #endif - - /** - * Abstract writer class - * \see xml_node::print - */ - class PUGIXML_CLASS xml_writer - { - public: - /** - * Virtual destructor - */ - virtual ~xml_writer() {} - - /** - * Write memory chunk into stream/file/whatever - * - * \param data - data pointer - * \param size - data size - */ - virtual void write(const void* data, size_t size) = 0; - }; - - /** xml_writer implementation for FILE* - * \see xml_writer - */ - class PUGIXML_CLASS xml_writer_file: public xml_writer - { - public: - /** - * Construct writer instance - * - * \param file - this is FILE* object, void* is used to avoid header dependencies on stdio - */ - xml_writer_file(void* file); - - virtual void write(const void* data, size_t size); - - private: - void* file; - }; - - #ifndef PUGIXML_NO_STL - /** xml_writer implementation for streams - * \see xml_writer - */ - class PUGIXML_CLASS xml_writer_stream: public xml_writer - { - public: - /** - * Construct writer instance - * - * \param stream - output stream object - */ - xml_writer_stream(std::basic_ostream >& stream); - - /** - * Construct writer instance - * - * \param stream - output stream object - */ - xml_writer_stream(std::basic_ostream >& stream); - - virtual void write(const void* data, size_t size); - - private: - std::basic_ostream >* narrow_stream; - std::basic_ostream >* wide_stream; - }; - #endif - - /** - * A light-weight wrapper for manipulating attributes in DOM tree. - * Note: xml_attribute does not allocate any memory for the attribute it wraps; it only wraps a - * pointer to existing attribute. 
- */ - class PUGIXML_CLASS xml_attribute - { - friend class xml_attribute_iterator; - friend class xml_node; - - private: - xml_attribute_struct* _attr; - - /// \internal Safe bool type -#ifdef __MWERKS__ - typedef bool (xml_attribute::*unspecified_bool_type)() const; -#else - typedef xml_attribute_struct* xml_attribute::*unspecified_bool_type; -#endif - - /// \internal Initializing constructor - explicit xml_attribute(xml_attribute_struct* attr); - - public: - /** - * Default constructor. Constructs an empty attribute. - */ - xml_attribute(); - - public: - /** - * Safe bool conversion. - * Allows xml_node to be used in a context where boolean variable is expected, such as 'if (node)'. - */ - operator unspecified_bool_type() const; - - // Borland C++ workaround - bool operator!() const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator==(const xml_attribute& r) const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator!=(const xml_attribute& r) const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator<(const xml_attribute& r) const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator>(const xml_attribute& r) const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator<=(const xml_attribute& r) const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. 
- * - * \param r - value to compare to - * \return comparison result - */ - bool operator>=(const xml_attribute& r) const; - - public: - /** - * Get next attribute in attribute list of node that contains the attribute. - * - * \return next attribute, if any; empty attribute otherwise - */ - xml_attribute next_attribute() const; - - /** - * Get previous attribute in attribute list of node that contains the attribute. - * - * \return previous attribute, if any; empty attribute otherwise - */ - xml_attribute previous_attribute() const; - - /** - * Cast attribute value as int. - * - * \return attribute value as int, or 0 if conversion did not succeed or attribute is empty - */ - int as_int() const; - - /** - * Cast attribute value as unsigned int. - * - * \return attribute value as unsigned int, or 0 if conversion did not succeed or attribute is empty - * \note values out of non-negative int range (usually [0, 2^31-1]) get clamped to range boundaries - */ - unsigned int as_uint() const; - - /** - * Cast attribute value as double. - * - * \return attribute value as double, or 0.0 if conversion did not succeed or attribute is empty - */ - double as_double() const; - - /** - * Cast attribute value as float. - * - * \return attribute value as float, or 0.0f if conversion did not succeed or attribute is empty - */ - float as_float() const; - - /** - * Cast attribute value as bool. Returns true for attributes with values that start with '1', - * 't', 'T', 'y', 'Y', returns false for other attributes. - * - * \return attribute value as bool, or false if conversion did not succeed or attribute is empty - */ - bool as_bool() const; - - /// \internal Document order or 0 if not set - /// \deprecated This function is deprecated - PUGIXML_DEPRECATED unsigned int document_order() const; - - public: - /** - * Set attribute value to \a rhs. 
- * - * \param rhs - new attribute value - * \return self - */ - xml_attribute& operator=(const char_t* rhs); - - /** - * Set attribute value to \a rhs. - * - * \param rhs - new attribute value - * \return self - */ - xml_attribute& operator=(int rhs); - - /** - * Set attribute value to \a rhs. - * - * \param rhs - new attribute value - * \return self - */ - xml_attribute& operator=(unsigned int rhs); - - /** - * Set attribute value to \a rhs. - * - * \param rhs - new attribute value - * \return self - */ - xml_attribute& operator=(double rhs); - - /** - * Set attribute value to either 'true' or 'false' (depends on whether \a rhs is true or false). - * - * \param rhs - new attribute value - * \return self - */ - xml_attribute& operator=(bool rhs); - - /** - * Set attribute name to \a rhs. - * - * \param rhs - new attribute name - * \return success flag (call fails if attribute is empty or there is not enough memory) - */ - bool set_name(const char_t* rhs); - - /** - * Set attribute value to \a rhs. - * - * \param rhs - new attribute value - * \return success flag (call fails if attribute is empty or there is not enough memory) - */ - bool set_value(const char_t* rhs); - - /** - * Set attribute value to \a rhs. - * - * \param rhs - new attribute value - * \return success flag (call fails if attribute is empty or there is not enough memory) - */ - bool set_value(int rhs); - - /** - * Set attribute value to \a rhs. - * - * \param rhs - new attribute value - * \return success flag (call fails if attribute is empty or there is not enough memory) - */ - bool set_value(unsigned int rhs); - - /** - * Set attribute value to \a rhs. - * - * \param rhs - new attribute value - * \return success flag (call fails if attribute is empty or there is not enough memory) - */ - bool set_value(double rhs); - - /** - * Set attribute value to either 'true' or 'false' (depends on whether \a rhs is true or false). 
- * - * \param rhs - new attribute value - * \return success flag (call fails if attribute is empty or there is not enough memory) - */ - bool set_value(bool rhs); - - public: - /** - * Check if attribute is empty. - * - * \return true if attribute is empty, false otherwise - */ - bool empty() const; - - public: - /** - * Get attribute name. - * - * \return attribute name, or "" if attribute is empty - */ - const char_t* name() const; - - /** - * Get attribute value. - * - * \return attribute value, or "" if attribute is empty - */ - const char_t* value() const; - }; - -#ifdef __BORLANDC__ - // Borland C++ workaround - bool PUGIXML_FUNCTION operator&&(const xml_attribute& lhs, bool rhs); - bool PUGIXML_FUNCTION operator||(const xml_attribute& lhs, bool rhs); -#endif - - /** - * A light-weight wrapper for manipulating nodes in DOM tree. - * Note: xml_node does not allocate any memory for the node it wraps; it only wraps a pointer to - * existing node. - */ - class PUGIXML_CLASS xml_node - { - friend class xml_attribute_iterator; - friend class xml_node_iterator; - - protected: - xml_node_struct* _root; - - /// \internal Safe bool type -#ifdef __MWERKS__ - typedef bool (xml_node::*unspecified_bool_type)() const; -#else - typedef xml_node_struct* xml_node::*unspecified_bool_type; -#endif - - /// \internal Initializing constructor - explicit xml_node(xml_node_struct* p); - - private: - template void all_elements_by_name_helper(const char_t* name, OutputIterator it) const - { - if (!_root) return; - - for (xml_node node = first_child(); node; node = node.next_sibling()) - { - if (node.type() == node_element) - { - if (impl::strequal(name, node.name())) - { - *it = node; - ++it; - } - - if (node.first_child()) node.all_elements_by_name_helper(name, it); - } - } - } - - template void all_elements_by_name_w_helper(const char_t* name, OutputIterator it) const - { - if (!_root) return; - - for (xml_node node = first_child(); node; node = node.next_sibling()) - { - if 
(node.type() == node_element) - { - if (impl::strequalwild(name, node.name())) - { - *it = node; - ++it; - } - - if (node.first_child()) node.all_elements_by_name_w_helper(name, it); - } - } - } - - public: - /** - * Default constructor. Constructs an empty node. - */ - xml_node(); - - public: - /** - * Safe bool conversion. - * Allows xml_node to be used in a context where boolean variable is expected, such as 'if (node)'. - */ - operator unspecified_bool_type() const; - - // Borland C++ workaround - bool operator!() const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator==(const xml_node& r) const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator!=(const xml_node& r) const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator<(const xml_node& r) const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator>(const xml_node& r) const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator<=(const xml_node& r) const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator>=(const xml_node& r) const; - - public: - /** - * Node iterator type (for child nodes). - * \see xml_node_iterator - */ - typedef xml_node_iterator iterator; - - /** - * Node iterator type (for child nodes). 
- * \see xml_attribute_iterator - */ - typedef xml_attribute_iterator attribute_iterator; - - /** - * Access the begin iterator for this node's collection of child nodes. - * - * \return iterator that points to the first child node, or past-the-end iterator if node is empty or has no children - */ - iterator begin() const; - - /** - * Access the end iterator for this node's collection of child nodes. - * - * \return past-the-end iterator for child list - */ - iterator end() const; - - /** - * Access the begin iterator for this node's collection of attributes. - * - * \return iterator that points to the first attribute, or past-the-end iterator if node is empty or has no attributes - */ - attribute_iterator attributes_begin() const; - - /** - * Access the end iterator for this node's collection of attributes. - * - * \return past-the-end iterator for attribute list - */ - attribute_iterator attributes_end() const; - - public: - /** - * Check if node is empty. - * - * \return true if node is empty, false otherwise - */ - bool empty() const; - - public: - /** - * Get node type - * - * \return node type; node_null for empty nodes - */ - xml_node_type type() const; - - /** - * Get node name (element name for element nodes, PI target for PI) - * - * \return node name, if any; "" otherwise - */ - const char_t* name() const; - - /** - * Get node value (comment/PI/PCDATA/CDATA contents, depending on node type) - * - * \return node value, if any; "" otherwise - */ - const char_t* value() const; - - /** - * Get child with the specified name - * - * \param name - child name - * \return child with the specified name, if any; empty node otherwise - */ - xml_node child(const char_t* name) const; - - /** - * Get child with the name that matches specified pattern - * - * \param name - child name pattern - * \return child with the name that matches pattern, if any; empty node otherwise - * - * \deprecated This function is deprecated - */ - PUGIXML_DEPRECATED xml_node child_w(const 
char_t* name) const; - - /** - * Get attribute with the specified name - * - * \param name - attribute name - * \return attribute with the specified name, if any; empty attribute otherwise - */ - xml_attribute attribute(const char_t* name) const; - - /** - * Get attribute with the name that matches specified pattern - * - * \param name - attribute name pattern - * \return attribute with the name that matches pattern, if any; empty attribute otherwise - * - * \deprecated This function is deprecated - */ - PUGIXML_DEPRECATED xml_attribute attribute_w(const char_t* name) const; - - /** - * Get first of following sibling nodes with the specified name - * - * \param name - sibling name - * \return node with the specified name, if any; empty node otherwise - */ - xml_node next_sibling(const char_t* name) const; - - /** - * Get first of the following sibling nodes with the name that matches specified pattern - * - * \param name - sibling name pattern - * \return node with the name that matches pattern, if any; empty node otherwise - * - * \deprecated This function is deprecated - */ - PUGIXML_DEPRECATED xml_node next_sibling_w(const char_t* name) const; - - /** - * Get following sibling - * - * \return following sibling node, if any; empty node otherwise - */ - xml_node next_sibling() const; - - /** - * Get first of preceding sibling nodes with the specified name - * - * \param name - sibling name - * \return node with the specified name, if any; empty node otherwise - */ - xml_node previous_sibling(const char_t* name) const; - - /** - * Get first of the preceding sibling nodes with the name that matches specified pattern - * - * \param name - sibling name pattern - * \return node with the name that matches pattern, if any; empty node otherwise - * - * \deprecated This function is deprecated - */ - PUGIXML_DEPRECATED xml_node previous_sibling_w(const char_t* name) const; - - /** - * Get preceding sibling - * - * \return preceding sibling node, if any; empty node otherwise 
- */ - xml_node previous_sibling() const; - - /** - * Get parent node - * - * \return parent node if any; empty node otherwise - */ - xml_node parent() const; - - /** - * Get root of DOM tree this node belongs to. - * - * \return tree root - */ - xml_node root() const; - - /** - * Get child value of current node; that is, value of the first child node of type PCDATA/CDATA - * - * \return child value of current node, if any; "" otherwise - */ - const char_t* child_value() const; - - /** - * Get child value of child with specified name. \see child_value - * node.child_value(name) is equivalent to node.child(name).child_value() - * - * \param name - child name - * \return child value of specified child node, if any; "" otherwise - */ - const char_t* child_value(const char_t* name) const; - - /** - * Get child value of child with name that matches the specified pattern. \see child_value - * node.child_value_w(name) is equivalent to node.child_w(name).child_value() - * - * \param name - child name pattern - * \return child value of specified child node, if any; "" otherwise - * - * \deprecated This function is deprecated - */ - PUGIXML_DEPRECATED const char_t* child_value_w(const char_t* name) const; - - public: - /** - * Set node name to \a rhs (for PI/element nodes). \see name - * - * \param rhs - new node name - * \return success flag (call fails if node is of the wrong type or there is not enough memory) - */ - bool set_name(const char_t* rhs); - - /** - * Set node value to \a rhs (for PI/PCDATA/CDATA/comment nodes). 
\see value - * - * \param rhs - new node value - * \return success flag (call fails if node is of the wrong type or there is not enough memory) - */ - bool set_value(const char_t* rhs); - - /** - * Add attribute with specified name (for element nodes) - * - * \param name - attribute name - * \return added attribute, or empty attribute if there was an error (wrong node type) - */ - xml_attribute append_attribute(const char_t* name); - - /** - * Insert attribute with specified name after \a attr (for element nodes) - * - * \param name - attribute name - * \param attr - attribute to insert a new one after - * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node) - */ - xml_attribute insert_attribute_after(const char_t* name, const xml_attribute& attr); - - /** - * Insert attribute with specified name before \a attr (for element nodes) - * - * \param name - attribute name - * \param attr - attribute to insert a new one before - * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node) - */ - xml_attribute insert_attribute_before(const char_t* name, const xml_attribute& attr); - - /** - * Add a copy of the specified attribute (for element nodes) - * - * \param proto - attribute prototype which is to be copied - * \return inserted attribute, or empty attribute if there was an error (wrong node type) - */ - xml_attribute append_copy(const xml_attribute& proto); - - /** - * Insert a copy of the specified attribute after \a attr (for element nodes) - * - * \param proto - attribute prototype which is to be copied - * \param attr - attribute to insert a new one after - * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node) - */ - xml_attribute insert_copy_after(const xml_attribute& proto, const xml_attribute& attr); - - /** - * Insert a copy of the specified attribute before \a attr 
(for element nodes) - * - * \param proto - attribute prototype which is to be copied - * \param attr - attribute to insert a new one before - * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node) - */ - xml_attribute insert_copy_before(const xml_attribute& proto, const xml_attribute& attr); - - /** - * Add child node with specified type (for element nodes) - * - * \param type - node type - * \return added node, or empty node if there was an error (wrong node type) - */ - xml_node append_child(xml_node_type type = node_element); - - /** - * Insert child node with specified type after \a node (for element nodes) - * - * \param type - node type - * \param node - node to insert a new one after - * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node) - */ - xml_node insert_child_after(xml_node_type type, const xml_node& node); - - /** - * Insert child node with specified type before \a node (for element nodes) - * - * \param type - node type - * \param node - node to insert a new one before - * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node) - */ - xml_node insert_child_before(xml_node_type type, const xml_node& node); - - /** - * Add a copy of the specified node as a child (for element nodes) - * - * \param proto - node prototype which is to be copied - * \return inserted node, or empty node if there was an error (wrong node type) - */ - xml_node append_copy(const xml_node& proto); - - /** - * Insert a copy of the specified node after \a node (for element nodes) - * - * \param proto - node prototype which is to be copied - * \param node - node to insert a new one after - * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node) - */ - xml_node insert_copy_after(const xml_node& proto, const xml_node& node); - - 
/** - * Insert a copy of the specified node before \a node (for element nodes) - * - * \param proto - node prototype which is to be copied - * \param node - node to insert a new one before - * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node) - */ - xml_node insert_copy_before(const xml_node& proto, const xml_node& node); - - /** - * Remove specified attribute - * - * \param a - attribute to be removed - * \return success flag - */ - bool remove_attribute(const xml_attribute& a); - - /** - * Remove attribute with the specified name, if any - * - * \param name - attribute name - * \return success flag - */ - bool remove_attribute(const char_t* name); - - /** - * Remove specified child - * - * \param n - child node to be removed - * \return success flag - */ - bool remove_child(const xml_node& n); - - /** - * Remove child with the specified name, if any - * - * \param name - child name - * \return success flag - */ - bool remove_child(const char_t* name); - - public: - /** - * Get first attribute - * - * \return first attribute, if any; empty attribute otherwise - */ - xml_attribute first_attribute() const; - - /** - * Get last attribute - * - * \return last attribute, if any; empty attribute otherwise - */ - xml_attribute last_attribute() const; - - /** - * Get all elements from subtree with given name - * - * \param name - node name - * \param it - output iterator (for example, std::back_insert_iterator (result of std::back_inserter)) - * - * \deprecated This function is deprecated - */ - template PUGIXML_DEPRECATED void all_elements_by_name(const char_t* name, OutputIterator it) const - { - all_elements_by_name_helper(name, it); - } - - /** - * Get all elements from subtree with name that matches given pattern - * - * \param name - node name pattern - * \param it - output iterator (for example, std::back_insert_iterator (result of std::back_inserter)) - * - * \deprecated This function is deprecated 
- */ - template PUGIXML_DEPRECATED void all_elements_by_name_w(const char_t* name, OutputIterator it) const - { - all_elements_by_name_w_helper(name, it); - } - - /** - * Get first child - * - * \return first child, if any; empty node otherwise - */ - xml_node first_child() const; - - /** - * Get last child - * - * \return last child, if any; empty node otherwise - */ - xml_node last_child() const; - - /** - * Find attribute using predicate - * - * \param pred - predicate, that takes xml_attribute and returns bool - * \return first attribute for which predicate returned true, or empty attribute - */ - template xml_attribute find_attribute(Predicate pred) const - { - if (!_root) return xml_attribute(); - - for (xml_attribute attrib = first_attribute(); attrib; attrib = attrib.next_attribute()) - if (pred(attrib)) - return attrib; - - return xml_attribute(); - } - - /** - * Find child node using predicate - * - * \param pred - predicate, that takes xml_node and returns bool - * \return first child node for which predicate returned true, or empty node - */ - template xml_node find_child(Predicate pred) const - { - if (!_root) return xml_node(); - - for (xml_node node = first_child(); node; node = node.next_sibling()) - if (pred(node)) - return node; - - return xml_node(); - } - - /** - * Find node from subtree using predicate - * - * \param pred - predicate, that takes xml_node and returns bool - * \return first node from subtree for which predicate returned true, or empty node - */ - template xml_node find_node(Predicate pred) const - { - if (!_root) return xml_node(); - - for (xml_node node = first_child(); node; node = node.next_sibling()) - { - if (pred(node)) - return node; - - if (node.first_child()) - { - xml_node found = node.find_node(pred); - if (found) return found; - } - } - - return xml_node(); - } - - /** - * Find child node with the specified name that has specified attribute - * - * \param name - child node name - * \param attr_name - attribute name of 
child node - * \param attr_value - attribute value of child node - * \return first matching child node, or empty node - */ - xml_node find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const; - - /** - * Find child node with the specified name that has specified attribute (use pattern matching for node name and attribute name/value) - * - * \param name - pattern for child node name - * \param attr_name - pattern for attribute name of child node - * \param attr_value - pattern for attribute value of child node - * \return first matching child node, or empty node - * - * \deprecated This function is deprecated - */ - PUGIXML_DEPRECATED xml_node find_child_by_attribute_w(const char_t* name, const char_t* attr_name, const char_t* attr_value) const; - - /** - * Find child node that has specified attribute - * - * \param attr_name - attribute name of child node - * \param attr_value - attribute value of child node - * \return first matching child node, or empty node - */ - xml_node find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const; - - /** - * Find child node that has specified attribute (use pattern matching for attribute name/value) - * - * \param attr_name - pattern for attribute name of child node - * \param attr_value - pattern for attribute value of child node - * \return first matching child node, or empty node - * - * \deprecated This function is deprecated - */ - PUGIXML_DEPRECATED xml_node find_child_by_attribute_w(const char_t* attr_name, const char_t* attr_value) const; - - #ifndef PUGIXML_NO_STL - /** - * Get the absolute node path from root as a text string. - * - * \param delimiter - delimiter character to insert between element names - * \return path string (e.g. '/bookstore/book/author'). - */ - string_t path(char_t delimiter = '/') const; - #endif - - /** - * Search for a node by path. - * \param path - path string; e.g. 
'./foo/bar' (relative to node), '/foo/bar' (relative - * to root), '../foo/bar'. - * \param delimiter - delimiter character to use while tokenizing path - * \return matching node, if any; empty node otherwise - */ - xml_node first_element_by_path(const char_t* path, char_t delimiter = '/') const; - - /** - * Recursively traverse subtree with xml_tree_walker - * \see xml_tree_walker::begin - * \see xml_tree_walker::for_each - * \see xml_tree_walker::end - * - * \param walker - tree walker to traverse subtree with - * \return traversal result - */ - bool traverse(xml_tree_walker& walker); - - #ifndef PUGIXML_NO_XPATH - /** - * Select single node by evaluating XPath query - * - * \param query - query string - * \return first node from the resulting node set by document order, or empty node if none found - */ - xpath_node select_single_node(const char_t* query) const; - - /** - * Select single node by evaluating XPath query - * - * \param query - compiled query - * \return first node from the resulting node set by document order, or empty node if none found - */ - xpath_node select_single_node(const xpath_query& query) const; - - /** - * Select node set by evaluating XPath query - * - * \param query - query string - * \return resulting node set - */ - xpath_node_set select_nodes(const char_t* query) const; - - /** - * Select node set by evaluating XPath query - * - * \param query - compiled query - * \return resulting node set - */ - xpath_node_set select_nodes(const xpath_query& query) const; - #endif - - /// \internal Document order or 0 if not set - /// \deprecated This function is deprecated - PUGIXML_DEPRECATED unsigned int document_order() const; - - /** - * Print subtree to writer - * - * \param writer - writer object - * \param indent - indentation string - * \param flags - formatting flags - * \param encoding - encoding used for writing - * \param depth - starting depth (used for indentation) - */ - void print(xml_writer& writer, const char_t* indent = 
PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; - - #ifndef PUGIXML_NO_STL - /** - * Print subtree to stream - * - * \param os - output stream - * \param indent - indentation string - * \param flags - formatting flags - * \param encoding - encoding used for writing - * \param depth - starting depth (used for indentation) - */ - void print(std::basic_ostream >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; - - /** - * Print subtree to stream - * - * \param os - output stream - * \param indent - indentation string - * \param flags - formatting flags - * \param encoding - encoding used for writing - * \param depth - starting depth (used for indentation) - */ - void print(std::basic_ostream >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, unsigned int depth = 0) const; - #endif - - /** - * Get node offset in parsed file/string (in bytes) for debugging purposes - * - * \return offset in bytes to start of node data, or -1 in case of error - * \note This will return -1 if node information changed to the extent that it's no longer possible to calculate offset, for example - * if element node name has significantly changed; this is guaranteed to return correct offset only for nodes that have not changed - * since parsing. - */ - ptrdiff_t offset_debug() const; - }; - -#ifdef __BORLANDC__ - // Borland C++ workaround - bool PUGIXML_FUNCTION operator&&(const xml_node& lhs, bool rhs); - bool PUGIXML_FUNCTION operator||(const xml_node& lhs, bool rhs); -#endif - - /** - * Child node iterator. - * It's a bidirectional iterator with value type 'xml_node'. 
- */ - class PUGIXML_CLASS xml_node_iterator - { - friend class xml_node; - - private: - xml_node _wrap; - xml_node _parent; - - /// \internal Initializing constructor - xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent); - - public: - /** - * Iterator traits - */ - typedef ptrdiff_t difference_type; - typedef xml_node value_type; - typedef xml_node* pointer; - typedef xml_node& reference; - - #ifndef PUGIXML_NO_STL - typedef std::bidirectional_iterator_tag iterator_category; - #endif - - /** - * Default constructor - */ - xml_node_iterator(); - - /** - * Initializing constructor - * - * \param node - node that iterator will point at - */ - xml_node_iterator(const xml_node& node); - - /** - * Check if this iterator is equal to \a rhs - * - * \param rhs - other iterator - * \return comparison result - */ - bool operator==(const xml_node_iterator& rhs) const; - - /** - * Check if this iterator is not equal to \a rhs - * - * \param rhs - other iterator - * \return comparison result - */ - bool operator!=(const xml_node_iterator& rhs) const; - - /** - * Dereferencing operator - * - * \return reference to the node iterator points at - */ - xml_node& operator*(); - - /** - * Member access operator - * - * \return pointer to the node iterator points at - */ - xml_node* operator->(); - - /** - * Pre-increment operator - * - * \return self - */ - const xml_node_iterator& operator++(); - - /** - * Post-increment operator - * - * \return old value - */ - xml_node_iterator operator++(int); - - /** - * Pre-decrement operator - * - * \return self - */ - const xml_node_iterator& operator--(); - - /** - * Post-decrement operator - * - * \return old value - */ - xml_node_iterator operator--(int); - }; - - /** - * Attribute iterator. - * It's a bidirectional iterator with value type 'xml_attribute'. 
- */ - class PUGIXML_CLASS xml_attribute_iterator - { - friend class xml_node; - - private: - xml_attribute _wrap; - xml_node _parent; - - /// \internal Initializing constructor - xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent); - - public: - /** - * Iterator traits - */ - typedef ptrdiff_t difference_type; - typedef xml_attribute value_type; - typedef xml_attribute* pointer; - typedef xml_attribute& reference; - - #ifndef PUGIXML_NO_STL - typedef std::bidirectional_iterator_tag iterator_category; - #endif - - /** - * Default constructor - */ - xml_attribute_iterator(); - - /** - * Initializing constructor - * - * \param attr - attribute that iterator will point at - * \param parent - parent node of the attribute - */ - xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent); - - /** - * Check if this iterator is equal to \a rhs - * - * \param rhs - other iterator - * \return comparison result - */ - bool operator==(const xml_attribute_iterator& rhs) const; - - /** - * Check if this iterator is not equal to \a rhs - * - * \param rhs - other iterator - * \return comparison result - */ - bool operator!=(const xml_attribute_iterator& rhs) const; - - /** - * Dereferencing operator - * - * \return reference to the node iterator points at - */ - xml_attribute& operator*(); - - /** - * Member access operator - * - * \return pointer to the node iterator points at - */ - xml_attribute* operator->(); - - /** - * Pre-increment operator - * - * \return self - */ - const xml_attribute_iterator& operator++(); - - /** - * Post-increment operator - * - * \return old value - */ - xml_attribute_iterator operator++(int); - - /** - * Pre-decrement operator - * - * \return self - */ - const xml_attribute_iterator& operator--(); - - /** - * Post-decrement operator - * - * \return old value - */ - xml_attribute_iterator operator--(int); - }; - - /** - * Abstract tree walker class - * \see xml_node::traverse - */ - class PUGIXML_CLASS 
xml_tree_walker - { - friend class xml_node; - - private: - int _depth; - - protected: - /** - * Get node depth - * - * \return node depth - */ - int depth() const; - - public: - /** - * Default constructor - */ - xml_tree_walker(); - - /** - * Virtual destructor - */ - virtual ~xml_tree_walker(); - - public: - /** - * Callback that is called when traversal of node begins. - * - * \return returning false will abort the traversal - */ - virtual bool begin(xml_node&); - - /** - * Callback that is called for each node traversed - * - * \return returning false will abort the traversal - */ - virtual bool for_each(xml_node&) = 0; - - /** - * Callback that is called when traversal of node ends. - * - * \return returning false will abort the traversal - */ - virtual bool end(xml_node&); - }; - - /** - * Struct used to distinguish parsing with ownership transfer from parsing without it. - * \see xml_document::parse - */ - struct transfer_ownership_tag {}; - - /** - * Parsing status enumeration, returned as part of xml_parse_result struct - */ - enum xml_parse_status - { - status_ok = 0, ///< No error - - status_file_not_found, ///< File was not found during load_file() - status_io_error, ///< Error reading from file/stream - status_out_of_memory, ///< Could not allocate memory - status_internal_error, ///< Internal error occurred - - status_unrecognized_tag, ///< Parser could not determine tag type - - status_bad_pi, ///< Parsing error occurred while parsing document declaration/processing instruction () - status_bad_comment, ///< Parsing error occurred while parsing comment () - status_bad_cdata, ///< Parsing error occurred while parsing CDATA section () - status_bad_doctype, ///< Parsing error occurred while parsing document type declaration - status_bad_pcdata, ///< Parsing error occurred while parsing PCDATA section (>...<) - status_bad_start_element, ///< Parsing error occurred while parsing start element tag () - status_bad_attribute, ///< Parsing error occurred 
while parsing element attribute - status_bad_end_element, ///< Parsing error occurred while parsing end element tag () - status_end_element_mismatch ///< There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or there was an excessive closing tag) - }; - - /** - * Parser result - */ - struct PUGIXML_CLASS xml_parse_result - { - /// Parsing status (\see xml_parse_status) - xml_parse_status status; - - /// Last parsed offset (in bytes from file/string start) - ptrdiff_t offset; - - /// Source document encoding - xml_encoding encoding; - - /// Cast to bool operator - operator bool() const - { - return status == status_ok; - } - - /// Get error description - const char* description() const; - }; - - /** - * Document class (DOM tree root). - * This class has non-copyable semantics (private copy constructor/assignment operator). - */ - class PUGIXML_CLASS xml_document: public xml_node - { - private: - char_t* _buffer; - - char _memory[192]; - - xml_document(const xml_document&); - const xml_document& operator=(const xml_document&); - - void create(); - void destroy(); - - xml_parse_result load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own); - - public: - /** - * Default constructor, makes empty document - */ - xml_document(); - - /** - * Destructor - */ - ~xml_document(); - - public: - #ifndef PUGIXML_NO_STL - /** - * Load document from stream. - * - * \param stream - stream with XML data - * \param options - parsing options - * \param encoding - source data encoding - * \return parsing result - */ - xml_parse_result load(std::basic_istream >& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - - /** - * Load document from stream. 
- * - * \param stream - stream with XML data - * \param options - parsing options - * \return parsing result - */ - xml_parse_result load(std::basic_istream >& stream, unsigned int options = parse_default); - #endif - - /** - * Load document from string. String has to be zero-terminated. No encoding conversions are applied. - * - * \param contents - input string - * \param options - parsing options - * \return parsing result - */ - xml_parse_result load(const char_t* contents, unsigned int options = parse_default); - - /** - * Parse the given XML string in-situ. - * The string is modified; you should ensure that string data will persist throughout the - * document's lifetime. Although, document does not gain ownership over the string, so you - * should free the memory occupied by it manually. - * - * \param xmlstr - read/write string with XML data - * \param options - parsing options - * \return parsing result - * - * \deprecated This function is deprecated and will be removed in future versions; use xml_document::load_buffer_inplace instead - */ - PUGIXML_DEPRECATED xml_parse_result parse(char* xmlstr, unsigned int options = parse_default); - - /** - * Parse the given XML string in-situ (gains ownership). - * The string is modified; document gains ownership over the string, so you don't have to worry - * about it's lifetime. 
- * Call example: doc.parse(transfer_ownership_tag(), string, options); - * - * \param xmlstr - read/write string with XML data - * \param options - parsing options - * \return parsing result - * - * \deprecated This function is deprecated and will be removed in future versions; use xml_document::load_buffer_inplace_own instead - */ - PUGIXML_DEPRECATED xml_parse_result parse(const transfer_ownership_tag&, char* xmlstr, unsigned int options = parse_default); - - /** - * Load document from file - * - * \param path - file path - * \param options - parsing options - * \param encoding - source data encoding - * \return parsing result - */ - xml_parse_result load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - - /** - * Load document from buffer - * - * \param contents - buffer contents - * \param size - buffer size in bytes - * \param options - parsing options - * \param encoding - source data encoding - * \return parsing result - */ - xml_parse_result load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - - /** - * Load document from buffer in-situ. - * The buffer is modified; you should ensure that buffer data will persist throughout the document's - * lifetime. Document does not gain ownership over the buffer, so you should free the buffer memory manually. - * - * \param contents - buffer contents - * \param size - buffer size in bytes - * \param options - parsing options - * \param encoding - source data encoding - * \return parsing result - */ - xml_parse_result load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - - /** - * Load document from buffer in-situ (gains buffer ownership). - * The buffer is modified; you should ensure that buffer data will persist throughout the document's - * lifetime. 
Document gains ownership over the buffer, so you should allocate the buffer with pugixml - * allocation function. - * - * \param contents - buffer contents - * \param size - buffer size in bytes - * \param options - parsing options - * \param encoding - source data encoding - * \return parsing result - */ - xml_parse_result load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - - /** - * Save XML to writer - * - * \param writer - writer object - * \param indent - indentation string - * \param flags - formatting flags - * \param encoding - encoding used for writing - */ - void save(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - - #ifndef PUGIXML_NO_STL - /** - * Save XML to stream - * - * \param stream - output stream - * \param indent - indentation string - * \param flags - formatting flags - * \param encoding - encoding used for writing - */ - void save(std::basic_ostream >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - - /** - * Save XML to stream - * - * \param stream - output stream - * \param indent - indentation string - * \param flags - formatting flags - */ - void save(std::basic_ostream >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default) const; - #endif - - /** - * Save XML to file - * - * \param path - file path - * \param indent - indentation string - * \param flags - formatting flags - * \param encoding - encoding used for writing - * \return success flag - */ - bool save_file(const char* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - - /** - * Compute document order for the whole tree - * Sometimes this makes evaluation of XPath queries faster. 
- */ - PUGIXML_DEPRECATED void precompute_document_order(); - }; - -#ifndef PUGIXML_NO_XPATH - /** - * XPath exception class. - */ - class PUGIXML_CLASS xpath_exception: public std::exception - { - private: - const char* m_message; - - public: - /** - * Construct exception from static error string - * - * \param message - error string - */ - explicit xpath_exception(const char* message); - - /** - * Return error message - * - * \return error message - */ - virtual const char* what() const throw(); - }; - - /** - * XPath node class. - * - * XPath defines node to be either xml_node or xml_attribute in pugixml terminology, so xpath_node - * is either xml_node or xml_attribute. - */ - class PUGIXML_CLASS xpath_node - { - private: - xml_node m_node; - xml_attribute m_attribute; - - /// \internal Safe bool type - typedef xml_node xpath_node::*unspecified_bool_type; - - public: - /** - * Construct empty XPath node - */ - xpath_node(); - - /** - * Construct XPath node from XML node - * - * \param node - XML node - */ - xpath_node(const xml_node& node); - - /** - * Construct XPath node from XML attribute - * - * \param attribute - XML attribute - * \param parent - attribute's parent node - */ - xpath_node(const xml_attribute& attribute, const xml_node& parent); - - /** - * Get XML node, if any - * - * \return contained XML node, empty node otherwise - */ - xml_node node() const; - - /** - * Get XML attribute, if any - * - * \return contained XML attribute, if any, empty attribute otherwise - */ - xml_attribute attribute() const; - - /** - * Get parent of contained XML attribute, if any - * - * \return parent of contained XML attribute, if any, empty node otherwise - */ - xml_node parent() const; - - /** - * Safe bool conversion. - * Allows xpath_node to be used in a context where boolean variable is expected, such as 'if (node)'. 
- */ - operator unspecified_bool_type() const; - - // Borland C++ workaround - bool operator!() const; - - /** - * Compares two XPath nodes - * - * \param n - XPath node to compare to - * \return comparison result - */ - bool operator==(const xpath_node& n) const; - - /** - * Compares two XPath nodes - * - * \param n - XPath node to compare to - * \return comparison result - */ - bool operator!=(const xpath_node& n) const; - }; - -#ifdef __BORLANDC__ - // Borland C++ workaround - bool PUGIXML_FUNCTION operator&&(const xpath_node& lhs, bool rhs); - bool PUGIXML_FUNCTION operator||(const xpath_node& lhs, bool rhs); -#endif - - /** - * Not necessarily ordered constant collection of XPath nodes - */ - class PUGIXML_CLASS xpath_node_set - { - friend class xpath_ast_node; - - public: - /// Collection type - enum type_t - { - type_unsorted, ///< Not ordered - type_sorted, ///< Sorted by document order (ascending) - type_sorted_reverse ///< Sorted by document order (descending) - }; - - /// Constant iterator type - typedef const xpath_node* const_iterator; - - private: - type_t m_type; - - xpath_node m_storage; - - xpath_node* m_begin; - xpath_node* m_end; - xpath_node* m_eos; - - typedef xpath_node* iterator; - - iterator mut_begin(); - - void push_back(const xpath_node& n); - - void append(const_iterator begin, const_iterator end); - - void truncate(iterator it); - - void remove_duplicates(); - - public: - /** - * Default constructor - * Constructs empty set - */ - xpath_node_set(); - - /** - * Destructor - */ - ~xpath_node_set(); - - /** - * Copy constructor - * - * \param ns - set to copy - */ - xpath_node_set(const xpath_node_set& ns); - - /** - * Assignment operator - * - * \param ns - set to assign - * \return self - */ - xpath_node_set& operator=(const xpath_node_set& ns); - - /** - * Get collection type - * - * \return collection type - */ - type_t type() const; - - /** - * Get collection size - * - * \return collection size - */ - size_t size() const; - - /** - * 
Get element with the specified index - * - * \param index - requested index - * \return element - */ - const xpath_node& operator[](size_t index) const; - - /** - * Get begin constant iterator for collection - * - * \return begin constant iterator - */ - const_iterator begin() const; - - /** - * Get end iterator for collection - * - * \return end iterator - */ - const_iterator end() const; - - /** - * Sort the collection in ascending/descending order by document order - * - * \param reverse - whether to sort in ascending (false) or descending (true) order - */ - void sort(bool reverse = false); - - /** - * Get first node in the collection by document order - * - * \return first node by document order - * \note set.first() is not equal to set[0], since operator[] does not take document order into account - */ - xpath_node first() const; - - /** - * Return true if collection is empty - * - * \return true if collection is empty, false otherwise - */ - bool empty() const; - }; -#endif - -#ifndef PUGIXML_NO_STL - /** - * Convert wide string to UTF8 - * - * \param str - input wide string string - * \return output UTF8 string - */ - std::basic_string, std::allocator > PUGIXML_FUNCTION as_utf8(const wchar_t* str); - - /** - * Convert UTF8 to wide string - * - * \param str - input UTF8 string - * \return output wide string string - * - * \deprecated This function is deprecated and will be removed in future versions; use as_wide instead - */ - PUGIXML_DEPRECATED std::basic_string, std::allocator > PUGIXML_FUNCTION as_utf16(const char* str); - - /** - * Convert UTF8 to wide string - * - * \param str - input UTF8 string - * \return output wide string string - */ - std::basic_string, std::allocator > PUGIXML_FUNCTION as_wide(const char* str); -#endif - - /** - * Memory allocation function - * - * \param size - allocation size - * \return pointer to allocated memory on success, NULL on failure - */ - typedef void* (*allocation_function)(size_t size); - - /** - * Memory 
deallocation function - * - * \param ptr - pointer to memory previously allocated by allocation function - */ - typedef void (*deallocation_function)(void* ptr); - - /** - * Override default memory management functions - * - * All subsequent allocations/deallocations will be performed via supplied functions. Take care not to - * change memory management functions if any xml_document instances are still alive - this is considered - * undefined behaviour (expect crashes/memory damages/etc.). - * - * \param allocate - allocation function - * \param deallocate - deallocation function - * - * \note XPath-related allocations, as well as allocations in functions that return std::string (xml_node::path, as_utf8, as_wide) - * are not performed via these functions. - * \note If you're using parse() with ownership transfer, you have to allocate the buffer you pass to parse() with allocation - * function you set via this function. - */ - void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate); - - /** - * Get current memory allocation function - * - * \return memory allocation function - * \see set_memory_management_functions - */ - allocation_function PUGIXML_FUNCTION get_memory_allocation_function(); - - /** - * Get current memory deallocation function - * - * \return memory deallocation function - * \see set_memory_management_functions - */ - deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function(); -} - -#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) -namespace std -{ - // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) - std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&); - std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&); -} -#endif - -#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) -namespace std -{ - // Workarounds for (non-standard) iterator category 
detection - std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&); - std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&); -} -#endif - -#endif - -/** - * Copyright (c) 2006-2010 Arseny Kapoulkine - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ +/** + * pugixml parser - version 0.9 + * -------------------------------------------------------- + * Copyright (C) 2006-2010, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Report bugs and download new versions at http://code.google.com/p/pugixml/ + * + * This library is distributed under the MIT License. See notice at the end + * of this file. 
+ * + * This work is based on the pugxml parser, which is: + * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) + */ + +#ifndef HEADER_PUGIXML_HPP +#define HEADER_PUGIXML_HPP + +#include "pugiconfig.hpp" + +#ifndef PUGIXML_NO_STL +namespace std +{ + struct bidirectional_iterator_tag; + +#ifdef __SUNPRO_CC + // Sun C++ compiler has a bug which forces template argument names in forward declarations to be the same as in actual definitions + template class allocator; + template struct char_traits; + template class basic_istream; + template class basic_ostream; + template class basic_string; +#else + // Borland C++ compiler has a bug which forces template argument names in forward declarations to be the same as in actual definitions + template class allocator; + template struct char_traits; + template class basic_istream; + template class basic_ostream; + template class basic_string; +#endif + + // Digital Mars compiler has a bug which requires a forward declaration for explicit instantiation (otherwise type selection is messed up later, producing link errors) + // Also note that we have to declare char_traits as a class here, since it's defined that way +#ifdef __DMC__ + template <> class char_traits; +#endif +} +#endif + +// Macro for deprecated features +#ifndef PUGIXML_DEPRECATED +# if defined(__GNUC__) +# define PUGIXML_DEPRECATED __attribute__((deprecated)) +# elif defined(_MSC_VER) && _MSC_VER >= 1300 +# define PUGIXML_DEPRECATED __declspec(deprecated) +# else +# define PUGIXML_DEPRECATED +# endif +#endif + +// No XPath without STL or exceptions +#if (defined(PUGIXML_NO_STL) || defined(PUGIXML_NO_EXCEPTIONS)) && !defined(PUGIXML_NO_XPATH) +# define PUGIXML_NO_XPATH +#endif + +// Include exception header for XPath +#ifndef PUGIXML_NO_XPATH +# include +#endif + +// If no API is defined, assume default +#ifndef PUGIXML_API +# define PUGIXML_API +#endif + +// If no API for classes is defined, assume default +#ifndef PUGIXML_CLASS +# define PUGIXML_CLASS 
PUGIXML_API +#endif + +// If no API for functions is defined, assume default +#ifndef PUGIXML_FUNCTION +# define PUGIXML_FUNCTION PUGIXML_API +#endif + +#include + +// Character interface macros +#ifdef PUGIXML_WCHAR_MODE +# define PUGIXML_TEXT(t) L ## t + +namespace pugi +{ + /// Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE + typedef wchar_t char_t; + +#ifndef PUGIXML_NO_STL + /// String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE + typedef std::basic_string, std::allocator > string_t; +#endif +} +#else +# define PUGIXML_TEXT(t) t + +namespace pugi +{ + /// Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE + typedef char char_t; + +# ifndef PUGIXML_NO_STL + // GCC 3.4 has a bug which prevents string_t instantiation using char_t, so we have to use char type explicitly + /// String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE + typedef std::basic_string, std::allocator > string_t; +# endif +} +#endif + +// Helpers for inline implementation +namespace pugi +{ + namespace impl + { + bool PUGIXML_FUNCTION strequal(const char_t*, const char_t*); + bool PUGIXML_FUNCTION strequalwild(const char_t*, const char_t*); + } +} + +/// The PugiXML Parser namespace. +namespace pugi +{ + /// Tree node classification. + enum xml_node_type + { + node_null, ///< Undifferentiated entity + node_document, ///< A document tree's absolute root. + node_element, ///< E.g. '<...>' + node_pcdata, ///< E.g. '>...<' + node_cdata, ///< E.g. '' + node_comment, ///< E.g. '' + node_pi, ///< E.g. '' + node_declaration ///< E.g. '' + }; + + // Parsing options + + /** + * Minimal parsing mode. Equivalent to turning all other flags off. This set of flags means + * that pugixml does not add pi/cdata sections or comments to DOM tree and does not perform + * any conversions for input data, meaning fastest parsing. 
+ */ + const unsigned int parse_minimal = 0x0000; + + /** + * This flag determines if processing instructions (nodes with type node_pi; such nodes have the + * form of or in XML) are to be put in DOM tree. If this flag is off, + * they are not put in the tree, but are still parsed and checked for correctness. + * + * The corresponding node in DOM tree will have type node_pi, name "target" and value "content", + * if any. + * + * Note that (document declaration) is not considered to be a PI. + * + * This flag is off by default. + */ + const unsigned int parse_pi = 0x0001; + + /** + * This flag determines if comments (nodes with type node_comment; such nodes have the form of + * in XML) are to be put in DOM tree. If this flag is off, they are not put in + * the tree, but are still parsed and checked for correctness. + * + * The corresponding node in DOM tree will have type node_comment, empty name and value "content". + * + * This flag is off by default. + */ + const unsigned int parse_comments = 0x0002; + + /** + * This flag determines if CDATA sections (nodes with type node_cdata; such nodes have the form + * of in XML) are to be put in DOM tree. If this flag is off, they are not + * put in the tree, but are still parsed and checked for correctness. + * + * The corresponding node in DOM tree will have type node_cdata, empty name and value "content". + * + * This flag is on by default. + */ + const unsigned int parse_cdata = 0x0004; + + /** + * This flag determines if nodes with PCDATA (regular text) that consist only of whitespace + * characters are to be put in DOM tree. Often whitespace-only data is not significant for the + * application, and the cost of allocating and storing such nodes (both memory and speed-wise) + * can be significant. For example, after parsing XML string "
", element + * will have 3 children when parse_ws_pcdata is set (child with type node_pcdata and value=" ", + * child with type node_element and name "a", and another child with type node_pcdata and + * value=" "), and only 1 child when parse_ws_pcdata is not set. + * + * This flag is off by default. + */ + const unsigned int parse_ws_pcdata = 0x0008; + + /** + * This flag determines if character and entity references are to be expanded during the parsing + * process. Character references are &#...; or &#x...; (... is Unicode numeric representation of + * character in either decimal (&#...;) or hexadecimal (&#x...;) form), entity references are &...; + * Note that as pugixml does not handle DTD, the only allowed entities are predefined ones - + * &lt;, &gt;, &amp;, &apos; and &quot;. If character/entity reference can not be expanded, it is + * leaved as is, so you can do additional processing later. + * Reference expansion is performed in attribute values and PCDATA content. + * + * This flag is on by default. + */ + const unsigned int parse_escapes = 0x0010; + + /** + * This flag determines if EOL handling (that is, replacing sequences 0x0d 0x0a by a single 0x0a + * character, and replacing all standalone 0x0d characters by 0x0a) is to be performed on input + * data (that is, comments contents, PCDATA/CDATA contents and attribute values). + * + * This flag is on by default. + */ + const unsigned int parse_eol = 0x0020; + + /** + * This flag determines if attribute value normalization should be performed for all attributes. + * This means, that: + * 1. Whitespace characters (new line, tab and space) are replaced with space (' ') + * 2. Afterwards sequences of spaces are replaced with a single space + * 3. Leading/trailing whitespace characters are trimmed + * + * This flag is off by default. + */ + const unsigned int parse_wnorm_attribute = 0x0080; + + /** + * This flag determines if attribute value normalization should be performed for all attributes. 
+ * This means, that whitespace characters (new line, tab and space) are replaced with space (' '). + * Note, that the actions performed while this flag is on are also performed if parse_wnorm_attribute + * is on, so this flag has no effect if parse_wnorm_attribute flag is set. + * New line characters are always treated as if parse_eol is set, i.e. \r\n is converted to single space. + * + * This flag is on by default. + */ + const unsigned int parse_wconv_attribute = 0x0040; + + /** + * This flag determines if XML document declaration (this node has the form of in XML) + * are to be put in DOM tree. If this flag is off, it is not put in the tree, but is still parsed + * and checked for correctness. + * + * The corresponding node in DOM tree will have type node_declaration, name "xml" and attributes, + * if any. + * + * This flag is off by default. + */ + const unsigned int parse_declaration = 0x0100; + + /** + * This is the default set of flags. It includes parsing CDATA sections (comments/PIs are not + * parsed), performing character and entity reference expansion, replacing whitespace characters + * with spaces in attribute values and performing EOL handling. Note, that PCDATA sections + * consisting only of whitespace characters are not parsed (by default) for performance reasons. + */ + const unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol; + + /** + * These flags determine the encoding of input data for XML document. Default mode is encoding_auto, + * which means that document encoding is auto-detected from BOM and necessary encoding conversions are + * applied. You can override this mode by using any of the specific encodings. 
+ */ + enum xml_encoding + { + encoding_auto, //!< Auto-detect input encoding using BOM or < / 800 + PUGIXML_DEPRECATED +#endif + const unsigned int format_write_bom_utf8 = format_write_bom; + + /** + * If this flag is on, no indentation is performed and no line breaks are written to output file. + * This means that the data is written to output stream as is. + * + * This flag is off by default. + */ + const unsigned int format_raw = 0x04; + + /** + * If this flag is on, no default XML declaration is written to output file. + * This means that there will be no XML declaration in output stream unless there was one in XML document + * (i.e. if it was parsed with parse_declaration flag). + * + * This flag is off by default. + */ + const unsigned int format_no_declaration = 0x08; + + /** + * This is the default set of formatting flags. It includes indenting nodes depending on their + * depth in DOM tree. + */ + const unsigned int format_default = format_indent; + + // Forward declarations + struct xml_attribute_struct; + struct xml_node_struct; + + class xml_node_iterator; + class xml_attribute_iterator; + + class xml_tree_walker; + + class xml_node; + + #ifndef PUGIXML_NO_XPATH + class xpath_node; + class xpath_node_set; + class xpath_ast_node; + class xpath_allocator; + + /// XPath query return type classification + enum xpath_value_type + { + xpath_type_none, ///< Unknown type (query failed to compile) + xpath_type_node_set, ///< Node set (xpath_node_set) + xpath_type_number, ///< Number + xpath_type_string, ///< String + xpath_type_boolean ///< Boolean + }; + + /// XPath query return type classification + /// \deprecated This type is deprecated and will be removed in future versions; use xpath_value_type instead + typedef xpath_value_type xpath_type_t; + + /** + * A class that holds compiled XPath query and allows to evaluate query result + */ + class PUGIXML_CLASS xpath_query + { + private: + // Non-copyable semantics + xpath_query(const xpath_query&); + 
xpath_query& operator=(const xpath_query&); + + xpath_allocator* m_alloc; + xpath_ast_node* m_root; + + void compile(const char_t* query); + + public: + /** + * Constructor from string with XPath expression. + * Throws xpath_exception on compilation error, std::bad_alloc on out of memory error. + * + * \param query - string with XPath expression + */ + explicit xpath_query(const char_t* query); + + /** + * Destructor + */ + ~xpath_query(); + + /** + * Get query expression return type + * + * \return expression return type + **/ + xpath_value_type return_type() const; + + /** + * Evaluate expression as boolean value for the context node \a n. + * If expression does not directly evaluate to boolean, the expression result is converted + * as through boolean() XPath function call. + * Throws std::bad_alloc on out of memory error. + * + * \param n - context node + * \return evaluation result + */ + bool evaluate_boolean(const xml_node& n) const; + + /** + * Evaluate expression as double value for the context node \a n. + * If expression does not directly evaluate to double, the expression result is converted + * as through number() XPath function call. + * Throws std::bad_alloc on out of memory error. + * + * \param n - context node + * \return evaluation result + */ + double evaluate_number(const xml_node& n) const; + + /** + * Evaluate expression as string value for the context node \a n. + * If expression does not directly evaluate to string, the expression result is converted + * as through string() XPath function call. + * Throws std::bad_alloc on out of memory error. + * + * \param n - context node + * \return evaluation result + */ + string_t evaluate_string(const xml_node& n) const; + + /** + * Evaluate expression as node set for the context node \a n. + * If expression does not directly evaluate to node set, throws xpath_exception. + * Throws std::bad_alloc on out of memory error. 
+ * + * \param n - context node + * \return evaluation result + */ + xpath_node_set evaluate_node_set(const xml_node& n) const; + }; + #endif + + /** + * Abstract writer class + * \see xml_node::print + */ + class PUGIXML_CLASS xml_writer + { + public: + /** + * Virtual destructor + */ + virtual ~xml_writer() {} + + /** + * Write memory chunk into stream/file/whatever + * + * \param data - data pointer + * \param size - data size + */ + virtual void write(const void* data, size_t size) = 0; + }; + + /** xml_writer implementation for FILE* + * \see xml_writer + */ + class PUGIXML_CLASS xml_writer_file: public xml_writer + { + public: + /** + * Construct writer instance + * + * \param file - this is FILE* object, void* is used to avoid header dependencies on stdio + */ + xml_writer_file(void* file); + + virtual void write(const void* data, size_t size); + + private: + void* file; + }; + + #ifndef PUGIXML_NO_STL + /** xml_writer implementation for streams + * \see xml_writer + */ + class PUGIXML_CLASS xml_writer_stream: public xml_writer + { + public: + /** + * Construct writer instance + * + * \param stream - output stream object + */ + xml_writer_stream(std::basic_ostream >& stream); + + /** + * Construct writer instance + * + * \param stream - output stream object + */ + xml_writer_stream(std::basic_ostream >& stream); + + virtual void write(const void* data, size_t size); + + private: + std::basic_ostream >* narrow_stream; + std::basic_ostream >* wide_stream; + }; + #endif + + /** + * A light-weight wrapper for manipulating attributes in DOM tree. + * Note: xml_attribute does not allocate any memory for the attribute it wraps; it only wraps a + * pointer to existing attribute. 
+ */ + class PUGIXML_CLASS xml_attribute + { + friend class xml_attribute_iterator; + friend class xml_node; + + private: + xml_attribute_struct* _attr; + + /// \internal Safe bool type +#ifdef __MWERKS__ + typedef bool (xml_attribute::*unspecified_bool_type)() const; +#else + typedef xml_attribute_struct* xml_attribute::*unspecified_bool_type; +#endif + + /// \internal Initializing constructor + explicit xml_attribute(xml_attribute_struct* attr); + + public: + /** + * Default constructor. Constructs an empty attribute. + */ + xml_attribute(); + + public: + /** + * Safe bool conversion. + * Allows xml_node to be used in a context where boolean variable is expected, such as 'if (node)'. + */ + operator unspecified_bool_type() const; + + // Borland C++ workaround + bool operator!() const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator==(const xml_attribute& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator!=(const xml_attribute& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator<(const xml_attribute& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator>(const xml_attribute& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator<=(const xml_attribute& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. 
+ * + * \param r - value to compare to + * \return comparison result + */ + bool operator>=(const xml_attribute& r) const; + + public: + /** + * Get next attribute in attribute list of node that contains the attribute. + * + * \return next attribute, if any; empty attribute otherwise + */ + xml_attribute next_attribute() const; + + /** + * Get previous attribute in attribute list of node that contains the attribute. + * + * \return previous attribute, if any; empty attribute otherwise + */ + xml_attribute previous_attribute() const; + + /** + * Cast attribute value as int. + * + * \return attribute value as int, or 0 if conversion did not succeed or attribute is empty + */ + int as_int() const; + + /** + * Cast attribute value as unsigned int. + * + * \return attribute value as unsigned int, or 0 if conversion did not succeed or attribute is empty + * \note values out of non-negative int range (usually [0, 2^31-1]) get clamped to range boundaries + */ + unsigned int as_uint() const; + + /** + * Cast attribute value as double. + * + * \return attribute value as double, or 0.0 if conversion did not succeed or attribute is empty + */ + double as_double() const; + + /** + * Cast attribute value as float. + * + * \return attribute value as float, or 0.0f if conversion did not succeed or attribute is empty + */ + float as_float() const; + + /** + * Cast attribute value as bool. Returns true for attributes with values that start with '1', + * 't', 'T', 'y', 'Y', returns false for other attributes. + * + * \return attribute value as bool, or false if conversion did not succeed or attribute is empty + */ + bool as_bool() const; + + /// \internal Document order or 0 if not set + /// \deprecated This function is deprecated + PUGIXML_DEPRECATED unsigned int document_order() const; + + public: + /** + * Set attribute value to \a rhs. 
+ * + * \param rhs - new attribute value + * \return self + */ + xml_attribute& operator=(const char_t* rhs); + + /** + * Set attribute value to \a rhs. + * + * \param rhs - new attribute value + * \return self + */ + xml_attribute& operator=(int rhs); + + /** + * Set attribute value to \a rhs. + * + * \param rhs - new attribute value + * \return self + */ + xml_attribute& operator=(unsigned int rhs); + + /** + * Set attribute value to \a rhs. + * + * \param rhs - new attribute value + * \return self + */ + xml_attribute& operator=(double rhs); + + /** + * Set attribute value to either 'true' or 'false' (depends on whether \a rhs is true or false). + * + * \param rhs - new attribute value + * \return self + */ + xml_attribute& operator=(bool rhs); + + /** + * Set attribute name to \a rhs. + * + * \param rhs - new attribute name + * \return success flag (call fails if attribute is empty or there is not enough memory) + */ + bool set_name(const char_t* rhs); + + /** + * Set attribute value to \a rhs. + * + * \param rhs - new attribute value + * \return success flag (call fails if attribute is empty or there is not enough memory) + */ + bool set_value(const char_t* rhs); + + /** + * Set attribute value to \a rhs. + * + * \param rhs - new attribute value + * \return success flag (call fails if attribute is empty or there is not enough memory) + */ + bool set_value(int rhs); + + /** + * Set attribute value to \a rhs. + * + * \param rhs - new attribute value + * \return success flag (call fails if attribute is empty or there is not enough memory) + */ + bool set_value(unsigned int rhs); + + /** + * Set attribute value to \a rhs. + * + * \param rhs - new attribute value + * \return success flag (call fails if attribute is empty or there is not enough memory) + */ + bool set_value(double rhs); + + /** + * Set attribute value to either 'true' or 'false' (depends on whether \a rhs is true or false). 
+ * + * \param rhs - new attribute value + * \return success flag (call fails if attribute is empty or there is not enough memory) + */ + bool set_value(bool rhs); + + public: + /** + * Check if attribute is empty. + * + * \return true if attribute is empty, false otherwise + */ + bool empty() const; + + public: + /** + * Get attribute name. + * + * \return attribute name, or "" if attribute is empty + */ + const char_t* name() const; + + /** + * Get attribute value. + * + * \return attribute value, or "" if attribute is empty + */ + const char_t* value() const; + }; + +#ifdef __BORLANDC__ + // Borland C++ workaround + bool PUGIXML_FUNCTION operator&&(const xml_attribute& lhs, bool rhs); + bool PUGIXML_FUNCTION operator||(const xml_attribute& lhs, bool rhs); +#endif + + /** + * A light-weight wrapper for manipulating nodes in DOM tree. + * Note: xml_node does not allocate any memory for the node it wraps; it only wraps a pointer to + * existing node. + */ + class PUGIXML_CLASS xml_node + { + friend class xml_attribute_iterator; + friend class xml_node_iterator; + + protected: + xml_node_struct* _root; + + /// \internal Safe bool type +#ifdef __MWERKS__ + typedef bool (xml_node::*unspecified_bool_type)() const; +#else + typedef xml_node_struct* xml_node::*unspecified_bool_type; +#endif + + /// \internal Initializing constructor + explicit xml_node(xml_node_struct* p); + + private: + template void all_elements_by_name_helper(const char_t* name, OutputIterator it) const + { + if (!_root) return; + + for (xml_node node = first_child(); node; node = node.next_sibling()) + { + if (node.type() == node_element) + { + if (impl::strequal(name, node.name())) + { + *it = node; + ++it; + } + + if (node.first_child()) node.all_elements_by_name_helper(name, it); + } + } + } + + template void all_elements_by_name_w_helper(const char_t* name, OutputIterator it) const + { + if (!_root) return; + + for (xml_node node = first_child(); node; node = node.next_sibling()) + { + if 
(node.type() == node_element) + { + if (impl::strequalwild(name, node.name())) + { + *it = node; + ++it; + } + + if (node.first_child()) node.all_elements_by_name_w_helper(name, it); + } + } + } + + public: + /** + * Default constructor. Constructs an empty node. + */ + xml_node(); + + public: + /** + * Safe bool conversion. + * Allows xml_node to be used in a context where boolean variable is expected, such as 'if (node)'. + */ + operator unspecified_bool_type() const; + + // Borland C++ workaround + bool operator!() const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator==(const xml_node& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator!=(const xml_node& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator<(const xml_node& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator>(const xml_node& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator<=(const xml_node& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator>=(const xml_node& r) const; + + public: + /** + * Node iterator type (for child nodes). + * \see xml_node_iterator + */ + typedef xml_node_iterator iterator; + + /** + * Node iterator type (for child nodes). 
+ * \see xml_attribute_iterator + */ + typedef xml_attribute_iterator attribute_iterator; + + /** + * Access the begin iterator for this node's collection of child nodes. + * + * \return iterator that points to the first child node, or past-the-end iterator if node is empty or has no children + */ + iterator begin() const; + + /** + * Access the end iterator for this node's collection of child nodes. + * + * \return past-the-end iterator for child list + */ + iterator end() const; + + /** + * Access the begin iterator for this node's collection of attributes. + * + * \return iterator that points to the first attribute, or past-the-end iterator if node is empty or has no attributes + */ + attribute_iterator attributes_begin() const; + + /** + * Access the end iterator for this node's collection of attributes. + * + * \return past-the-end iterator for attribute list + */ + attribute_iterator attributes_end() const; + + public: + /** + * Check if node is empty. + * + * \return true if node is empty, false otherwise + */ + bool empty() const; + + public: + /** + * Get node type + * + * \return node type; node_null for empty nodes + */ + xml_node_type type() const; + + /** + * Get node name (element name for element nodes, PI target for PI) + * + * \return node name, if any; "" otherwise + */ + const char_t* name() const; + + /** + * Get node value (comment/PI/PCDATA/CDATA contents, depending on node type) + * + * \return node value, if any; "" otherwise + */ + const char_t* value() const; + + /** + * Get child with the specified name + * + * \param name - child name + * \return child with the specified name, if any; empty node otherwise + */ + xml_node child(const char_t* name) const; + + /** + * Get child with the name that matches specified pattern + * + * \param name - child name pattern + * \return child with the name that matches pattern, if any; empty node otherwise + * + * \deprecated This function is deprecated + */ + PUGIXML_DEPRECATED xml_node child_w(const 
char_t* name) const; + + /** + * Get attribute with the specified name + * + * \param name - attribute name + * \return attribute with the specified name, if any; empty attribute otherwise + */ + xml_attribute attribute(const char_t* name) const; + + /** + * Get attribute with the name that matches specified pattern + * + * \param name - attribute name pattern + * \return attribute with the name that matches pattern, if any; empty attribute otherwise + * + * \deprecated This function is deprecated + */ + PUGIXML_DEPRECATED xml_attribute attribute_w(const char_t* name) const; + + /** + * Get first of following sibling nodes with the specified name + * + * \param name - sibling name + * \return node with the specified name, if any; empty node otherwise + */ + xml_node next_sibling(const char_t* name) const; + + /** + * Get first of the following sibling nodes with the name that matches specified pattern + * + * \param name - sibling name pattern + * \return node with the name that matches pattern, if any; empty node otherwise + * + * \deprecated This function is deprecated + */ + PUGIXML_DEPRECATED xml_node next_sibling_w(const char_t* name) const; + + /** + * Get following sibling + * + * \return following sibling node, if any; empty node otherwise + */ + xml_node next_sibling() const; + + /** + * Get first of preceding sibling nodes with the specified name + * + * \param name - sibling name + * \return node with the specified name, if any; empty node otherwise + */ + xml_node previous_sibling(const char_t* name) const; + + /** + * Get first of the preceding sibling nodes with the name that matches specified pattern + * + * \param name - sibling name pattern + * \return node with the name that matches pattern, if any; empty node otherwise + * + * \deprecated This function is deprecated + */ + PUGIXML_DEPRECATED xml_node previous_sibling_w(const char_t* name) const; + + /** + * Get preceding sibling + * + * \return preceding sibling node, if any; empty node otherwise 
+ */ + xml_node previous_sibling() const; + + /** + * Get parent node + * + * \return parent node if any; empty node otherwise + */ + xml_node parent() const; + + /** + * Get root of DOM tree this node belongs to. + * + * \return tree root + */ + xml_node root() const; + + /** + * Get child value of current node; that is, value of the first child node of type PCDATA/CDATA + * + * \return child value of current node, if any; "" otherwise + */ + const char_t* child_value() const; + + /** + * Get child value of child with specified name. \see child_value + * node.child_value(name) is equivalent to node.child(name).child_value() + * + * \param name - child name + * \return child value of specified child node, if any; "" otherwise + */ + const char_t* child_value(const char_t* name) const; + + /** + * Get child value of child with name that matches the specified pattern. \see child_value + * node.child_value_w(name) is equivalent to node.child_w(name).child_value() + * + * \param name - child name pattern + * \return child value of specified child node, if any; "" otherwise + * + * \deprecated This function is deprecated + */ + PUGIXML_DEPRECATED const char_t* child_value_w(const char_t* name) const; + + public: + /** + * Set node name to \a rhs (for PI/element nodes). \see name + * + * \param rhs - new node name + * \return success flag (call fails if node is of the wrong type or there is not enough memory) + */ + bool set_name(const char_t* rhs); + + /** + * Set node value to \a rhs (for PI/PCDATA/CDATA/comment nodes). 
\see value + * + * \param rhs - new node value + * \return success flag (call fails if node is of the wrong type or there is not enough memory) + */ + bool set_value(const char_t* rhs); + + /** + * Add attribute with specified name (for element nodes) + * + * \param name - attribute name + * \return added attribute, or empty attribute if there was an error (wrong node type) + */ + xml_attribute append_attribute(const char_t* name); + + /** + * Insert attribute with specified name after \a attr (for element nodes) + * + * \param name - attribute name + * \param attr - attribute to insert a new one after + * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node) + */ + xml_attribute insert_attribute_after(const char_t* name, const xml_attribute& attr); + + /** + * Insert attribute with specified name before \a attr (for element nodes) + * + * \param name - attribute name + * \param attr - attribute to insert a new one before + * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node) + */ + xml_attribute insert_attribute_before(const char_t* name, const xml_attribute& attr); + + /** + * Add a copy of the specified attribute (for element nodes) + * + * \param proto - attribute prototype which is to be copied + * \return inserted attribute, or empty attribute if there was an error (wrong node type) + */ + xml_attribute append_copy(const xml_attribute& proto); + + /** + * Insert a copy of the specified attribute after \a attr (for element nodes) + * + * \param proto - attribute prototype which is to be copied + * \param attr - attribute to insert a new one after + * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node) + */ + xml_attribute insert_copy_after(const xml_attribute& proto, const xml_attribute& attr); + + /** + * Insert a copy of the specified attribute before \a attr 
(for element nodes) + * + * \param proto - attribute prototype which is to be copied + * \param attr - attribute to insert a new one before + * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node) + */ + xml_attribute insert_copy_before(const xml_attribute& proto, const xml_attribute& attr); + + /** + * Add child node with specified type (for element nodes) + * + * \param type - node type + * \return added node, or empty node if there was an error (wrong node type) + */ + xml_node append_child(xml_node_type type = node_element); + + /** + * Insert child node with specified type after \a node (for element nodes) + * + * \param type - node type + * \param node - node to insert a new one after + * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node) + */ + xml_node insert_child_after(xml_node_type type, const xml_node& node); + + /** + * Insert child node with specified type before \a node (for element nodes) + * + * \param type - node type + * \param node - node to insert a new one before + * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node) + */ + xml_node insert_child_before(xml_node_type type, const xml_node& node); + + /** + * Add a copy of the specified node as a child (for element nodes) + * + * \param proto - node prototype which is to be copied + * \return inserted node, or empty node if there was an error (wrong node type) + */ + xml_node append_copy(const xml_node& proto); + + /** + * Insert a copy of the specified node after \a node (for element nodes) + * + * \param proto - node prototype which is to be copied + * \param node - node to insert a new one after + * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node) + */ + xml_node insert_copy_after(const xml_node& proto, const xml_node& node); + + 
/** + * Insert a copy of the specified node before \a node (for element nodes) + * + * \param proto - node prototype which is to be copied + * \param node - node to insert a new one before + * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node) + */ + xml_node insert_copy_before(const xml_node& proto, const xml_node& node); + + /** + * Remove specified attribute + * + * \param a - attribute to be removed + * \return success flag + */ + bool remove_attribute(const xml_attribute& a); + + /** + * Remove attribute with the specified name, if any + * + * \param name - attribute name + * \return success flag + */ + bool remove_attribute(const char_t* name); + + /** + * Remove specified child + * + * \param n - child node to be removed + * \return success flag + */ + bool remove_child(const xml_node& n); + + /** + * Remove child with the specified name, if any + * + * \param name - child name + * \return success flag + */ + bool remove_child(const char_t* name); + + public: + /** + * Get first attribute + * + * \return first attribute, if any; empty attribute otherwise + */ + xml_attribute first_attribute() const; + + /** + * Get last attribute + * + * \return last attribute, if any; empty attribute otherwise + */ + xml_attribute last_attribute() const; + + /** + * Get all elements from subtree with given name + * + * \param name - node name + * \param it - output iterator (for example, std::back_insert_iterator (result of std::back_inserter)) + * + * \deprecated This function is deprecated + */ + template PUGIXML_DEPRECATED void all_elements_by_name(const char_t* name, OutputIterator it) const + { + all_elements_by_name_helper(name, it); + } + + /** + * Get all elements from subtree with name that matches given pattern + * + * \param name - node name pattern + * \param it - output iterator (for example, std::back_insert_iterator (result of std::back_inserter)) + * + * \deprecated This function is deprecated 
+ */ + template PUGIXML_DEPRECATED void all_elements_by_name_w(const char_t* name, OutputIterator it) const + { + all_elements_by_name_w_helper(name, it); + } + + /** + * Get first child + * + * \return first child, if any; empty node otherwise + */ + xml_node first_child() const; + + /** + * Get last child + * + * \return last child, if any; empty node otherwise + */ + xml_node last_child() const; + + /** + * Find attribute using predicate + * + * \param pred - predicate, that takes xml_attribute and returns bool + * \return first attribute for which predicate returned true, or empty attribute + */ + template xml_attribute find_attribute(Predicate pred) const + { + if (!_root) return xml_attribute(); + + for (xml_attribute attrib = first_attribute(); attrib; attrib = attrib.next_attribute()) + if (pred(attrib)) + return attrib; + + return xml_attribute(); + } + + /** + * Find child node using predicate + * + * \param pred - predicate, that takes xml_node and returns bool + * \return first child node for which predicate returned true, or empty node + */ + template xml_node find_child(Predicate pred) const + { + if (!_root) return xml_node(); + + for (xml_node node = first_child(); node; node = node.next_sibling()) + if (pred(node)) + return node; + + return xml_node(); + } + + /** + * Find node from subtree using predicate + * + * \param pred - predicate, that takes xml_node and returns bool + * \return first node from subtree for which predicate returned true, or empty node + */ + template xml_node find_node(Predicate pred) const + { + if (!_root) return xml_node(); + + for (xml_node node = first_child(); node; node = node.next_sibling()) + { + if (pred(node)) + return node; + + if (node.first_child()) + { + xml_node found = node.find_node(pred); + if (found) return found; + } + } + + return xml_node(); + } + + /** + * Find child node with the specified name that has specified attribute + * + * \param name - child node name + * \param attr_name - attribute name of 
child node + * \param attr_value - attribute value of child node + * \return first matching child node, or empty node + */ + xml_node find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const; + + /** + * Find child node with the specified name that has specified attribute (use pattern matching for node name and attribute name/value) + * + * \param name - pattern for child node name + * \param attr_name - pattern for attribute name of child node + * \param attr_value - pattern for attribute value of child node + * \return first matching child node, or empty node + * + * \deprecated This function is deprecated + */ + PUGIXML_DEPRECATED xml_node find_child_by_attribute_w(const char_t* name, const char_t* attr_name, const char_t* attr_value) const; + + /** + * Find child node that has specified attribute + * + * \param attr_name - attribute name of child node + * \param attr_value - attribute value of child node + * \return first matching child node, or empty node + */ + xml_node find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const; + + /** + * Find child node that has specified attribute (use pattern matching for attribute name/value) + * + * \param attr_name - pattern for attribute name of child node + * \param attr_value - pattern for attribute value of child node + * \return first matching child node, or empty node + * + * \deprecated This function is deprecated + */ + PUGIXML_DEPRECATED xml_node find_child_by_attribute_w(const char_t* attr_name, const char_t* attr_value) const; + + #ifndef PUGIXML_NO_STL + /** + * Get the absolute node path from root as a text string. + * + * \param delimiter - delimiter character to insert between element names + * \return path string (e.g. '/bookstore/book/author'). + */ + string_t path(char_t delimiter = '/') const; + #endif + + /** + * Search for a node by path. + * \param path - path string; e.g. 
'./foo/bar' (relative to node), '/foo/bar' (relative + * to root), '../foo/bar'. + * \param delimiter - delimiter character to use while tokenizing path + * \return matching node, if any; empty node otherwise + */ + xml_node first_element_by_path(const char_t* path, char_t delimiter = '/') const; + + /** + * Recursively traverse subtree with xml_tree_walker + * \see xml_tree_walker::begin + * \see xml_tree_walker::for_each + * \see xml_tree_walker::end + * + * \param walker - tree walker to traverse subtree with + * \return traversal result + */ + bool traverse(xml_tree_walker& walker); + + #ifndef PUGIXML_NO_XPATH + /** + * Select single node by evaluating XPath query + * + * \param query - query string + * \return first node from the resulting node set by document order, or empty node if none found + */ + xpath_node select_single_node(const char_t* query) const; + + /** + * Select single node by evaluating XPath query + * + * \param query - compiled query + * \return first node from the resulting node set by document order, or empty node if none found + */ + xpath_node select_single_node(const xpath_query& query) const; + + /** + * Select node set by evaluating XPath query + * + * \param query - query string + * \return resulting node set + */ + xpath_node_set select_nodes(const char_t* query) const; + + /** + * Select node set by evaluating XPath query + * + * \param query - compiled query + * \return resulting node set + */ + xpath_node_set select_nodes(const xpath_query& query) const; + #endif + + /// \internal Document order or 0 if not set + /// \deprecated This function is deprecated + PUGIXML_DEPRECATED unsigned int document_order() const; + + /** + * Print subtree to writer + * + * \param writer - writer object + * \param indent - indentation string + * \param flags - formatting flags + * \param encoding - encoding used for writing + * \param depth - starting depth (used for indentation) + */ + void print(xml_writer& writer, const char_t* indent = 
PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; + + #ifndef PUGIXML_NO_STL + /** + * Print subtree to stream + * + * \param os - output stream + * \param indent - indentation string + * \param flags - formatting flags + * \param encoding - encoding used for writing + * \param depth - starting depth (used for indentation) + */ + void print(std::basic_ostream >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; + + /** + * Print subtree to stream + * + * \param os - output stream + * \param indent - indentation string + * \param flags - formatting flags + * \param encoding - encoding used for writing + * \param depth - starting depth (used for indentation) + */ + void print(std::basic_ostream >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, unsigned int depth = 0) const; + #endif + + /** + * Get node offset in parsed file/string (in bytes) for debugging purposes + * + * \return offset in bytes to start of node data, or -1 in case of error + * \note This will return -1 if node information changed to the extent that it's no longer possible to calculate offset, for example + * if element node name has significantly changed; this is guaranteed to return correct offset only for nodes that have not changed + * since parsing. + */ + ptrdiff_t offset_debug() const; + }; + +#ifdef __BORLANDC__ + // Borland C++ workaround + bool PUGIXML_FUNCTION operator&&(const xml_node& lhs, bool rhs); + bool PUGIXML_FUNCTION operator||(const xml_node& lhs, bool rhs); +#endif + + /** + * Child node iterator. + * It's a bidirectional iterator with value type 'xml_node'. 
+ */ + class PUGIXML_CLASS xml_node_iterator + { + friend class xml_node; + + private: + xml_node _wrap; + xml_node _parent; + + /// \internal Initializing constructor + xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent); + + public: + /** + * Iterator traits + */ + typedef ptrdiff_t difference_type; + typedef xml_node value_type; + typedef xml_node* pointer; + typedef xml_node& reference; + + #ifndef PUGIXML_NO_STL + typedef std::bidirectional_iterator_tag iterator_category; + #endif + + /** + * Default constructor + */ + xml_node_iterator(); + + /** + * Initializing constructor + * + * \param node - node that iterator will point at + */ + xml_node_iterator(const xml_node& node); + + /** + * Check if this iterator is equal to \a rhs + * + * \param rhs - other iterator + * \return comparison result + */ + bool operator==(const xml_node_iterator& rhs) const; + + /** + * Check if this iterator is not equal to \a rhs + * + * \param rhs - other iterator + * \return comparison result + */ + bool operator!=(const xml_node_iterator& rhs) const; + + /** + * Dereferencing operator + * + * \return reference to the node iterator points at + */ + xml_node& operator*(); + + /** + * Member access operator + * + * \return pointer to the node iterator points at + */ + xml_node* operator->(); + + /** + * Pre-increment operator + * + * \return self + */ + const xml_node_iterator& operator++(); + + /** + * Post-increment operator + * + * \return old value + */ + xml_node_iterator operator++(int); + + /** + * Pre-decrement operator + * + * \return self + */ + const xml_node_iterator& operator--(); + + /** + * Post-decrement operator + * + * \return old value + */ + xml_node_iterator operator--(int); + }; + + /** + * Attribute iterator. + * It's a bidirectional iterator with value type 'xml_attribute'. 
+ */ + class PUGIXML_CLASS xml_attribute_iterator + { + friend class xml_node; + + private: + xml_attribute _wrap; + xml_node _parent; + + /// \internal Initializing constructor + xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent); + + public: + /** + * Iterator traits + */ + typedef ptrdiff_t difference_type; + typedef xml_attribute value_type; + typedef xml_attribute* pointer; + typedef xml_attribute& reference; + + #ifndef PUGIXML_NO_STL + typedef std::bidirectional_iterator_tag iterator_category; + #endif + + /** + * Default constructor + */ + xml_attribute_iterator(); + + /** + * Initializing constructor + * + * \param attr - attribute that iterator will point at + * \param parent - parent node of the attribute + */ + xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent); + + /** + * Check if this iterator is equal to \a rhs + * + * \param rhs - other iterator + * \return comparison result + */ + bool operator==(const xml_attribute_iterator& rhs) const; + + /** + * Check if this iterator is not equal to \a rhs + * + * \param rhs - other iterator + * \return comparison result + */ + bool operator!=(const xml_attribute_iterator& rhs) const; + + /** + * Dereferencing operator + * + * \return reference to the node iterator points at + */ + xml_attribute& operator*(); + + /** + * Member access operator + * + * \return pointer to the node iterator points at + */ + xml_attribute* operator->(); + + /** + * Pre-increment operator + * + * \return self + */ + const xml_attribute_iterator& operator++(); + + /** + * Post-increment operator + * + * \return old value + */ + xml_attribute_iterator operator++(int); + + /** + * Pre-decrement operator + * + * \return self + */ + const xml_attribute_iterator& operator--(); + + /** + * Post-decrement operator + * + * \return old value + */ + xml_attribute_iterator operator--(int); + }; + + /** + * Abstract tree walker class + * \see xml_node::traverse + */ + class PUGIXML_CLASS 
xml_tree_walker + { + friend class xml_node; + + private: + int _depth; + + protected: + /** + * Get node depth + * + * \return node depth + */ + int depth() const; + + public: + /** + * Default constructor + */ + xml_tree_walker(); + + /** + * Virtual destructor + */ + virtual ~xml_tree_walker(); + + public: + /** + * Callback that is called when traversal of node begins. + * + * \return returning false will abort the traversal + */ + virtual bool begin(xml_node&); + + /** + * Callback that is called for each node traversed + * + * \return returning false will abort the traversal + */ + virtual bool for_each(xml_node&) = 0; + + /** + * Callback that is called when traversal of node ends. + * + * \return returning false will abort the traversal + */ + virtual bool end(xml_node&); + }; + + /** + * Struct used to distinguish parsing with ownership transfer from parsing without it. + * \see xml_document::parse + */ + struct transfer_ownership_tag {}; + + /** + * Parsing status enumeration, returned as part of xml_parse_result struct + */ + enum xml_parse_status + { + status_ok = 0, ///< No error + + status_file_not_found, ///< File was not found during load_file() + status_io_error, ///< Error reading from file/stream + status_out_of_memory, ///< Could not allocate memory + status_internal_error, ///< Internal error occurred + + status_unrecognized_tag, ///< Parser could not determine tag type + + status_bad_pi, ///< Parsing error occurred while parsing document declaration/processing instruction () + status_bad_comment, ///< Parsing error occurred while parsing comment () + status_bad_cdata, ///< Parsing error occurred while parsing CDATA section () + status_bad_doctype, ///< Parsing error occurred while parsing document type declaration + status_bad_pcdata, ///< Parsing error occurred while parsing PCDATA section (>...<) + status_bad_start_element, ///< Parsing error occurred while parsing start element tag () + status_bad_attribute, ///< Parsing error occurred 
while parsing element attribute + status_bad_end_element, ///< Parsing error occurred while parsing end element tag () + status_end_element_mismatch ///< There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or there was an excessive closing tag) + }; + + /** + * Parser result + */ + struct PUGIXML_CLASS xml_parse_result + { + /// Parsing status (\see xml_parse_status) + xml_parse_status status; + + /// Last parsed offset (in bytes from file/string start) + ptrdiff_t offset; + + /// Source document encoding + xml_encoding encoding; + + /// Cast to bool operator + operator bool() const + { + return status == status_ok; + } + + /// Get error description + const char* description() const; + }; + + /** + * Document class (DOM tree root). + * This class has non-copyable semantics (private copy constructor/assignment operator). + */ + class PUGIXML_CLASS xml_document: public xml_node + { + private: + char_t* _buffer; + + char _memory[192]; + + xml_document(const xml_document&); + const xml_document& operator=(const xml_document&); + + void create(); + void destroy(); + + xml_parse_result load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own); + + public: + /** + * Default constructor, makes empty document + */ + xml_document(); + + /** + * Destructor + */ + ~xml_document(); + + public: + #ifndef PUGIXML_NO_STL + /** + * Load document from stream. + * + * \param stream - stream with XML data + * \param options - parsing options + * \param encoding - source data encoding + * \return parsing result + */ + xml_parse_result load(std::basic_istream >& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + + /** + * Load document from stream. 
+ * + * \param stream - stream with XML data + * \param options - parsing options + * \return parsing result + */ + xml_parse_result load(std::basic_istream >& stream, unsigned int options = parse_default); + #endif + + /** + * Load document from string. String has to be zero-terminated. No encoding conversions are applied. + * + * \param contents - input string + * \param options - parsing options + * \return parsing result + */ + xml_parse_result load(const char_t* contents, unsigned int options = parse_default); + + /** + * Parse the given XML string in-situ. + * The string is modified; you should ensure that string data will persist throughout the + * document's lifetime. Although, document does not gain ownership over the string, so you + * should free the memory occupied by it manually. + * + * \param xmlstr - read/write string with XML data + * \param options - parsing options + * \return parsing result + * + * \deprecated This function is deprecated and will be removed in future versions; use xml_document::load_buffer_inplace instead + */ + PUGIXML_DEPRECATED xml_parse_result parse(char* xmlstr, unsigned int options = parse_default); + + /** + * Parse the given XML string in-situ (gains ownership). + * The string is modified; document gains ownership over the string, so you don't have to worry + * about it's lifetime. 
+ * Call example: doc.parse(transfer_ownership_tag(), string, options); + * + * \param xmlstr - read/write string with XML data + * \param options - parsing options + * \return parsing result + * + * \deprecated This function is deprecated and will be removed in future versions; use xml_document::load_buffer_inplace_own instead + */ + PUGIXML_DEPRECATED xml_parse_result parse(const transfer_ownership_tag&, char* xmlstr, unsigned int options = parse_default); + + /** + * Load document from file + * + * \param path - file path + * \param options - parsing options + * \param encoding - source data encoding + * \return parsing result + */ + xml_parse_result load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + + /** + * Load document from buffer + * + * \param contents - buffer contents + * \param size - buffer size in bytes + * \param options - parsing options + * \param encoding - source data encoding + * \return parsing result + */ + xml_parse_result load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + + /** + * Load document from buffer in-situ. + * The buffer is modified; you should ensure that buffer data will persist throughout the document's + * lifetime. Document does not gain ownership over the buffer, so you should free the buffer memory manually. + * + * \param contents - buffer contents + * \param size - buffer size in bytes + * \param options - parsing options + * \param encoding - source data encoding + * \return parsing result + */ + xml_parse_result load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + + /** + * Load document from buffer in-situ (gains buffer ownership). + * The buffer is modified; you should ensure that buffer data will persist throughout the document's + * lifetime. 
Document gains ownership over the buffer, so you should allocate the buffer with pugixml + * allocation function. + * + * \param contents - buffer contents + * \param size - buffer size in bytes + * \param options - parsing options + * \param encoding - source data encoding + * \return parsing result + */ + xml_parse_result load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + + /** + * Save XML to writer + * + * \param writer - writer object + * \param indent - indentation string + * \param flags - formatting flags + * \param encoding - encoding used for writing + */ + void save(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; + + #ifndef PUGIXML_NO_STL + /** + * Save XML to stream + * + * \param stream - output stream + * \param indent - indentation string + * \param flags - formatting flags + * \param encoding - encoding used for writing + */ + void save(std::basic_ostream >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; + + /** + * Save XML to stream + * + * \param stream - output stream + * \param indent - indentation string + * \param flags - formatting flags + */ + void save(std::basic_ostream >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default) const; + #endif + + /** + * Save XML to file + * + * \param path - file path + * \param indent - indentation string + * \param flags - formatting flags + * \param encoding - encoding used for writing + * \return success flag + */ + bool save_file(const char* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; + + /** + * Compute document order for the whole tree + * Sometimes this makes evaluation of XPath queries faster. 
+ */ + PUGIXML_DEPRECATED void precompute_document_order(); + }; + +#ifndef PUGIXML_NO_XPATH + /** + * XPath exception class. + */ + class PUGIXML_CLASS xpath_exception: public std::exception + { + private: + const char* m_message; + + public: + /** + * Construct exception from static error string + * + * \param message - error string + */ + explicit xpath_exception(const char* message); + + /** + * Return error message + * + * \return error message + */ + virtual const char* what() const throw(); + }; + + /** + * XPath node class. + * + * XPath defines node to be either xml_node or xml_attribute in pugixml terminology, so xpath_node + * is either xml_node or xml_attribute. + */ + class PUGIXML_CLASS xpath_node + { + private: + xml_node m_node; + xml_attribute m_attribute; + + /// \internal Safe bool type + typedef xml_node xpath_node::*unspecified_bool_type; + + public: + /** + * Construct empty XPath node + */ + xpath_node(); + + /** + * Construct XPath node from XML node + * + * \param node - XML node + */ + xpath_node(const xml_node& node); + + /** + * Construct XPath node from XML attribute + * + * \param attribute - XML attribute + * \param parent - attribute's parent node + */ + xpath_node(const xml_attribute& attribute, const xml_node& parent); + + /** + * Get XML node, if any + * + * \return contained XML node, empty node otherwise + */ + xml_node node() const; + + /** + * Get XML attribute, if any + * + * \return contained XML attribute, if any, empty attribute otherwise + */ + xml_attribute attribute() const; + + /** + * Get parent of contained XML attribute, if any + * + * \return parent of contained XML attribute, if any, empty node otherwise + */ + xml_node parent() const; + + /** + * Safe bool conversion. + * Allows xpath_node to be used in a context where boolean variable is expected, such as 'if (node)'. 
+ */ + operator unspecified_bool_type() const; + + // Borland C++ workaround + bool operator!() const; + + /** + * Compares two XPath nodes + * + * \param n - XPath node to compare to + * \return comparison result + */ + bool operator==(const xpath_node& n) const; + + /** + * Compares two XPath nodes + * + * \param n - XPath node to compare to + * \return comparison result + */ + bool operator!=(const xpath_node& n) const; + }; + +#ifdef __BORLANDC__ + // Borland C++ workaround + bool PUGIXML_FUNCTION operator&&(const xpath_node& lhs, bool rhs); + bool PUGIXML_FUNCTION operator||(const xpath_node& lhs, bool rhs); +#endif + + /** + * Not necessarily ordered constant collection of XPath nodes + */ + class PUGIXML_CLASS xpath_node_set + { + friend class xpath_ast_node; + + public: + /// Collection type + enum type_t + { + type_unsorted, ///< Not ordered + type_sorted, ///< Sorted by document order (ascending) + type_sorted_reverse ///< Sorted by document order (descending) + }; + + /// Constant iterator type + typedef const xpath_node* const_iterator; + + private: + type_t m_type; + + xpath_node m_storage; + + xpath_node* m_begin; + xpath_node* m_end; + xpath_node* m_eos; + + typedef xpath_node* iterator; + + iterator mut_begin(); + + void push_back(const xpath_node& n); + + void append(const_iterator begin, const_iterator end); + + void truncate(iterator it); + + void remove_duplicates(); + + public: + /** + * Default constructor + * Constructs empty set + */ + xpath_node_set(); + + /** + * Destructor + */ + ~xpath_node_set(); + + /** + * Copy constructor + * + * \param ns - set to copy + */ + xpath_node_set(const xpath_node_set& ns); + + /** + * Assignment operator + * + * \param ns - set to assign + * \return self + */ + xpath_node_set& operator=(const xpath_node_set& ns); + + /** + * Get collection type + * + * \return collection type + */ + type_t type() const; + + /** + * Get collection size + * + * \return collection size + */ + size_t size() const; + + /** + * 
Get element with the specified index + * + * \param index - requested index + * \return element + */ + const xpath_node& operator[](size_t index) const; + + /** + * Get begin constant iterator for collection + * + * \return begin constant iterator + */ + const_iterator begin() const; + + /** + * Get end iterator for collection + * + * \return end iterator + */ + const_iterator end() const; + + /** + * Sort the collection in ascending/descending order by document order + * + * \param reverse - whether to sort in ascending (false) or descending (true) order + */ + void sort(bool reverse = false); + + /** + * Get first node in the collection by document order + * + * \return first node by document order + * \note set.first() is not equal to set[0], since operator[] does not take document order into account + */ + xpath_node first() const; + + /** + * Return true if collection is empty + * + * \return true if collection is empty, false otherwise + */ + bool empty() const; + }; +#endif + +#ifndef PUGIXML_NO_STL + /** + * Convert wide string to UTF8 + * + * \param str - input wide string string + * \return output UTF8 string + */ + std::basic_string, std::allocator > PUGIXML_FUNCTION as_utf8(const wchar_t* str); + + /** + * Convert UTF8 to wide string + * + * \param str - input UTF8 string + * \return output wide string string + * + * \deprecated This function is deprecated and will be removed in future versions; use as_wide instead + */ + PUGIXML_DEPRECATED std::basic_string, std::allocator > PUGIXML_FUNCTION as_utf16(const char* str); + + /** + * Convert UTF8 to wide string + * + * \param str - input UTF8 string + * \return output wide string string + */ + std::basic_string, std::allocator > PUGIXML_FUNCTION as_wide(const char* str); +#endif + + /** + * Memory allocation function + * + * \param size - allocation size + * \return pointer to allocated memory on success, NULL on failure + */ + typedef void* (*allocation_function)(size_t size); + + /** + * Memory 
deallocation function + * + * \param ptr - pointer to memory previously allocated by allocation function + */ + typedef void (*deallocation_function)(void* ptr); + + /** + * Override default memory management functions + * + * All subsequent allocations/deallocations will be performed via supplied functions. Take care not to + * change memory management functions if any xml_document instances are still alive - this is considered + * undefined behaviour (expect crashes/memory damages/etc.). + * + * \param allocate - allocation function + * \param deallocate - deallocation function + * + * \note XPath-related allocations, as well as allocations in functions that return std::string (xml_node::path, as_utf8, as_wide) + * are not performed via these functions. + * \note If you're using parse() with ownership transfer, you have to allocate the buffer you pass to parse() with allocation + * function you set via this function. + */ + void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate); + + /** + * Get current memory allocation function + * + * \return memory allocation function + * \see set_memory_management_functions + */ + allocation_function PUGIXML_FUNCTION get_memory_allocation_function(); + + /** + * Get current memory deallocation function + * + * \return memory deallocation function + * \see set_memory_management_functions + */ + deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function(); +} + +#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) +namespace std +{ + // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) + std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&); + std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&); +} +#endif + +#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) +namespace std +{ + // Workarounds for (non-standard) iterator category 
detection + std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&); + std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&); +} +#endif + +#endif + +/** + * Copyright (c) 2006-2010 Arseny Kapoulkine + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ diff --git a/src/pugixpath.cpp b/src/pugixpath.cpp index 8ebc8c8..775706c 100644 --- a/src/pugixpath.cpp +++ b/src/pugixpath.cpp @@ -1,3500 +1,3500 @@ -/** - * pugixml parser - version 0.9 - * -------------------------------------------------------- - * Copyright (C) 2006-2010, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) - * Report bugs and download new versions at http://code.google.com/p/pugixml/ - * - * This library is distributed under the MIT License. See notice at the end - * of this file. 
- * - * This work is based on the pugxml parser, which is: - * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) - */ - -#include "pugixml.hpp" - -#ifndef PUGIXML_NO_XPATH - -#include - -#include -#include -#include -#include -#include -#include - -#ifdef PUGIXML_WCHAR_MODE -# include -#endif - -// int32_t -#if !defined(_MSC_VER) || _MSC_VER >= 1600 -# include -#else -typedef __int32 int32_t; -#endif - -#if defined(_MSC_VER) -# pragma warning(disable: 4127) // conditional expression is constant -# pragma warning(disable: 4996) // this function or variable may be unsafe -#endif - -#ifdef __INTEL_COMPILER -# pragma warning(disable: 1478 1786) // function was declared "deprecated" -#endif - -#ifdef __SNC__ -# pragma diag_suppress=237 // controlling expression is constant -#endif - -#include -#include - -// String utilities prototypes -namespace pugi -{ - namespace impl - { - bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count); - void widen_ascii(wchar_t* dest, const char* source); - } -} - -namespace -{ - using namespace pugi; - - enum chartypex - { - ctx_space = 1, // \r, \n, space, tab - ctx_start_symbol = 2, // Any symbol > 127, a-z, A-Z, _ - ctx_digit = 4, // 0-9 - ctx_symbol = 8 // Any symbol > 127, a-z, A-Z, 0-9, _, -, . 
- }; - - const unsigned char chartypex_table[256] = - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 0-15 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 0, // 32-47 - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 0, 0, 0, 0, 0, // 48-63 - 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 64-79 - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 10, // 80-95 - 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 96-111 - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, // 112-127 - - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 128+ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 - }; - -#ifdef PUGIXML_WCHAR_MODE - #define IS_CHARTYPEX(c, ct) ((static_cast(c) < 128 ? 
chartypex_table[static_cast(c)] : chartypex_table[128]) & (ct)) -#else - #define IS_CHARTYPEX(c, ct) (chartypex_table[static_cast(c)] & (ct)) -#endif - - bool starts_with(const char_t* string, const char_t* pattern) - { - while (*pattern && *string == *pattern) - { - string++; - pattern++; - } - - return *pattern == 0; - } - - const char_t* find_char(const char_t* s, char_t c) - { - #ifdef PUGIXML_WCHAR_MODE - return wcschr(s, c); - #else - return ::strchr(s, c); - #endif - } - - string_t string_value(const xpath_node& na) - { - if (na.attribute()) - return na.attribute().value(); - else - { - const xml_node& n = na.node(); - - switch (n.type()) - { - case node_pcdata: - case node_cdata: - case node_comment: - case node_pi: - return n.value(); - - case node_document: - case node_element: - { - string_t result; - - xml_node cur = n.first_child(); - - if (cur) - { - do - { - if (cur.type() == node_pcdata || cur.type() == node_cdata) - result += cur.value(); - - if (cur.first_child()) - cur = cur.first_child(); - else if (cur.next_sibling()) - cur = cur.next_sibling(); - else - { - // Borland C++ workaround - while (!cur.next_sibling() && cur != n && (bool)cur.parent()) - cur = cur.parent(); - - if (cur != n) - cur = cur.next_sibling(); - } - } - while (cur && cur != n); - } - - return result; - } - - default: - return string_t(); - } - } - } - - unsigned int node_height(xml_node n) - { - unsigned int result = 0; - - while (n) - { - ++result; - n = n.parent(); - } - - return result; - } - - // precondition: node_height of ln is <= node_height of rn, ln != rn - bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh) - { - assert(lh <= rh); - - while (lh < rh) - { - --rh; - rn = rn.parent(); - } - - if (ln == rn) return true; - - while (ln.parent() != rn.parent()) - { - ln = ln.parent(); - rn = rn.parent(); - } - - for (; ln; ln = ln.next_sibling()) - if (ln == rn) - return true; - - return false; - } - - bool node_is_ancestor(xml_node parent, 
xml_node node) - { - while (node && node != parent) node = node.parent(); - - return parent && node == parent; - } - - struct document_order_comparator - { - bool operator()(const xpath_node& lhs, const xpath_node& rhs) const - { - xml_node ln = lhs.node(), rn = rhs.node(); - - if (lhs.attribute() && rhs.attribute()) - { - if (lhs.parent() == rhs.parent()) - { - for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute()) - if (a == rhs.attribute()) - return true; - - return false; - } - - ln = lhs.parent(); - rn = rhs.parent(); - } - else if (lhs.attribute()) - { - if (lhs.parent() == rhs.node()) return false; - - ln = lhs.parent(); - } - else if (rhs.attribute()) - { - if (rhs.parent() == lhs.node()) return true; - - rn = rhs.parent(); - } - - if (ln == rn) return false; - - unsigned int lh = node_height(ln); - unsigned int rh = node_height(rn); - - return (lh <= rh) ? node_is_before(ln, lh, rn, rh) : !node_is_before(rn, rh, ln, lh); - } - }; - - struct duplicate_comparator - { - bool operator()(const xpath_node& lhs, const xpath_node& rhs) const - { - if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true; - else return rhs.attribute() ? false : lhs.node() < rhs.node(); - } - }; - - double gen_nan() - { - #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24)) - union { float f; int32_t i; } u[sizeof(float) == sizeof(int32_t) ? 
1 : -1]; - u[0].i = 0x7fc00000; - return u[0].f; - #else - // fallback - const volatile double zero = 0.0; - return zero / zero; - #endif - } - - bool is_nan(double value) - { - #if defined(_MSC_VER) || defined(__BORLANDC__) - return !!_isnan(value); - #elif defined(fpclassify) && defined(FP_NAN) - return fpclassify(value) == FP_NAN; - #else - // fallback - const volatile double v = value; - return v != v; - #endif - } - - const char_t* convert_number_to_string_special(double value) - { - #if defined(_MSC_VER) || defined(__BORLANDC__) - if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0; - if (_isnan(value)) return PUGIXML_TEXT("NaN"); - return PUGIXML_TEXT("-Infinity") + (value > 0); - #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO) - switch (fpclassify(value)) - { - case FP_NAN: - return PUGIXML_TEXT("NaN"); - - case FP_INFINITE: - return PUGIXML_TEXT("-Infinity") + (value > 0); - - case FP_ZERO: - return PUGIXML_TEXT("0"); - - default: - return 0; - } - #else - // fallback - const volatile double v = value; - - if (v == 0) return PUGIXML_TEXT("0"); - if (v != v) return PUGIXML_TEXT("NaN"); - if (v * 2 == v) return PUGIXML_TEXT("-Infinity") + (value > 0); - return 0; - #endif - } - - bool convert_number_to_boolean(double value) - { - return (value != 0 && !is_nan(value)); - } - - // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent - void convert_number_to_mantissa_exponent(double value, char* buffer, char** out_mantissa, int* out_exponent) - { - // get a scientific notation value with IEEE DBL_DIG decimals - sprintf(buffer, "%.15e", value); - - // get the exponent (possibly negative) - char* exponent_string = strchr(buffer, 'e'); - assert(exponent_string); - - int exponent = atoi(exponent_string + 1); - - // extract mantissa string: skip sign - char* mantissa = buffer[0] == '-' ? 
buffer + 1 : buffer; - assert(mantissa[0] != '0' && mantissa[1] == '.'); - - // divide mantissa by 10 to eliminate integer part - mantissa[1] = mantissa[0]; - mantissa++; - exponent++; - - // remove extra mantissa digits and zero-terminate mantissa - char* mantissa_end = exponent_string; - - while (mantissa != mantissa_end && *(mantissa_end - 1) == '0') --mantissa_end; - - *mantissa_end = 0; - - // fill results - *out_mantissa = mantissa; - *out_exponent = exponent; - } - - string_t convert_number_to_string(double value) - { - // try special number conversion - const char_t* special = convert_number_to_string_special(value); - if (special) return special; - - // get mantissa + exponent form - char mantissa_buffer[64]; - - char* mantissa; - int exponent; - convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent); - - // make the number! - char_t result[512]; - char_t* s = result; - - // sign - if (value < 0) *s++ = '-'; - - // integer part - if (exponent <= 0) - { - *s++ = '0'; - } - else - { - while (exponent > 0) - { - assert(*mantissa == 0 || (unsigned)(*mantissa - '0') <= 9); - *s++ = *mantissa ? *mantissa++ : '0'; - exponent--; - } - } - - // fractional part - if (*mantissa) - { - // decimal point - *s++ = '.'; - - // extra zeroes from negative exponent - while (exponent < 0) - { - *s++ = '0'; - exponent++; - } - - // extra mantissa digits - while (*mantissa) - { - assert((unsigned)(*mantissa - '0') <= 9); - *s++ = *mantissa++; - } - } - - // zero-terminate - assert(s < result + sizeof(result) / sizeof(result[0])); - *s = 0; - - return string_t(result); - } - - bool check_string_to_number_format(const char_t* string) - { - // parse leading whitespace - while (IS_CHARTYPEX(*string, ctx_space)) ++string; - - // parse sign - if (*string == '-') ++string; - - if (!*string) return false; - - // if there is no integer part, there should be a decimal part with at least one digit - if (!IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' 
|| !IS_CHARTYPEX(string[1], ctx_digit))) return false; - - // parse integer part - while (IS_CHARTYPEX(*string, ctx_digit)) ++string; - - // parse decimal part - if (*string == '.') - { - ++string; - - while (IS_CHARTYPEX(*string, ctx_digit)) ++string; - } - - // parse trailing whitespace - while (IS_CHARTYPEX(*string, ctx_space)) ++string; - - return *string == 0; - } - - double convert_string_to_number(const char_t* string) - { - // check string format - if (!check_string_to_number_format(string)) return gen_nan(); - - // parse string - #ifdef PUGIXML_WCHAR_MODE - return wcstod(string, 0); - #else - return atof(string); - #endif - } - - double convert_string_to_number(const char_t* begin, const char_t* end) - { - char_t buffer[32]; - - size_t length = static_cast(end - begin); - - if (length < sizeof(buffer) / sizeof(buffer[0])) - { - // optimized on-stack conversion - memcpy(buffer, begin, length * sizeof(char_t)); - buffer[length] = 0; - - return convert_string_to_number(buffer); - } - else - { - // need to make dummy on-heap copy - string_t copy(begin, end); - - return convert_string_to_number(copy.c_str()); - } - } - - double round_nearest(double value) - { - return floor(value + 0.5); - } - - double round_nearest_nzero(double value) - { - // same as round_nearest, but returns -0 for [-0.5, -0] - // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0) - return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5); - } - - const char_t* local_name(const char_t* name) - { - const char_t* p = find_char(name, ':'); - - return p ? 
p + 1 : name; - } - - const char_t* namespace_uri(const xml_node& node) - { - const char_t* pos = find_char(node.name(), ':'); - - string_t ns = PUGIXML_TEXT("xmlns"); - - if (pos) - { - ns += ':'; - ns.append(node.name(), pos); - } - - xml_node p = node; - - while (p) - { - xml_attribute a = p.attribute(ns.c_str()); - - if (a) return a.value(); - - p = p.parent(); - } - - return PUGIXML_TEXT(""); - } - - const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent) - { - const char_t* pos = find_char(attr.name(), ':'); - - // Default namespace does not apply to attributes - if (!pos) return PUGIXML_TEXT(""); - - string_t ns = PUGIXML_TEXT("xmlns:"); - ns.append(attr.name(), pos); - - xml_node p = parent; - - while (p) - { - xml_attribute a = p.attribute(ns.c_str()); - - if (a) return a.value(); - - p = p.parent(); - } - - return PUGIXML_TEXT(""); - } - - struct equal_to - { - template bool operator()(const T& lhs, const T& rhs) const - { - return lhs == rhs; - } - }; - - struct not_equal_to - { - template bool operator()(const T& lhs, const T& rhs) const - { - return lhs != rhs; - } - }; - - struct less - { - template bool operator()(const T& lhs, const T& rhs) const - { - return lhs < rhs; - } - }; - - struct less_equal - { - template bool operator()(const T& lhs, const T& rhs) const - { - return lhs <= rhs; - } - }; -} - -namespace pugi -{ - xpath_exception::xpath_exception(const char* message): m_message(message) - { - } - - const char* xpath_exception::what() const throw() - { - return m_message; - } - - const size_t xpath_memory_block_size = 4096; ///< Memory block size, 4 kb - - class xpath_allocator - { - // disable copy ctor and assignment - xpath_allocator(const xpath_allocator&); - xpath_allocator& operator=(const xpath_allocator&); - - struct memory_block - { - memory_block(): next(0), size(0) - { - } - - memory_block* next; - size_t size; - - char data[xpath_memory_block_size]; - }; - - memory_block* m_root; - - public: - 
xpath_allocator(): m_root(0) - { - m_root = new memory_block; - } - - ~xpath_allocator() - { - while (m_root) - { - memory_block* cur = m_root->next; - delete m_root; - m_root = cur; - } - } - - void* alloc(size_t size) - { - // align size so that we're able to store pointers in subsequent blocks - size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1); - - if (m_root->size + size <= xpath_memory_block_size) - { - void* buf = m_root->data + m_root->size; - m_root->size += size; - return buf; - } - else - { - memory_block* block; - - if (size > xpath_memory_block_size) - block = static_cast(operator new(size + sizeof(memory_block) - xpath_memory_block_size)); - else - block = new memory_block; - - block->next = m_root; - block->size = size; - - m_root = block; - - return block->data; - } - } - - void* node(); - }; - - xpath_node::xpath_node() - { - } - - xpath_node::xpath_node(const xml_node& node): m_node(node) - { - } - - xpath_node::xpath_node(const xml_attribute& attribute, const xml_node& parent): m_node(parent), m_attribute(attribute) - { - } - - xml_node xpath_node::node() const - { - return m_attribute ? xml_node() : m_node; - } - - xml_attribute xpath_node::attribute() const - { - return m_attribute; - } - - xml_node xpath_node::parent() const - { - return m_attribute ? m_node : m_node.parent(); - } - - xpath_node::operator xpath_node::unspecified_bool_type() const - { - return (m_node || m_attribute) ? 
&xpath_node::m_node : 0; - } - - bool xpath_node::operator!() const - { - return !(m_node || m_attribute); - } - - bool xpath_node::operator==(const xpath_node& n) const - { - return m_node == n.m_node && m_attribute == n.m_attribute; - } - - bool xpath_node::operator!=(const xpath_node& n) const - { - return m_node != n.m_node || m_attribute != n.m_attribute; - } - -#ifdef __BORLANDC__ - bool operator&&(const xpath_node& lhs, bool rhs) - { - return (bool)lhs && rhs; - } - - bool operator||(const xpath_node& lhs, bool rhs) - { - return (bool)lhs || rhs; - } -#endif - - xpath_node_set::xpath_node_set(): m_type(type_unsorted), m_begin(&m_storage), m_end(&m_storage), m_eos(&m_storage + 1) - { - } - - xpath_node_set::~xpath_node_set() - { - if (m_begin != &m_storage) delete[] m_begin; - } - - xpath_node_set::xpath_node_set(const xpath_node_set& ns): m_type(type_unsorted), m_begin(&m_storage), m_end(&m_storage), m_eos(&m_storage + 1) - { - *this = ns; - } - - xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns) - { - if (&ns == this) return *this; - - if (m_begin != &m_storage) delete[] m_begin; - - m_begin = m_end = m_eos = 0; - m_type = ns.m_type; - - if (ns.size() == 1) - { - m_storage = *ns.m_begin; - m_begin = &m_storage; - m_end = m_eos = &m_storage + 1; - } - else - { - append(ns.begin(), ns.end()); - } - - return *this; - } - - xpath_node_set::type_t xpath_node_set::type() const - { - return m_type; - } - - size_t xpath_node_set::size() const - { - return m_end - m_begin; - } - - bool xpath_node_set::empty() const - { - return size() == 0; - } - - const xpath_node& xpath_node_set::operator[](size_t index) const - { - assert(index < size()); - return m_begin[index]; - } - - xpath_node_set::iterator xpath_node_set::mut_begin() - { - return m_begin; - } - - xpath_node_set::const_iterator xpath_node_set::begin() const - { - return m_begin; - } - - xpath_node_set::const_iterator xpath_node_set::end() const - { - return m_end; - } - - void 
xpath_node_set::sort(bool reverse) - { - std::sort(m_begin, m_end, document_order_comparator()); - - if (reverse) - std::reverse(m_begin, m_end); - - m_type = reverse ? type_sorted_reverse : type_sorted; - } - - void xpath_node_set::push_back(const xpath_node& n) - { - if (m_end == m_eos) - append(&n, &n + 1); - else - { - *m_end = n; - ++m_end; - } - } - - void xpath_node_set::append(const_iterator begin, const_iterator end) - { - if (begin == end) return; - - size_t count = end - begin; - size_t size = m_end - m_begin; - size_t capacity = m_eos - m_begin; - - if (capacity < size + count) - { - if (capacity < 2) capacity = 2; - - while (capacity < size + count) capacity += capacity / 2; - - xpath_node* storage = new xpath_node[capacity]; - std::copy(m_begin, m_end, storage); - - if (m_begin != &m_storage) delete[] m_begin; - - m_begin = storage; - m_end = storage + size; - m_eos = storage + capacity; - } - - std::copy(begin, end, m_end); - m_end += count; - } - - void xpath_node_set::truncate(iterator it) - { - m_end = it; - } - - xpath_node xpath_node_set::first() const - { - if (empty()) return xpath_node(); - - switch (m_type) - { - case type_sorted: return *m_begin; - case type_sorted_reverse: return *(m_end - 1); - case type_unsorted: return *std::min_element(begin(), end(), document_order_comparator()); - default: return xpath_node(); - } - } - - void xpath_node_set::remove_duplicates() - { - if (m_type == type_unsorted) - { - std::sort(m_begin, m_end, duplicate_comparator()); - } - - truncate(std::unique(m_begin, m_end)); - } - - struct xpath_context - { - xml_node root; - xpath_node n; - size_t position, size; - }; - - enum lexeme_t - { - lex_none = 0, - lex_equal, - lex_not_equal, - lex_less, - lex_greater, - lex_less_or_equal, - lex_greater_or_equal, - lex_plus, - lex_minus, - lex_multiply, - lex_union, - lex_var_ref, - lex_open_brace, - lex_close_brace, - lex_quoted_string, - lex_number, - lex_slash, - lex_double_slash, - lex_open_square_brace, - 
lex_close_square_brace, - lex_string, - lex_comma, - lex_axis_attribute, - lex_dot, - lex_double_dot, - lex_double_colon, - lex_eof - }; - - struct xpath_lexer_string - { - const char_t* begin; - const char_t* end; - - xpath_lexer_string(): begin(0), end(0) - { - } - - bool operator==(const char_t* other) const - { - size_t length = static_cast(end - begin); - - return impl::strequalrange(other, begin, length); - } - }; - - class xpath_lexer - { - // disable copy ctor and assignment - xpath_lexer(const xpath_lexer&); - xpath_lexer& operator=(const xpath_lexer&); - - private: - const char_t* m_cur; - xpath_lexer_string m_cur_lexeme_contents; - - lexeme_t m_cur_lexeme; - - void contents_clear() - { - m_cur_lexeme_contents = xpath_lexer_string(); - } - - public: - explicit xpath_lexer(const char_t* query): m_cur(query) - { - next(); - } - - const char_t* state() const - { - return m_cur; - } - - void next() - { - contents_clear(); - - const char_t* cur = m_cur; - - while (IS_CHARTYPEX(*cur, ctx_space)) ++cur; - - switch (*cur) - { - case 0: - m_cur_lexeme = lex_eof; - break; - - case '>': - if (*(cur+1) == '=') - { - cur += 2; - m_cur_lexeme = lex_greater_or_equal; - } - else - { - cur += 1; - m_cur_lexeme = lex_greater; - } - break; - - case '<': - if (*(cur+1) == '=') - { - cur += 2; - m_cur_lexeme = lex_less_or_equal; - } - else - { - cur += 1; - m_cur_lexeme = lex_less; - } - break; - - case '!': - if (*(cur+1) == '=') - { - cur += 2; - m_cur_lexeme = lex_not_equal; - } - else - { - m_cur_lexeme = lex_none; - } - break; - - case '=': - cur += 1; - m_cur_lexeme = lex_equal; - - break; - - case '+': - cur += 1; - m_cur_lexeme = lex_plus; - - break; - - case '-': - cur += 1; - m_cur_lexeme = lex_minus; - - break; - - case '*': - cur += 1; - m_cur_lexeme = lex_multiply; - - break; - - case '|': - cur += 1; - m_cur_lexeme = lex_union; - - break; - - case '$': - cur += 1; - m_cur_lexeme = lex_var_ref; - - break; - - case '(': - cur += 1; - m_cur_lexeme = lex_open_brace; 
- - break; - - case ')': - cur += 1; - m_cur_lexeme = lex_close_brace; - - break; - - case '[': - cur += 1; - m_cur_lexeme = lex_open_square_brace; - - break; - - case ']': - cur += 1; - m_cur_lexeme = lex_close_square_brace; - - break; - - case ',': - cur += 1; - m_cur_lexeme = lex_comma; - - break; - - case '/': - if (*(cur+1) == '/') - { - cur += 2; - m_cur_lexeme = lex_double_slash; - } - else - { - cur += 1; - m_cur_lexeme = lex_slash; - } - break; - - case '.': - if (*(cur+1) == '.') - { - cur += 2; - m_cur_lexeme = lex_double_dot; - } - else if (IS_CHARTYPEX(*(cur+1), ctx_digit)) - { - m_cur_lexeme_contents.begin = cur; // . - - ++cur; - - while (IS_CHARTYPEX(*cur, ctx_digit)) cur++; - - m_cur_lexeme_contents.end = cur; - - m_cur_lexeme = lex_number; - } - else - { - cur += 1; - m_cur_lexeme = lex_dot; - } - break; - - case '@': - cur += 1; - m_cur_lexeme = lex_axis_attribute; - - break; - - case '"': - case '\'': - { - char_t terminator = *cur; - - ++cur; - - m_cur_lexeme_contents.begin = cur; - while (*cur && *cur != terminator) cur++; - m_cur_lexeme_contents.end = cur; - - if (!*cur) - m_cur_lexeme = lex_none; - else - { - cur += 1; - m_cur_lexeme = lex_quoted_string; - } - - break; - } - - case ':': - if (*(cur+1) == ':') - { - cur += 2; - m_cur_lexeme = lex_double_colon; - } - else - { - m_cur_lexeme = lex_none; - } - break; - - default: - if (IS_CHARTYPEX(*cur, ctx_digit)) - { - m_cur_lexeme_contents.begin = cur; - - while (IS_CHARTYPEX(*cur, ctx_digit)) cur++; - - if (*cur == '.') - { - cur++; - - while (IS_CHARTYPEX(*cur, ctx_digit)) cur++; - } - - m_cur_lexeme_contents.end = cur; - - m_cur_lexeme = lex_number; - } - else if (IS_CHARTYPEX(*cur, ctx_start_symbol)) - { - m_cur_lexeme_contents.begin = cur; - - while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++; - - if (cur[0] == ':') - { - if (cur[1] == '*') // namespace test ncname:* - { - cur += 2; // :* - } - else if (IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname - { - cur++; // : - - while 
(IS_CHARTYPEX(*cur, ctx_symbol)) cur++; - } - } - - m_cur_lexeme_contents.end = cur; - - while (IS_CHARTYPEX(*cur, ctx_space)) ++cur; - - m_cur_lexeme = lex_string; - } - else - { - throw xpath_exception("Unrecognized token"); - } - } - - m_cur = cur; - } - - lexeme_t current() const - { - return m_cur_lexeme; - } - - const xpath_lexer_string& contents() const - { - return m_cur_lexeme_contents; - } - }; - - enum ast_type_t - { - ast_none, - ast_op_or, // left or right - ast_op_and, // left and right - ast_op_equal, // left = right - ast_op_not_equal, // left != right - ast_op_less, // left < right - ast_op_greater, // left > right - ast_op_less_or_equal, // left <= right - ast_op_greater_or_equal, // left >= right - ast_op_add, // left + right - ast_op_subtract, // left - right - ast_op_multiply, // left * right - ast_op_divide, // left / right - ast_op_mod, // left % right - ast_op_negate, // left - right - ast_op_union, // left | right - ast_predicate, // apply predicate to set; next points to next predicate - ast_filter, // select * from left where right - ast_filter_posinv, // select * from left where right; proximity position invariant - ast_string_constant, // string constant - ast_number_constant, // number constant - ast_func_last, // last() - ast_func_position, // position() - ast_func_count, // count(left) - ast_func_id, // id(left) - ast_func_local_name_0, // local-name() - ast_func_local_name_1, // local-name(left) - ast_func_namespace_uri_0, // namespace-uri() - ast_func_namespace_uri_1, // namespace-uri(left) - ast_func_name_0, // name() - ast_func_name_1, // name(left) - ast_func_string_0, // string() - ast_func_string_1, // string(left) - ast_func_concat, // concat(left, right, siblings) - ast_func_starts_with, // starts_with(left, right) - ast_func_contains, // contains(left, right) - ast_func_substring_before, // substring-before(left, right) - ast_func_substring_after, // substring-after(left, right) - ast_func_substring_2, // substring(left, 
right) - ast_func_substring_3, // substring(left, right, third) - ast_func_string_length_0, // string-length() - ast_func_string_length_1, // string-length(left) - ast_func_normalize_space_0, // normalize-space() - ast_func_normalize_space_1, // normalize-space(left) - ast_func_translate, // translate(left, right, third) - ast_func_boolean, // boolean(left) - ast_func_not, // not(left) - ast_func_true, // true() - ast_func_false, // false() - ast_func_lang, // lang(left) - ast_func_number_0, // number() - ast_func_number_1, // number(left) - ast_func_sum, // sum(left) - ast_func_floor, // floor(left) - ast_func_ceiling, // ceiling(left) - ast_func_round, // round(left) - ast_step, // process set left with step - ast_step_root // select root node - }; - - enum axis_t - { - axis_ancestor, - axis_ancestor_or_self, - axis_attribute, - axis_child, - axis_descendant, - axis_descendant_or_self, - axis_following, - axis_following_sibling, - axis_namespace, - axis_parent, - axis_preceding, - axis_preceding_sibling, - axis_self - }; - - enum nodetest_t - { - nodetest_none, - nodetest_name, - nodetest_type_node, - nodetest_type_comment, - nodetest_type_pi, - nodetest_type_text, - nodetest_pi, - nodetest_all, - nodetest_all_in_namespace - }; - - template struct axis_to_type - { - static const axis_t axis; - }; - - template const axis_t axis_to_type::axis = N; - - class xpath_ast_node - { - private: - // node type - char m_type; - char m_rettype; - - // for ast_step / ast_predicate - char m_axis; - char m_test; - - // tree node structure - xpath_ast_node* m_left; - xpath_ast_node* m_right; - xpath_ast_node* m_next; - - union - { - // value for ast_string_constant - const char_t* string; - // value for ast_number_constant - double number; - // node test for ast_step (node name/namespace/node type/pi target) - const char_t* nodetest; - } m_data; - - xpath_ast_node(const xpath_ast_node&); - xpath_ast_node& operator=(const xpath_ast_node&); - - template static bool 
compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const Comp& comp) - { - xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); - - if (lt != xpath_type_node_set && rt != xpath_type_node_set) - { - if (lt == xpath_type_boolean || rt == xpath_type_boolean) - return comp(lhs->eval_boolean(c), rhs->eval_boolean(c)); - else if (lt == xpath_type_number || rt == xpath_type_number) - return comp(lhs->eval_number(c), rhs->eval_number(c)); - else if (lt == xpath_type_string || rt == xpath_type_string) - return comp(lhs->eval_string(c), rhs->eval_string(c)); - } - else if (lt == xpath_type_node_set && rt == xpath_type_node_set) - { - xpath_node_set ls = lhs->eval_node_set(c); - xpath_node_set rs = rhs->eval_node_set(c); - - for (xpath_node_set::const_iterator li = ls.begin(); li != ls.end(); ++li) - for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) - { - if (comp(string_value(*li), string_value(*ri))) - return true; - } - - return false; - } - else - { - if (lt == xpath_type_node_set) - { - std::swap(lhs, rhs); - std::swap(lt, rt); - } - - if (lt == xpath_type_boolean) - return comp(lhs->eval_boolean(c), rhs->eval_boolean(c)); - else if (lt == xpath_type_number) - { - double l = lhs->eval_number(c); - xpath_node_set rs = rhs->eval_node_set(c); - - for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) - { - if (comp(l, convert_string_to_number(string_value(*ri).c_str()))) - return true; - } - - return false; - } - else if (lt == xpath_type_string) - { - string_t l = lhs->eval_string(c); - xpath_node_set rs = rhs->eval_node_set(c); - - for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) - { - if (comp(l, string_value(*ri))) - return true; - } - - return false; - } - } - - assert(!"Wrong types"); - return false; - } - - template static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const Comp& comp) - { - xpath_value_type lt = lhs->rettype(), 
rt = rhs->rettype(); - - if (lt != xpath_type_node_set && rt != xpath_type_node_set) - return comp(lhs->eval_number(c), rhs->eval_number(c)); - else if (lt == xpath_type_node_set && rt == xpath_type_node_set) - { - xpath_node_set ls = lhs->eval_node_set(c); - xpath_node_set rs = rhs->eval_node_set(c); - - for (xpath_node_set::const_iterator li = ls.begin(); li != ls.end(); ++li) - { - double l = convert_string_to_number(string_value(*li).c_str()); - - for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) - { - if (comp(l, convert_string_to_number(string_value(*ri).c_str()))) - return true; - } - } - - return false; - } - else if (lt != xpath_type_node_set && rt == xpath_type_node_set) - { - double l = lhs->eval_number(c); - xpath_node_set rs = rhs->eval_node_set(c); - - for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) - { - if (comp(l, convert_string_to_number(string_value(*ri).c_str()))) - return true; - } - - return false; - } - else if (lt == xpath_type_node_set && rt != xpath_type_node_set) - { - xpath_node_set ls = lhs->eval_node_set(c); - double r = rhs->eval_number(c); - - for (xpath_node_set::const_iterator li = ls.begin(); li != ls.end(); ++li) - { - if (comp(convert_string_to_number(string_value(*li).c_str()), r)) - return true; - } - - return false; - } - else - { - assert(!"Wrong types"); - return false; - } - } - - void apply_predicate(xpath_node_set& ns, size_t first, xpath_ast_node* expr, const xpath_context& context) - { - xpath_context c; - c.root = context.root; - - size_t i = 1; - size_t size = ns.size() - first; - - xpath_node_set::iterator last = ns.mut_begin() + first; - - // remove_if... 
or well, sort of - for (xpath_node_set::iterator it = last; it != ns.end(); ++it, ++i) - { - c.n = *it; - c.position = i; - c.size = size; - - if (expr->rettype() == xpath_type_number) - { - if (expr->eval_number(c) == i) - *last++ = *it; - } - else if (expr->eval_boolean(c)) - *last++ = *it; - } - - ns.truncate(last); - } - - void apply_predicates(xpath_node_set& ns, size_t first, const xpath_context& context) - { - if (ns.size() <= first) return; - - for (xpath_ast_node* pred = m_right; pred; pred = pred->m_next) - { - apply_predicate(ns, first, pred->m_left, context); - } - } - - void step_push(xpath_node_set& ns, const xml_attribute& a, const xml_node& parent) - { - if (!a) return; - - const char_t* name = a.name(); - - // There are no attribute nodes corresponding to attributes that declare namespaces - // That is, "xmlns:..." or "xmlns" - if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return; - - switch (m_test) - { - case nodetest_name: - if (impl::strequal(name, m_data.nodetest)) ns.push_back(xpath_node(a, parent)); - break; - - case nodetest_type_node: - case nodetest_all: - ns.push_back(xpath_node(a, parent)); - break; - - case nodetest_all_in_namespace: - if (starts_with(name, m_data.nodetest)) - ns.push_back(xpath_node(a, parent)); - break; - - default: - ; - } - } - - void step_push(xpath_node_set& ns, const xml_node& n) - { - if (!n) return; - - switch (m_test) - { - case nodetest_name: - if (n.type() == node_element && impl::strequal(n.name(), m_data.nodetest)) ns.push_back(n); - break; - - case nodetest_type_node: - ns.push_back(n); - break; - - case nodetest_type_comment: - if (n.type() == node_comment) - ns.push_back(n); - break; - - case nodetest_type_text: - if (n.type() == node_pcdata || n.type() == node_cdata) - ns.push_back(n); - break; - - case nodetest_type_pi: - if (n.type() == node_pi) - ns.push_back(n); - break; - - case nodetest_pi: - if (n.type() == node_pi && impl::strequal(n.name(), 
m_data.nodetest)) - ns.push_back(n); - break; - - case nodetest_all: - if (n.type() == node_element) - ns.push_back(n); - break; - - case nodetest_all_in_namespace: - if (n.type() == node_element && starts_with(n.name(), m_data.nodetest)) - ns.push_back(n); - break; - - default: - assert(!"Unknown axis"); - } - } - - template void step_fill(xpath_node_set& ns, const xml_node& n, T) - { - const axis_t axis = T::axis; - - switch (axis) - { - case axis_attribute: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; - - for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute()) - step_push(ns, a, n); - - break; - } - - case axis_child: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; - - for (xml_node c = n.first_child(); c; c = c.next_sibling()) - step_push(ns, c); - - break; - } - - case axis_descendant: - case axis_descendant_or_self: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; - - if (axis == axis_descendant_or_self) - step_push(ns, n); - - xml_node cur = n.first_child(); - - if (cur) - { - do - { - step_push(ns, cur); - - if (cur.first_child()) - cur = cur.first_child(); - else if (cur.next_sibling()) - cur = cur.next_sibling(); - else - { - // Borland C++ workaround - while (!cur.next_sibling() && cur != n && (bool)cur.parent()) - cur = cur.parent(); - - if (cur != n) - cur = cur.next_sibling(); - } - } - while (cur && cur != n); - } - - break; - } - - case axis_following_sibling: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; - - for (xml_node c = n.next_sibling(); c; c = c.next_sibling()) - step_push(ns, c); - - break; - } - - case axis_preceding_sibling: - { - ns.m_type = ns.empty() ? 
xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted; - - for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling()) - step_push(ns, c); - - break; - } - - case axis_following: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; - - xml_node cur = n; - - // exit from this node so that we don't include descendants - while (cur && !cur.next_sibling()) cur = cur.parent(); - cur = cur.next_sibling(); - - if (cur) - { - for (;;) - { - step_push(ns, cur); - - if (cur.first_child()) - cur = cur.first_child(); - else if (cur.next_sibling()) - cur = cur.next_sibling(); - else - { - while (cur && !cur.next_sibling()) cur = cur.parent(); - cur = cur.next_sibling(); - - if (!cur) break; - } - } - } - - break; - } - - case axis_preceding: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted; - - xml_node cur = n; - - while (cur && !cur.previous_sibling()) cur = cur.parent(); - cur = cur.previous_sibling(); - - if (cur) - { - for (;;) - { - if (cur.last_child()) - cur = cur.last_child(); - else - { - // leaf node, can't be ancestor - step_push(ns, cur); - - if (cur.previous_sibling()) - cur = cur.previous_sibling(); - else - { - do - { - cur = cur.parent(); - if (!cur) break; - - if (!node_is_ancestor(cur, n)) step_push(ns, cur); - } - while (!cur.previous_sibling()); - - cur = cur.previous_sibling(); - - if (!cur) break; - } - } - } - } - - break; - } - - case axis_ancestor: - case axis_ancestor_or_self: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted; - - if (axis == axis_ancestor_or_self) - step_push(ns, n); - - xml_node cur = n.parent(); - - while (cur) - { - step_push(ns, cur); - - cur = cur.parent(); - } - - break; - } - - case axis_self: - { - ns.m_type = ns.empty() ? 
xpath_node_set::type_sorted : xpath_node_set::type_unsorted; - - step_push(ns, n); - - break; - } - - case axis_parent: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; - - if (n.parent()) step_push(ns, n.parent()); - - break; - } - - default: - assert(!"Unimplemented axis"); - } - } - - template void step_fill(xpath_node_set& ns, const xml_attribute& a, const xml_node& p, T v) - { - const axis_t axis = T::axis; - - switch (axis) - { - case axis_ancestor: - case axis_ancestor_or_self: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted; - - if (axis == axis_ancestor_or_self && m_test == nodetest_type_node) // reject attributes based on principal node type test - step_push(ns, a, p); - - xml_node cur = p; - - while (cur) - { - step_push(ns, cur); - - cur = cur.parent(); - } - - break; - } - - case axis_descendant_or_self: - case axis_self: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; - - if (m_test == nodetest_type_node) // reject attributes based on principal node type test - step_push(ns, a, p); - - break; - } - - case axis_following: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; - - xml_node cur = p; - - for (;;) - { - if (cur.first_child()) - cur = cur.first_child(); - else if (cur.next_sibling()) - cur = cur.next_sibling(); - else - { - while (cur && !cur.next_sibling()) cur = cur.parent(); - cur = cur.next_sibling(); - - if (!cur) break; - } - - step_push(ns, cur); - } - - break; - } - - case axis_parent: - { - ns.m_type = ns.empty() ? 
xpath_node_set::type_sorted : xpath_node_set::type_unsorted; - - step_push(ns, p); - - break; - } - - case axis_preceding: - { - // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding - step_fill(ns, p, v); - break; - } - - default: - assert(!"Unimplemented axis"); - } - } - - template void step_do(xpath_node_set& ns, const xpath_context& c, T v) - { - const axis_t axis = T::axis; - - assert(ns.empty()); - - switch (axis) - { - case axis_ancestor: - case axis_ancestor_or_self: - case axis_descendant_or_self: - case axis_following: - case axis_parent: - case axis_preceding: - case axis_self: - if (m_left) - { - xpath_node_set s = m_left->eval_node_set(c); - - for (xpath_node_set::const_iterator it = s.begin(); it != s.end(); ++it) - { - size_t size = ns.size(); - - if (it->node()) - step_fill(ns, it->node(), v); - else - step_fill(ns, it->attribute(), it->parent(), v); - - apply_predicates(ns, size, c); - } - } - else - { - if (c.n.node()) step_fill(ns, c.n.node(), v); - else step_fill(ns, c.n.attribute(), c.n.parent(), v); - - apply_predicates(ns, 0, c); - } - - break; - - case axis_following_sibling: - case axis_preceding_sibling: - case axis_attribute: - case axis_child: - case axis_descendant: - if (m_left) - { - xpath_node_set s = m_left->eval_node_set(c); - - for (xpath_node_set::const_iterator it = s.begin(); it != s.end(); ++it) - { - size_t size = ns.size(); - - if (it->node()) - step_fill(ns, it->node(), v); - - apply_predicates(ns, size, c); - } - } - else if (c.n.node()) - { - step_fill(ns, c.n.node(), v); - - apply_predicates(ns, 0, c); - } - - break; - - case axis_namespace: - break; - - default: - assert(!"Unimplemented axis"); - } - } - - static const char_t* duplicate_string(const xpath_lexer_string& value, xpath_allocator& a) - { - if (value.begin) - { - size_t length = static_cast(value.end - value.begin); - - char_t* c = static_cast(a.alloc((length + 
1) * sizeof(char_t))); - memcpy(c, value.begin, length * sizeof(char_t)); - c[length] = 0; - - return c; - } - else return 0; - } - public: - xpath_ast_node(ast_type_t type, xpath_value_type rettype, const xpath_lexer_string& value, xpath_allocator& a): - m_type((char)type), m_rettype((char)rettype), m_axis(0), m_test(0), m_left(0), m_right(0), m_next(0) - { - assert(type == ast_string_constant); - m_data.string = duplicate_string(value, a); - } - - xpath_ast_node(ast_type_t type, xpath_value_type rettype, double value): - m_type((char)type), m_rettype((char)rettype), m_axis(0), m_test(0), m_left(0), m_right(0), m_next(0) - { - assert(type == ast_number_constant); - m_data.number = value; - } - - xpath_ast_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0): - m_type((char)type), m_rettype((char)rettype), m_axis(0), m_test(0), m_left(left), m_right(right), m_next(0) - { - } - - xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const xpath_lexer_string& contents, xpath_allocator& a): - m_type((char)type), m_rettype(xpath_type_node_set), m_axis((char)axis), m_test((char)test), m_left(left), m_right(0), m_next(0) - { - m_data.nodetest = duplicate_string(contents, a); - } - - void set_next(xpath_ast_node* value) - { - m_next = value; - } - - void set_right(xpath_ast_node* value) - { - m_right = value; - } - - bool eval_boolean(const xpath_context& c) - { - switch (m_type) - { - case ast_op_or: - if (m_left->eval_boolean(c)) return true; - else return m_right->eval_boolean(c); - - case ast_op_and: - if (!m_left->eval_boolean(c)) return false; - else return m_right->eval_boolean(c); - - case ast_op_equal: - return compare_eq(m_left, m_right, c, equal_to()); - - case ast_op_not_equal: - return compare_eq(m_left, m_right, c, not_equal_to()); - - case ast_op_less: - return compare_rel(m_left, m_right, c, less()); - - case ast_op_greater: - return compare_rel(m_right, m_left, c, less()); - - 
case ast_op_less_or_equal: - return compare_rel(m_left, m_right, c, less_equal()); - - case ast_op_greater_or_equal: - return compare_rel(m_right, m_left, c, less_equal()); - - case ast_func_starts_with: - return starts_with(m_left->eval_string(c).c_str(), m_right->eval_string(c).c_str()); - - case ast_func_contains: - { - string_t lr = m_left->eval_string(c); - string_t rr = m_right->eval_string(c); - - return rr.empty() || lr.find(rr) != string_t::npos; - } - - case ast_func_boolean: - return m_left->eval_boolean(c); - - case ast_func_not: - return !m_left->eval_boolean(c); - - case ast_func_true: - return true; - - case ast_func_false: - return false; - - case ast_func_lang: - { - if (c.n.attribute()) return false; - - string_t lang = m_left->eval_string(c); - - for (xml_node n = c.n.node(); n; n = n.parent()) - { - xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang")); - - if (a) - { - const char_t* value = a.value(); - - // strnicmp / strncasecmp is not portable - for (const char_t* lit = lang.c_str(); *lit; ++lit) - { - if (tolower(*lit) != tolower(*value)) return false; - ++value; - } - - return *value == 0 || *value == '-'; - } - } - - return false; - } - - default: - { - switch (m_rettype) - { - case xpath_type_number: - return convert_number_to_boolean(eval_number(c)); - - case xpath_type_string: - return !eval_string(c).empty(); - - case xpath_type_node_set: - return !eval_node_set(c).empty(); - - default: - assert(!"Wrong expression for return type boolean"); - return false; - } - } - } - } - - double eval_number(const xpath_context& c) - { - switch (m_type) - { - case ast_op_add: - return m_left->eval_number(c) + m_right->eval_number(c); - - case ast_op_subtract: - return m_left->eval_number(c) - m_right->eval_number(c); - - case ast_op_multiply: - return m_left->eval_number(c) * m_right->eval_number(c); - - case ast_op_divide: - return m_left->eval_number(c) / m_right->eval_number(c); - - case ast_op_mod: - return fmod(m_left->eval_number(c), 
m_right->eval_number(c)); - - case ast_op_negate: - return -m_left->eval_number(c); - - case ast_number_constant: - return m_data.number; - - case ast_func_last: - return (double)c.size; - - case ast_func_position: - return (double)c.position; - - case ast_func_count: - return (double)m_left->eval_node_set(c).size(); - - case ast_func_string_length_0: - return (double)string_value(c.n).size(); - - case ast_func_string_length_1: - return (double)m_left->eval_string(c).size(); - - case ast_func_number_0: - return convert_string_to_number(string_value(c.n).c_str()); - - case ast_func_number_1: - return m_left->eval_number(c); - - case ast_func_sum: - { - double r = 0; - - xpath_node_set ns = m_left->eval_node_set(c); - - for (xpath_node_set::const_iterator it = ns.begin(); it != ns.end(); ++it) - r += convert_string_to_number(string_value(*it).c_str()); - - return r; - } - - case ast_func_floor: - { - double r = m_left->eval_number(c); - - return r == r ? floor(r) : r; - } - - case ast_func_ceiling: - { - double r = m_left->eval_number(c); - - return r == r ? ceil(r) : r; - } - - case ast_func_round: - return round_nearest_nzero(m_left->eval_number(c)); - - default: - { - switch (m_rettype) - { - case xpath_type_boolean: - return eval_boolean(c) ? 
1 : 0; - - case xpath_type_string: - return convert_string_to_number(eval_string(c).c_str()); - - case xpath_type_node_set: - return convert_string_to_number(eval_string(c).c_str()); - - default: - assert(!"Wrong expression for return type number"); - return 0; - } - - } - } - } - - string_t eval_string(const xpath_context& c) - { - switch (m_type) - { - case ast_string_constant: - return m_data.string; - - case ast_func_local_name_0: - { - xpath_node na = c.n; - - if (na.attribute()) return local_name(na.attribute().name()); - else return local_name(na.node().name()); - } - - case ast_func_local_name_1: - { - xpath_node_set ns = m_left->eval_node_set(c); - if (ns.empty()) return string_t(); - - xpath_node na = ns.first(); - - if (na.attribute()) return local_name(na.attribute().name()); - else return local_name(na.node().name()); - } - - case ast_func_name_0: - { - xpath_node na = c.n; - - if (na.attribute()) return na.attribute().name(); - else return na.node().name(); - } - - case ast_func_name_1: - { - xpath_node_set ns = m_left->eval_node_set(c); - if (ns.empty()) return string_t(); - - xpath_node na = ns.first(); - - if (na.attribute()) return na.attribute().name(); - else return na.node().name(); - } - - case ast_func_namespace_uri_0: - { - xpath_node na = c.n; - - if (na.attribute()) return namespace_uri(na.attribute(), na.parent()); - else return namespace_uri(na.node()); - } - - case ast_func_namespace_uri_1: - { - xpath_node_set ns = m_left->eval_node_set(c); - if (ns.empty()) return string_t(); - - xpath_node na = ns.first(); - - if (na.attribute()) return namespace_uri(na.attribute(), na.parent()); - else return namespace_uri(na.node()); - } - - case ast_func_string_0: - return string_value(c.n); - - case ast_func_string_1: - return m_left->eval_string(c); - - case ast_func_concat: - { - string_t r = m_left->eval_string(c); - - for (xpath_ast_node* n = m_right; n; n = n->m_next) - r += n->eval_string(c); - - return r; - } - - case 
ast_func_substring_before: - { - string_t s = m_left->eval_string(c); - string_t::size_type pos = s.find(m_right->eval_string(c)); - - if (pos == string_t::npos) return string_t(); - else return string_t(s.begin(), s.begin() + pos); - } - - case ast_func_substring_after: - { - string_t s = m_left->eval_string(c); - string_t p = m_right->eval_string(c); - - string_t::size_type pos = s.find(p); - - if (pos == string_t::npos) return string_t(); - else return string_t(s.begin() + pos + p.length(), s.end()); - } - - case ast_func_substring_2: - { - string_t s = m_left->eval_string(c); - double first = round_nearest(m_right->eval_number(c)); - - if (is_nan(first)) return string_t(); // NaN - else if (first >= s.length() + 1) return string_t(); - - size_t pos = first < 1 ? 1 : (size_t)first; - - return s.substr(pos - 1); - } - - case ast_func_substring_3: - { - string_t s = m_left->eval_string(c); - double first = round_nearest(m_right->eval_number(c)); - double last = first + round_nearest(m_right->m_next->eval_number(c)); - - if (is_nan(first) || is_nan(last)) return string_t(); - else if (first >= s.length() + 1) return string_t(); - else if (first >= last) return string_t(); - - size_t pos = first < 1 ? 1 : (size_t)first; - size_t end = last >= s.length() + 1 ? s.length() + 1 : (size_t)last; - - size_t size_requested = end - pos; - size_t size_to_end = s.length() - pos + 1; - - return s.substr(pos - 1, size_requested < size_to_end ? size_requested : size_to_end); - } - - case ast_func_normalize_space_0: - case ast_func_normalize_space_1: - { - string_t s = m_type == ast_func_normalize_space_0 ? 
string_value(c.n) : m_left->eval_string(c); - - string_t r; - r.reserve(s.size()); - - for (string_t::const_iterator it = s.begin(); it != s.end(); ++it) - { - if (IS_CHARTYPEX(*it, ctx_space)) - { - if (!r.empty() && r[r.size() - 1] != ' ') - r += ' '; - } - else r += *it; - } - - string_t::size_type pos = r.find_last_not_of(' '); - if (pos == string_t::npos) r = string_t(); - else r.erase(r.begin() + pos + 1, r.end()); - - return r; - } - - case ast_func_translate: - { - string_t s = m_left->eval_string(c); - string_t from = m_right->eval_string(c); - string_t to = m_right->m_next->eval_string(c); - - for (string_t::iterator it = s.begin(); it != s.end(); ) - { - string_t::size_type pos = from.find(*it); - - if (pos == string_t::npos) - ++it; - else if (pos >= to.length()) - it = s.erase(it); - else - *it++ = to[pos]; - } - - return s; - } - - default: - { - switch (m_rettype) - { - case xpath_type_boolean: - return eval_boolean(c) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"); - - case xpath_type_number: - return convert_number_to_string(eval_number(c)); - - case xpath_type_node_set: - { - xpath_node_set ns = eval_node_set(c); - return ns.empty() ? 
string_t() : string_value(ns.first()); - } - - default: - assert(!"Wrong expression for return type string"); - return string_t(); - } - } - } - } - - xpath_node_set eval_node_set(const xpath_context& c) - { - switch (m_type) - { - case ast_op_union: - { - xpath_node_set ls = m_left->eval_node_set(c); - xpath_node_set rs = m_right->eval_node_set(c); - - // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother - ls.m_type = xpath_node_set::type_unsorted; - - ls.append(rs.begin(), rs.end()); - - ls.remove_duplicates(); - - return ls; - } - - case ast_filter: - case ast_filter_posinv: - { - xpath_node_set set = m_left->eval_node_set(c); - - // either expression is a number or it contains position() call; sort by document order - if (m_type == ast_filter) set.sort(); - - apply_predicate(set, 0, m_right, c); - - return set; - } - - case ast_func_id: - return xpath_node_set(); - - case ast_step: - { - xpath_node_set ns; - - switch (m_axis) - { - case axis_ancestor: - step_do(ns, c, axis_to_type()); - break; - - case axis_ancestor_or_self: - step_do(ns, c, axis_to_type()); - break; - - case axis_attribute: - step_do(ns, c, axis_to_type()); - break; - - case axis_child: - step_do(ns, c, axis_to_type()); - break; - - case axis_descendant: - step_do(ns, c, axis_to_type()); - break; - - case axis_descendant_or_self: - step_do(ns, c, axis_to_type()); - break; - - case axis_following: - step_do(ns, c, axis_to_type()); - break; - - case axis_following_sibling: - step_do(ns, c, axis_to_type()); - break; - - case axis_namespace: - step_do(ns, c, axis_to_type()); - break; - - case axis_parent: - step_do(ns, c, axis_to_type()); - break; - - case axis_preceding: - step_do(ns, c, axis_to_type()); - break; - - case axis_preceding_sibling: - step_do(ns, c, axis_to_type()); - break; - - case axis_self: - step_do(ns, c, axis_to_type()); - break; - } - - ns.remove_duplicates(); - - return ns; - } - - case ast_step_root: - { - xpath_node_set ns; - - if 
(c.root) - { - ns.push_back(c.root); - - apply_predicates(ns, 0, c); - } - - return ns; - } - - default: - assert(!"Wrong expression for return type node set"); - return xpath_node_set(); - } - } - - bool is_posinv() - { - switch (m_type) - { - case ast_func_position: - return false; - - case ast_string_constant: - case ast_number_constant: - // $$ case ast_variable: - return true; - - case ast_step: - case ast_step_root: - return true; - - case ast_predicate: - case ast_filter: - case ast_filter_posinv: - return true; - - default: - if (m_left && !m_left->is_posinv()) return false; - - for (xpath_ast_node* n = m_right; n; n = n->m_next) - if (!n->is_posinv()) return false; - - return true; - } - } - - xpath_value_type rettype() const - { - return static_cast(m_rettype); - } - }; - - void* xpath_allocator::node() - { - return alloc(sizeof(xpath_ast_node)); - } - - class xpath_parser - { - private: - xpath_allocator& m_alloc; - xpath_lexer m_lexer; - - xpath_parser(const xpath_parser&); - xpath_parser& operator=(const xpath_parser&); - - xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2]) - { - assert(argc <= 1); - - if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw xpath_exception("Function has to be applied to node set"); - - return new (m_alloc.node()) xpath_ast_node(argc == 0 ? 
type0 : type1, xpath_type_string, args[0]); - } - - xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) - { - switch (name.begin[0]) - { - case 'b': - if (name == PUGIXML_TEXT("boolean") && argc == 1) - return new (m_alloc.node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]); - - break; - - case 'c': - if (name == PUGIXML_TEXT("count") && argc == 1) - { - if (args[0]->rettype() != xpath_type_node_set) throw xpath_exception("count() has to be applied to node set"); - return new (m_alloc.node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]); - } - else if (name == PUGIXML_TEXT("contains") && argc == 2) - return new (m_alloc.node()) xpath_ast_node(ast_func_contains, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("concat") && argc >= 2) - return new (m_alloc.node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("ceiling") && argc == 1) - return new (m_alloc.node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]); - - break; - - case 'f': - if (name == PUGIXML_TEXT("false") && argc == 0) - return new (m_alloc.node()) xpath_ast_node(ast_func_false, xpath_type_boolean); - else if (name == PUGIXML_TEXT("floor") && argc == 1) - return new (m_alloc.node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]); - - break; - - case 'i': - if (name == PUGIXML_TEXT("id") && argc == 1) - return new (m_alloc.node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]); - - break; - - case 'l': - if (name == PUGIXML_TEXT("last") && argc == 0) - return new (m_alloc.node()) xpath_ast_node(ast_func_last, xpath_type_number); - else if (name == PUGIXML_TEXT("lang") && argc == 1) - return new (m_alloc.node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]); - else if (name == PUGIXML_TEXT("local-name") && argc <= 1) - return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, 
args); - - break; - - case 'n': - if (name == PUGIXML_TEXT("name") && argc <= 1) - return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args); - else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) - return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args); - else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) - return new (m_alloc.node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("not") && argc == 1) - return new (m_alloc.node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]); - else if (name == PUGIXML_TEXT("number") && argc <= 1) - return new (m_alloc.node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); - - break; - - case 'p': - if (name == PUGIXML_TEXT("position") && argc == 0) - return new (m_alloc.node()) xpath_ast_node(ast_func_position, xpath_type_number); - - break; - - case 'r': - if (name == PUGIXML_TEXT("round") && argc == 1) - return new (m_alloc.node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]); - - break; - - case 's': - if (name == PUGIXML_TEXT("string") && argc <= 1) - return new (m_alloc.node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); - else if (name == PUGIXML_TEXT("string-length") && argc <= 1) - return new (m_alloc.node()) xpath_ast_node(argc == 0 ? 
ast_func_string_length_0 : ast_func_string_length_1, xpath_type_string, args[0]); - else if (name == PUGIXML_TEXT("starts-with") && argc == 2) - return new (m_alloc.node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); - else if (name == PUGIXML_TEXT("substring-before") && argc == 2) - return new (m_alloc.node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("substring-after") && argc == 2) - return new (m_alloc.node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) - return new (m_alloc.node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("sum") && argc == 1) - { - if (args[0]->rettype() != xpath_type_node_set) throw xpath_exception("sum() has to be applied to node set"); - return new (m_alloc.node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]); - } - - break; - - case 't': - if (name == PUGIXML_TEXT("translate") && argc == 3) - return new (m_alloc.node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("true") && argc == 0) - return new (m_alloc.node()) xpath_ast_node(ast_func_true, xpath_type_boolean); - - break; - } - - throw xpath_exception("Unrecognized function or wrong parameter count"); - - #ifdef __DMC__ - return 0; // Digital Mars C++ - #endif - } - - axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) - { - specified = true; - - switch (name.begin[0]) - { - case 'a': - if (name == PUGIXML_TEXT("ancestor")) - return axis_ancestor; - else if (name == PUGIXML_TEXT("ancestor-or-self")) - return axis_ancestor_or_self; - else if (name == PUGIXML_TEXT("attribute")) - return axis_attribute; - - break; - - case 'c': - if (name == PUGIXML_TEXT("child")) - return axis_child; - - 
break; - - case 'd': - if (name == PUGIXML_TEXT("descendant")) - return axis_descendant; - else if (name == PUGIXML_TEXT("descendant-or-self")) - return axis_descendant_or_self; - - break; - - case 'f': - if (name == PUGIXML_TEXT("following")) - return axis_following; - else if (name == PUGIXML_TEXT("following-sibling")) - return axis_following_sibling; - - break; - - case 'n': - if (name == PUGIXML_TEXT("namespace")) - return axis_namespace; - - break; - - case 'p': - if (name == PUGIXML_TEXT("parent")) - return axis_parent; - else if (name == PUGIXML_TEXT("preceding")) - return axis_preceding; - else if (name == PUGIXML_TEXT("preceding-sibling")) - return axis_preceding_sibling; - - break; - - case 's': - if (name == PUGIXML_TEXT("self")) - return axis_self; - - break; - } - - specified = false; - return axis_child; - } - - nodetest_t parse_node_test_type(const xpath_lexer_string& name) - { - switch (name.begin[0]) - { - case 'c': - if (name == PUGIXML_TEXT("comment")) - return nodetest_type_comment; - - break; - - case 'n': - if (name == PUGIXML_TEXT("node")) - return nodetest_type_node; - - break; - - case 'p': - if (name == PUGIXML_TEXT("processing-instruction")) - return nodetest_type_pi; - - break; - - case 't': - if (name == PUGIXML_TEXT("text")) - return nodetest_type_text; - - break; - } - - return nodetest_none; - } - - // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall - xpath_ast_node* parse_primary_expression() - { - switch (m_lexer.current()) - { - case lex_var_ref: - { - throw xpath_exception("Variables are not supported"); - - #ifdef __DMC__ - return 0; // Digital Mars C++ - #endif - } - - case lex_open_brace: - { - m_lexer.next(); - - xpath_ast_node* n = parse_expression(); - - if (m_lexer.current() != lex_close_brace) - throw xpath_exception("Unmatched braces"); - - m_lexer.next(); - - return n; - } - - case lex_quoted_string: - { - xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_string_constant, 
xpath_type_string, m_lexer.contents(), m_alloc); - m_lexer.next(); - - return n; - } - - case lex_number: - { - double value = convert_string_to_number(m_lexer.contents().begin, m_lexer.contents().end); - - xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_number_constant, xpath_type_number, value); - m_lexer.next(); - - return n; - } - - case lex_string: - { - xpath_ast_node* args[2] = {0}; - size_t argc = 0; - - xpath_lexer_string function = m_lexer.contents(); - m_lexer.next(); - - xpath_ast_node* last_arg = 0; - - if (m_lexer.current() != lex_open_brace) - throw xpath_exception("Unrecognized function call"); - m_lexer.next(); - - if (m_lexer.current() != lex_close_brace) - args[argc++] = parse_expression(); - - while (m_lexer.current() != lex_close_brace) - { - if (m_lexer.current() != lex_comma) - throw xpath_exception("No comma between function arguments"); - m_lexer.next(); - - xpath_ast_node* n = parse_expression(); - - if (argc < 2) args[argc] = n; - else last_arg->set_next(n); - - argc++; - last_arg = n; - } - - m_lexer.next(); - - return parse_function(function, argc, args); - } - - default: - throw xpath_exception("Unrecognizable primary expression"); - #ifdef __DMC__ - return 0; // Digital Mars C++ - #endif - } - } - - // FilterExpr ::= PrimaryExpr | FilterExpr Predicate - // Predicate ::= '[' PredicateExpr ']' - // PredicateExpr ::= Expr - xpath_ast_node* parse_filter_expression() - { - xpath_ast_node* n = parse_primary_expression(); - - while (m_lexer.current() == lex_open_square_brace) - { - m_lexer.next(); - - xpath_ast_node* expr = parse_expression(); - - if (n->rettype() != xpath_type_node_set) throw xpath_exception("Predicate has to be applied to node set"); - - bool posinv = expr->rettype() != xpath_type_number && expr->is_posinv(); - - n = new (m_alloc.node()) xpath_ast_node(posinv ? 
ast_filter_posinv : ast_filter, xpath_type_node_set, n, expr); - - if (m_lexer.current() != lex_close_square_brace) - throw xpath_exception("Unmatched square brace"); - - m_lexer.next(); - } - - return n; - } - - // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep - // AxisSpecifier ::= AxisName '::' | '@'? - // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')' - // NameTest ::= '*' | NCName ':' '*' | QName - // AbbreviatedStep ::= '.' | '..' - xpath_ast_node* parse_step(xpath_ast_node* set) - { - if (set && set->rettype() != xpath_type_node_set) - throw xpath_exception("Step has to be applied to node set"); - - bool axis_specified = false; - axis_t axis = axis_child; // implied child axis - - if (m_lexer.current() == lex_axis_attribute) - { - axis = axis_attribute; - axis_specified = true; - - m_lexer.next(); - } - else if (m_lexer.current() == lex_dot) - { - m_lexer.next(); - - return new (m_alloc.node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, xpath_lexer_string(), m_alloc); - } - else if (m_lexer.current() == lex_double_dot) - { - m_lexer.next(); - - return new (m_alloc.node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, xpath_lexer_string(), m_alloc); - } - - nodetest_t nt_type = nodetest_none; - xpath_lexer_string nt_name; - - if (m_lexer.current() == lex_string) - { - // node name test - nt_name = m_lexer.contents(); - m_lexer.next(); - - // was it an axis name? 
- if (m_lexer.current() == lex_double_colon) - { - // parse axis name - if (axis_specified) throw xpath_exception("Two axis specifiers in one step"); - - axis = parse_axis_name(nt_name, axis_specified); - - if (!axis_specified) throw xpath_exception("Unknown axis"); - - // read actual node test - m_lexer.next(); - - if (m_lexer.current() == lex_multiply) - { - nt_type = nodetest_all; - nt_name = xpath_lexer_string(); - m_lexer.next(); - } - else if (m_lexer.current() == lex_string) - { - nt_name = m_lexer.contents(); - m_lexer.next(); - } - else throw xpath_exception("Unrecognized node test"); - } - - if (nt_type == nodetest_none) - { - // node type test or processing-instruction - if (m_lexer.current() == lex_open_brace) - { - m_lexer.next(); - - if (m_lexer.current() == lex_close_brace) - { - m_lexer.next(); - - nt_type = parse_node_test_type(nt_name); - - if (nt_type == nodetest_none) throw xpath_exception("Unrecognized node type"); - - nt_name = xpath_lexer_string(); - } - else if (nt_name == PUGIXML_TEXT("processing-instruction")) - { - if (m_lexer.current() != lex_quoted_string) - throw xpath_exception("Only literals are allowed as arguments to processing-instruction()"); - - nt_type = nodetest_pi; - nt_name = m_lexer.contents(); - m_lexer.next(); - - if (m_lexer.current() != lex_close_brace) - throw xpath_exception("Unmatched brace near processing-instruction()"); - m_lexer.next(); - } - else - throw xpath_exception("Unmatched brace near node type test"); - - } - // QName or NCName:* - else - { - const char_t* colon_pos = std::char_traits::find(nt_name.begin, static_cast(nt_name.end - nt_name.begin), ':'); - - if (colon_pos && colon_pos + 2 == nt_name.end && colon_pos[1] == '*') // NCName:* - { - nt_name.end--; // erase * - - nt_type = nodetest_all_in_namespace; - } - else nt_type = nodetest_name; - } - } - } - else if (m_lexer.current() == lex_multiply) - { - nt_type = nodetest_all; - m_lexer.next(); - } - else throw xpath_exception("Unrecognized node 
test"); - - xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_step, set, axis, nt_type, nt_name, m_alloc); - - xpath_ast_node* last = 0; - - while (m_lexer.current() == lex_open_square_brace) - { - m_lexer.next(); - - xpath_ast_node* expr = parse_expression(); - - xpath_ast_node* pred = new (m_alloc.node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr); - - if (m_lexer.current() != lex_close_square_brace) - throw xpath_exception("Unmatched square brace"); - m_lexer.next(); - - if (last) last->set_next(pred); - else n->set_right(pred); - - last = pred; - } - - return n; - } - - // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step - xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) - { - xpath_ast_node* n = parse_step(set); - - while (m_lexer.current() == lex_slash || m_lexer.current() == lex_double_slash) - { - lexeme_t l = m_lexer.current(); - m_lexer.next(); - - if (l == lex_double_slash) - n = new (m_alloc.node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, xpath_lexer_string(), m_alloc); - - n = parse_step(n); - } - - return n; - } - - // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath - // AbsoluteLocationPath ::= '/' RelativeLocationPath? 
| '//' RelativeLocationPath - xpath_ast_node* parse_location_path() - { - if (m_lexer.current() == lex_slash) - { - m_lexer.next(); - - xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_step_root, xpath_type_node_set); - - // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path - lexeme_t l = m_lexer.current(); - - if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply) - return parse_relative_location_path(n); - else - return n; - } - else if (m_lexer.current() == lex_double_slash) - { - m_lexer.next(); - - xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_step_root, xpath_type_node_set); - n = new (m_alloc.node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, xpath_lexer_string(), m_alloc); - - return parse_relative_location_path(n); - } - else - { - return parse_relative_location_path(0); - } - } - - // PathExpr ::= LocationPath - // | FilterExpr - // | FilterExpr '/' RelativeLocationPath - // | FilterExpr '//' RelativeLocationPath - xpath_ast_node* parse_path_expression() - { - // Clarification. - // PathExpr begins with either LocationPath or FilterExpr. - // FilterExpr begins with PrimaryExpr - // PrimaryExpr begins with '$' in case of it being a variable reference, - // '(' in case of it being an expression, string literal, number constant or - // function call. 
- - if (m_lexer.current() == lex_var_ref || m_lexer.current() == lex_open_brace || - m_lexer.current() == lex_quoted_string || m_lexer.current() == lex_number || - m_lexer.current() == lex_string) - { - if (m_lexer.current() == lex_string) - { - // This is either a function call, or not - if not, we shall proceed with location path - const char_t* state = m_lexer.state(); - - while (IS_CHARTYPEX(*state, ctx_space)) ++state; - - if (*state != '(') return parse_location_path(); - - // This looks like a function call; however this still can be a node-test. Check it. - if (parse_node_test_type(m_lexer.contents()) != nodetest_none) return parse_location_path(); - } - - xpath_ast_node* n = parse_filter_expression(); - - if (m_lexer.current() == lex_slash || m_lexer.current() == lex_double_slash) - { - lexeme_t l = m_lexer.current(); - m_lexer.next(); - - if (l == lex_double_slash) - { - if (n->rettype() != xpath_type_node_set) throw xpath_exception("Step has to be applied to node set"); - - n = new (m_alloc.node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, xpath_lexer_string(), m_alloc); - } - - // select from location path - return parse_relative_location_path(n); - } - - return n; - } - else return parse_location_path(); - } - - // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr - xpath_ast_node* parse_union_expression() - { - xpath_ast_node* n = parse_path_expression(); - - while (m_lexer.current() == lex_union) - { - m_lexer.next(); - - xpath_ast_node* expr = parse_union_expression(); - - if (n->rettype() != xpath_type_node_set || expr->rettype() != xpath_type_node_set) - throw xpath_exception("Union operator has to be applied to node sets"); - - n = new (m_alloc.node()) xpath_ast_node(ast_op_union, xpath_type_node_set, n, expr); - } - - return n; - } - - // UnaryExpr ::= UnionExpr | '-' UnaryExpr - xpath_ast_node* parse_unary_expression() - { - if (m_lexer.current() == lex_minus) - { - m_lexer.next(); - - xpath_ast_node* expr = 
parse_unary_expression(); - - return new (m_alloc.node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr); - } - else return parse_union_expression(); - } - - // MultiplicativeExpr ::= UnaryExpr - // | MultiplicativeExpr '*' UnaryExpr - // | MultiplicativeExpr 'div' UnaryExpr - // | MultiplicativeExpr 'mod' UnaryExpr - xpath_ast_node* parse_multiplicative_expression() - { - xpath_ast_node* n = parse_unary_expression(); - - while (m_lexer.current() == lex_multiply || (m_lexer.current() == lex_string && - (m_lexer.contents() == PUGIXML_TEXT("mod") || m_lexer.contents() == PUGIXML_TEXT("div")))) - { - ast_type_t op = m_lexer.current() == lex_multiply ? ast_op_multiply : - m_lexer.contents().begin[0] == 'd' ? ast_op_divide : ast_op_mod; - m_lexer.next(); - - xpath_ast_node* expr = parse_unary_expression(); - - n = new (m_alloc.node()) xpath_ast_node(op, xpath_type_number, n, expr); - } - - return n; - } - - // AdditiveExpr ::= MultiplicativeExpr - // | AdditiveExpr '+' MultiplicativeExpr - // | AdditiveExpr '-' MultiplicativeExpr - xpath_ast_node* parse_additive_expression() - { - xpath_ast_node* n = parse_multiplicative_expression(); - - while (m_lexer.current() == lex_plus || m_lexer.current() == lex_minus) - { - lexeme_t l = m_lexer.current(); - - m_lexer.next(); - - xpath_ast_node* expr = parse_multiplicative_expression(); - - n = new (m_alloc.node()) xpath_ast_node(l == lex_plus ? 
ast_op_add : ast_op_subtract, xpath_type_number, n, expr); - } - - return n; - } - - // RelationalExpr ::= AdditiveExpr - // | RelationalExpr '<' AdditiveExpr - // | RelationalExpr '>' AdditiveExpr - // | RelationalExpr '<=' AdditiveExpr - // | RelationalExpr '>=' AdditiveExpr - xpath_ast_node* parse_relational_expression() - { - xpath_ast_node* n = parse_additive_expression(); - - while (m_lexer.current() == lex_less || m_lexer.current() == lex_less_or_equal || - m_lexer.current() == lex_greater || m_lexer.current() == lex_greater_or_equal) - { - lexeme_t l = m_lexer.current(); - m_lexer.next(); - - xpath_ast_node* expr = parse_additive_expression(); - - n = new (m_alloc.node()) xpath_ast_node(l == lex_less ? ast_op_less : l == lex_greater ? ast_op_greater : - l == lex_less_or_equal ? ast_op_less_or_equal : ast_op_greater_or_equal, xpath_type_boolean, n, expr); - } - - return n; - } - - // EqualityExpr ::= RelationalExpr - // | EqualityExpr '=' RelationalExpr - // | EqualityExpr '!=' RelationalExpr - xpath_ast_node* parse_equality_expression() - { - xpath_ast_node* n = parse_relational_expression(); - - while (m_lexer.current() == lex_equal || m_lexer.current() == lex_not_equal) - { - lexeme_t l = m_lexer.current(); - - m_lexer.next(); - - xpath_ast_node* expr = parse_relational_expression(); - - n = new (m_alloc.node()) xpath_ast_node(l == lex_equal ? 
ast_op_equal : ast_op_not_equal, xpath_type_boolean, n, expr); - } - - return n; - } - - // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr - xpath_ast_node* parse_and_expression() - { - xpath_ast_node* n = parse_equality_expression(); - - while (m_lexer.current() == lex_string && m_lexer.contents() == PUGIXML_TEXT("and")) - { - m_lexer.next(); - - xpath_ast_node* expr = parse_equality_expression(); - - n = new (m_alloc.node()) xpath_ast_node(ast_op_and, xpath_type_boolean, n, expr); - } - - return n; - } - - // OrExpr ::= AndExpr | OrExpr 'or' AndExpr - xpath_ast_node* parse_or_expression() - { - xpath_ast_node* n = parse_and_expression(); - - while (m_lexer.current() == lex_string && m_lexer.contents() == PUGIXML_TEXT("or")) - { - m_lexer.next(); - - xpath_ast_node* expr = parse_and_expression(); - - n = new (m_alloc.node()) xpath_ast_node(ast_op_or, xpath_type_boolean, n, expr); - } - - return n; - } - - // Expr ::= OrExpr - xpath_ast_node* parse_expression() - { - return parse_or_expression(); - } - - public: - explicit xpath_parser(const char_t* query, xpath_allocator& alloc): m_alloc(alloc), m_lexer(query) - { - } - - xpath_ast_node* parse() - { - xpath_ast_node* result = parse_expression(); - - if (m_lexer.current() != lex_eof) - { - // there are still unparsed tokens left, error - throw xpath_exception("Incorrect query"); - } - - return result; - } - }; - - xpath_query::xpath_query(const char_t* query): m_alloc(0), m_root(0) - { - compile(query); - } - - xpath_query::~xpath_query() - { - delete m_alloc; - } - - void xpath_query::compile(const char_t* query) - { - delete m_alloc; - m_alloc = new xpath_allocator; - - xpath_parser p(query, *m_alloc); - - m_root = p.parse(); - } - - xpath_value_type xpath_query::return_type() const - { - if (!m_root) return xpath_type_none; - - return m_root->rettype(); - } - - bool xpath_query::evaluate_boolean(const xml_node& n) const - { - if (!m_root) return false; - - xpath_context c; - - c.root = n.root(); - c.n = 
n; - c.position = 1; - c.size = 1; - - return m_root->eval_boolean(c); - } - - double xpath_query::evaluate_number(const xml_node& n) const - { - if (!m_root) return gen_nan(); - - xpath_context c; - - c.root = n.root(); - c.n = n; - c.position = 1; - c.size = 1; - - return m_root->eval_number(c); - } - - string_t xpath_query::evaluate_string(const xml_node& n) const - { - if (!m_root) return string_t(); - - xpath_context c; - - c.root = n.root(); - c.n = n; - c.position = 1; - c.size = 1; - - return m_root->eval_string(c); - } - - xpath_node_set xpath_query::evaluate_node_set(const xml_node& n) const - { - if (!m_root) return xpath_node_set(); - if (m_root->rettype() != xpath_type_node_set) throw xpath_exception("Expression does not evaluate to node set"); - - xpath_context c; - - c.root = n.root(); - c.n = n; - c.position = 1; - c.size = 1; - - return m_root->eval_node_set(c); - } - - xpath_node xml_node::select_single_node(const char_t* query) const - { - xpath_query q(query); - return select_single_node(q); - } - - xpath_node xml_node::select_single_node(const xpath_query& query) const - { - xpath_node_set s = query.evaluate_node_set(*this); - return s.empty() ? 
xpath_node() : s.first(); - } - - xpath_node_set xml_node::select_nodes(const char_t* query) const - { - xpath_query q(query); - return select_nodes(q); - } - - xpath_node_set xml_node::select_nodes(const xpath_query& query) const - { - return query.evaluate_node_set(*this); - } -} - -#endif - -/** - * Copyright (c) 2006-2010 Arseny Kapoulkine - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ +/** + * pugixml parser - version 0.9 + * -------------------------------------------------------- + * Copyright (C) 2006-2010, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Report bugs and download new versions at http://code.google.com/p/pugixml/ + * + * This library is distributed under the MIT License. See notice at the end + * of this file. 
+ * + * This work is based on the pugxml parser, which is: + * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) + */ + +#include "pugixml.hpp" + +#ifndef PUGIXML_NO_XPATH + +#include + +#include +#include +#include +#include +#include +#include + +#ifdef PUGIXML_WCHAR_MODE +# include +#endif + +// int32_t +#if !defined(_MSC_VER) || _MSC_VER >= 1600 +# include +#else +typedef __int32 int32_t; +#endif + +#if defined(_MSC_VER) +# pragma warning(disable: 4127) // conditional expression is constant +# pragma warning(disable: 4996) // this function or variable may be unsafe +#endif + +#ifdef __INTEL_COMPILER +# pragma warning(disable: 1478 1786) // function was declared "deprecated" +#endif + +#ifdef __SNC__ +# pragma diag_suppress=237 // controlling expression is constant +#endif + +#include +#include + +// String utilities prototypes +namespace pugi +{ + namespace impl + { + bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count); + void widen_ascii(wchar_t* dest, const char* source); + } +} + +namespace +{ + using namespace pugi; + + enum chartypex + { + ctx_space = 1, // \r, \n, space, tab + ctx_start_symbol = 2, // Any symbol > 127, a-z, A-Z, _ + ctx_digit = 4, // 0-9 + ctx_symbol = 8 // Any symbol > 127, a-z, A-Z, 0-9, _, -, . 
+ }; + + const unsigned char chartypex_table[256] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 0-15 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 0, // 32-47 + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 0, 0, 0, 0, 0, // 48-63 + 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 64-79 + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 10, // 80-95 + 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 96-111 + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, // 112-127 + + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 128+ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 + }; + +#ifdef PUGIXML_WCHAR_MODE + #define IS_CHARTYPEX(c, ct) ((static_cast(c) < 128 ? 
chartypex_table[static_cast(c)] : chartypex_table[128]) & (ct)) +#else + #define IS_CHARTYPEX(c, ct) (chartypex_table[static_cast(c)] & (ct)) +#endif + + bool starts_with(const char_t* string, const char_t* pattern) + { + while (*pattern && *string == *pattern) + { + string++; + pattern++; + } + + return *pattern == 0; + } + + const char_t* find_char(const char_t* s, char_t c) + { + #ifdef PUGIXML_WCHAR_MODE + return wcschr(s, c); + #else + return ::strchr(s, c); + #endif + } + + string_t string_value(const xpath_node& na) + { + if (na.attribute()) + return na.attribute().value(); + else + { + const xml_node& n = na.node(); + + switch (n.type()) + { + case node_pcdata: + case node_cdata: + case node_comment: + case node_pi: + return n.value(); + + case node_document: + case node_element: + { + string_t result; + + xml_node cur = n.first_child(); + + if (cur) + { + do + { + if (cur.type() == node_pcdata || cur.type() == node_cdata) + result += cur.value(); + + if (cur.first_child()) + cur = cur.first_child(); + else if (cur.next_sibling()) + cur = cur.next_sibling(); + else + { + // Borland C++ workaround + while (!cur.next_sibling() && cur != n && (bool)cur.parent()) + cur = cur.parent(); + + if (cur != n) + cur = cur.next_sibling(); + } + } + while (cur && cur != n); + } + + return result; + } + + default: + return string_t(); + } + } + } + + unsigned int node_height(xml_node n) + { + unsigned int result = 0; + + while (n) + { + ++result; + n = n.parent(); + } + + return result; + } + + // precondition: node_height of ln is <= node_height of rn, ln != rn + bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh) + { + assert(lh <= rh); + + while (lh < rh) + { + --rh; + rn = rn.parent(); + } + + if (ln == rn) return true; + + while (ln.parent() != rn.parent()) + { + ln = ln.parent(); + rn = rn.parent(); + } + + for (; ln; ln = ln.next_sibling()) + if (ln == rn) + return true; + + return false; + } + + bool node_is_ancestor(xml_node parent, 
xml_node node) + { + while (node && node != parent) node = node.parent(); + + return parent && node == parent; + } + + struct document_order_comparator + { + bool operator()(const xpath_node& lhs, const xpath_node& rhs) const + { + xml_node ln = lhs.node(), rn = rhs.node(); + + if (lhs.attribute() && rhs.attribute()) + { + if (lhs.parent() == rhs.parent()) + { + for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute()) + if (a == rhs.attribute()) + return true; + + return false; + } + + ln = lhs.parent(); + rn = rhs.parent(); + } + else if (lhs.attribute()) + { + if (lhs.parent() == rhs.node()) return false; + + ln = lhs.parent(); + } + else if (rhs.attribute()) + { + if (rhs.parent() == lhs.node()) return true; + + rn = rhs.parent(); + } + + if (ln == rn) return false; + + unsigned int lh = node_height(ln); + unsigned int rh = node_height(rn); + + return (lh <= rh) ? node_is_before(ln, lh, rn, rh) : !node_is_before(rn, rh, ln, lh); + } + }; + + struct duplicate_comparator + { + bool operator()(const xpath_node& lhs, const xpath_node& rhs) const + { + if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true; + else return rhs.attribute() ? false : lhs.node() < rhs.node(); + } + }; + + double gen_nan() + { + #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24)) + union { float f; int32_t i; } u[sizeof(float) == sizeof(int32_t) ? 
1 : -1]; + u[0].i = 0x7fc00000; + return u[0].f; + #else + // fallback + const volatile double zero = 0.0; + return zero / zero; + #endif + } + + bool is_nan(double value) + { + #if defined(_MSC_VER) || defined(__BORLANDC__) + return !!_isnan(value); + #elif defined(fpclassify) && defined(FP_NAN) + return fpclassify(value) == FP_NAN; + #else + // fallback + const volatile double v = value; + return v != v; + #endif + } + + const char_t* convert_number_to_string_special(double value) + { + #if defined(_MSC_VER) || defined(__BORLANDC__) + if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0; + if (_isnan(value)) return PUGIXML_TEXT("NaN"); + return PUGIXML_TEXT("-Infinity") + (value > 0); + #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO) + switch (fpclassify(value)) + { + case FP_NAN: + return PUGIXML_TEXT("NaN"); + + case FP_INFINITE: + return PUGIXML_TEXT("-Infinity") + (value > 0); + + case FP_ZERO: + return PUGIXML_TEXT("0"); + + default: + return 0; + } + #else + // fallback + const volatile double v = value; + + if (v == 0) return PUGIXML_TEXT("0"); + if (v != v) return PUGIXML_TEXT("NaN"); + if (v * 2 == v) return PUGIXML_TEXT("-Infinity") + (value > 0); + return 0; + #endif + } + + bool convert_number_to_boolean(double value) + { + return (value != 0 && !is_nan(value)); + } + + // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent + void convert_number_to_mantissa_exponent(double value, char* buffer, char** out_mantissa, int* out_exponent) + { + // get a scientific notation value with IEEE DBL_DIG decimals + sprintf(buffer, "%.15e", value); + + // get the exponent (possibly negative) + char* exponent_string = strchr(buffer, 'e'); + assert(exponent_string); + + int exponent = atoi(exponent_string + 1); + + // extract mantissa string: skip sign + char* mantissa = buffer[0] == '-' ? 
buffer + 1 : buffer; + assert(mantissa[0] != '0' && mantissa[1] == '.'); + + // divide mantissa by 10 to eliminate integer part + mantissa[1] = mantissa[0]; + mantissa++; + exponent++; + + // remove extra mantissa digits and zero-terminate mantissa + char* mantissa_end = exponent_string; + + while (mantissa != mantissa_end && *(mantissa_end - 1) == '0') --mantissa_end; + + *mantissa_end = 0; + + // fill results + *out_mantissa = mantissa; + *out_exponent = exponent; + } + + string_t convert_number_to_string(double value) + { + // try special number conversion + const char_t* special = convert_number_to_string_special(value); + if (special) return special; + + // get mantissa + exponent form + char mantissa_buffer[64]; + + char* mantissa; + int exponent; + convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent); + + // make the number! + char_t result[512]; + char_t* s = result; + + // sign + if (value < 0) *s++ = '-'; + + // integer part + if (exponent <= 0) + { + *s++ = '0'; + } + else + { + while (exponent > 0) + { + assert(*mantissa == 0 || (unsigned)(*mantissa - '0') <= 9); + *s++ = *mantissa ? *mantissa++ : '0'; + exponent--; + } + } + + // fractional part + if (*mantissa) + { + // decimal point + *s++ = '.'; + + // extra zeroes from negative exponent + while (exponent < 0) + { + *s++ = '0'; + exponent++; + } + + // extra mantissa digits + while (*mantissa) + { + assert((unsigned)(*mantissa - '0') <= 9); + *s++ = *mantissa++; + } + } + + // zero-terminate + assert(s < result + sizeof(result) / sizeof(result[0])); + *s = 0; + + return string_t(result); + } + + bool check_string_to_number_format(const char_t* string) + { + // parse leading whitespace + while (IS_CHARTYPEX(*string, ctx_space)) ++string; + + // parse sign + if (*string == '-') ++string; + + if (!*string) return false; + + // if there is no integer part, there should be a decimal part with at least one digit + if (!IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' 
|| !IS_CHARTYPEX(string[1], ctx_digit))) return false; + + // parse integer part + while (IS_CHARTYPEX(*string, ctx_digit)) ++string; + + // parse decimal part + if (*string == '.') + { + ++string; + + while (IS_CHARTYPEX(*string, ctx_digit)) ++string; + } + + // parse trailing whitespace + while (IS_CHARTYPEX(*string, ctx_space)) ++string; + + return *string == 0; + } + + double convert_string_to_number(const char_t* string) + { + // check string format + if (!check_string_to_number_format(string)) return gen_nan(); + + // parse string + #ifdef PUGIXML_WCHAR_MODE + return wcstod(string, 0); + #else + return atof(string); + #endif + } + + double convert_string_to_number(const char_t* begin, const char_t* end) + { + char_t buffer[32]; + + size_t length = static_cast(end - begin); + + if (length < sizeof(buffer) / sizeof(buffer[0])) + { + // optimized on-stack conversion + memcpy(buffer, begin, length * sizeof(char_t)); + buffer[length] = 0; + + return convert_string_to_number(buffer); + } + else + { + // need to make dummy on-heap copy + string_t copy(begin, end); + + return convert_string_to_number(copy.c_str()); + } + } + + double round_nearest(double value) + { + return floor(value + 0.5); + } + + double round_nearest_nzero(double value) + { + // same as round_nearest, but returns -0 for [-0.5, -0] + // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0) + return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5); + } + + const char_t* local_name(const char_t* name) + { + const char_t* p = find_char(name, ':'); + + return p ? 
p + 1 : name; + } + + const char_t* namespace_uri(const xml_node& node) + { + const char_t* pos = find_char(node.name(), ':'); + + string_t ns = PUGIXML_TEXT("xmlns"); + + if (pos) + { + ns += ':'; + ns.append(node.name(), pos); + } + + xml_node p = node; + + while (p) + { + xml_attribute a = p.attribute(ns.c_str()); + + if (a) return a.value(); + + p = p.parent(); + } + + return PUGIXML_TEXT(""); + } + + const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent) + { + const char_t* pos = find_char(attr.name(), ':'); + + // Default namespace does not apply to attributes + if (!pos) return PUGIXML_TEXT(""); + + string_t ns = PUGIXML_TEXT("xmlns:"); + ns.append(attr.name(), pos); + + xml_node p = parent; + + while (p) + { + xml_attribute a = p.attribute(ns.c_str()); + + if (a) return a.value(); + + p = p.parent(); + } + + return PUGIXML_TEXT(""); + } + + struct equal_to + { + template bool operator()(const T& lhs, const T& rhs) const + { + return lhs == rhs; + } + }; + + struct not_equal_to + { + template bool operator()(const T& lhs, const T& rhs) const + { + return lhs != rhs; + } + }; + + struct less + { + template bool operator()(const T& lhs, const T& rhs) const + { + return lhs < rhs; + } + }; + + struct less_equal + { + template bool operator()(const T& lhs, const T& rhs) const + { + return lhs <= rhs; + } + }; +} + +namespace pugi +{ + xpath_exception::xpath_exception(const char* message): m_message(message) + { + } + + const char* xpath_exception::what() const throw() + { + return m_message; + } + + const size_t xpath_memory_block_size = 4096; ///< Memory block size, 4 kb + + class xpath_allocator + { + // disable copy ctor and assignment + xpath_allocator(const xpath_allocator&); + xpath_allocator& operator=(const xpath_allocator&); + + struct memory_block + { + memory_block(): next(0), size(0) + { + } + + memory_block* next; + size_t size; + + char data[xpath_memory_block_size]; + }; + + memory_block* m_root; + + public: + 
xpath_allocator(): m_root(0) + { + m_root = new memory_block; + } + + ~xpath_allocator() + { + while (m_root) + { + memory_block* cur = m_root->next; + delete m_root; + m_root = cur; + } + } + + void* alloc(size_t size) + { + // align size so that we're able to store pointers in subsequent blocks + size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1); + + if (m_root->size + size <= xpath_memory_block_size) + { + void* buf = m_root->data + m_root->size; + m_root->size += size; + return buf; + } + else + { + memory_block* block; + + if (size > xpath_memory_block_size) + block = static_cast(operator new(size + sizeof(memory_block) - xpath_memory_block_size)); + else + block = new memory_block; + + block->next = m_root; + block->size = size; + + m_root = block; + + return block->data; + } + } + + void* node(); + }; + + xpath_node::xpath_node() + { + } + + xpath_node::xpath_node(const xml_node& node): m_node(node) + { + } + + xpath_node::xpath_node(const xml_attribute& attribute, const xml_node& parent): m_node(parent), m_attribute(attribute) + { + } + + xml_node xpath_node::node() const + { + return m_attribute ? xml_node() : m_node; + } + + xml_attribute xpath_node::attribute() const + { + return m_attribute; + } + + xml_node xpath_node::parent() const + { + return m_attribute ? m_node : m_node.parent(); + } + + xpath_node::operator xpath_node::unspecified_bool_type() const + { + return (m_node || m_attribute) ? 
&xpath_node::m_node : 0; + } + + bool xpath_node::operator!() const + { + return !(m_node || m_attribute); + } + + bool xpath_node::operator==(const xpath_node& n) const + { + return m_node == n.m_node && m_attribute == n.m_attribute; + } + + bool xpath_node::operator!=(const xpath_node& n) const + { + return m_node != n.m_node || m_attribute != n.m_attribute; + } + +#ifdef __BORLANDC__ + bool operator&&(const xpath_node& lhs, bool rhs) + { + return (bool)lhs && rhs; + } + + bool operator||(const xpath_node& lhs, bool rhs) + { + return (bool)lhs || rhs; + } +#endif + + xpath_node_set::xpath_node_set(): m_type(type_unsorted), m_begin(&m_storage), m_end(&m_storage), m_eos(&m_storage + 1) + { + } + + xpath_node_set::~xpath_node_set() + { + if (m_begin != &m_storage) delete[] m_begin; + } + + xpath_node_set::xpath_node_set(const xpath_node_set& ns): m_type(type_unsorted), m_begin(&m_storage), m_end(&m_storage), m_eos(&m_storage + 1) + { + *this = ns; + } + + xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns) + { + if (&ns == this) return *this; + + if (m_begin != &m_storage) delete[] m_begin; + + m_begin = m_end = m_eos = 0; + m_type = ns.m_type; + + if (ns.size() == 1) + { + m_storage = *ns.m_begin; + m_begin = &m_storage; + m_end = m_eos = &m_storage + 1; + } + else + { + append(ns.begin(), ns.end()); + } + + return *this; + } + + xpath_node_set::type_t xpath_node_set::type() const + { + return m_type; + } + + size_t xpath_node_set::size() const + { + return m_end - m_begin; + } + + bool xpath_node_set::empty() const + { + return size() == 0; + } + + const xpath_node& xpath_node_set::operator[](size_t index) const + { + assert(index < size()); + return m_begin[index]; + } + + xpath_node_set::iterator xpath_node_set::mut_begin() + { + return m_begin; + } + + xpath_node_set::const_iterator xpath_node_set::begin() const + { + return m_begin; + } + + xpath_node_set::const_iterator xpath_node_set::end() const + { + return m_end; + } + + void 
xpath_node_set::sort(bool reverse) + { + std::sort(m_begin, m_end, document_order_comparator()); + + if (reverse) + std::reverse(m_begin, m_end); + + m_type = reverse ? type_sorted_reverse : type_sorted; + } + + void xpath_node_set::push_back(const xpath_node& n) + { + if (m_end == m_eos) + append(&n, &n + 1); + else + { + *m_end = n; + ++m_end; + } + } + + void xpath_node_set::append(const_iterator begin, const_iterator end) + { + if (begin == end) return; + + size_t count = end - begin; + size_t size = m_end - m_begin; + size_t capacity = m_eos - m_begin; + + if (capacity < size + count) + { + if (capacity < 2) capacity = 2; + + while (capacity < size + count) capacity += capacity / 2; + + xpath_node* storage = new xpath_node[capacity]; + std::copy(m_begin, m_end, storage); + + if (m_begin != &m_storage) delete[] m_begin; + + m_begin = storage; + m_end = storage + size; + m_eos = storage + capacity; + } + + std::copy(begin, end, m_end); + m_end += count; + } + + void xpath_node_set::truncate(iterator it) + { + m_end = it; + } + + xpath_node xpath_node_set::first() const + { + if (empty()) return xpath_node(); + + switch (m_type) + { + case type_sorted: return *m_begin; + case type_sorted_reverse: return *(m_end - 1); + case type_unsorted: return *std::min_element(begin(), end(), document_order_comparator()); + default: return xpath_node(); + } + } + + void xpath_node_set::remove_duplicates() + { + if (m_type == type_unsorted) + { + std::sort(m_begin, m_end, duplicate_comparator()); + } + + truncate(std::unique(m_begin, m_end)); + } + + struct xpath_context + { + xml_node root; + xpath_node n; + size_t position, size; + }; + + enum lexeme_t + { + lex_none = 0, + lex_equal, + lex_not_equal, + lex_less, + lex_greater, + lex_less_or_equal, + lex_greater_or_equal, + lex_plus, + lex_minus, + lex_multiply, + lex_union, + lex_var_ref, + lex_open_brace, + lex_close_brace, + lex_quoted_string, + lex_number, + lex_slash, + lex_double_slash, + lex_open_square_brace, + 
lex_close_square_brace, + lex_string, + lex_comma, + lex_axis_attribute, + lex_dot, + lex_double_dot, + lex_double_colon, + lex_eof + }; + + struct xpath_lexer_string + { + const char_t* begin; + const char_t* end; + + xpath_lexer_string(): begin(0), end(0) + { + } + + bool operator==(const char_t* other) const + { + size_t length = static_cast(end - begin); + + return impl::strequalrange(other, begin, length); + } + }; + + class xpath_lexer + { + // disable copy ctor and assignment + xpath_lexer(const xpath_lexer&); + xpath_lexer& operator=(const xpath_lexer&); + + private: + const char_t* m_cur; + xpath_lexer_string m_cur_lexeme_contents; + + lexeme_t m_cur_lexeme; + + void contents_clear() + { + m_cur_lexeme_contents = xpath_lexer_string(); + } + + public: + explicit xpath_lexer(const char_t* query): m_cur(query) + { + next(); + } + + const char_t* state() const + { + return m_cur; + } + + void next() + { + contents_clear(); + + const char_t* cur = m_cur; + + while (IS_CHARTYPEX(*cur, ctx_space)) ++cur; + + switch (*cur) + { + case 0: + m_cur_lexeme = lex_eof; + break; + + case '>': + if (*(cur+1) == '=') + { + cur += 2; + m_cur_lexeme = lex_greater_or_equal; + } + else + { + cur += 1; + m_cur_lexeme = lex_greater; + } + break; + + case '<': + if (*(cur+1) == '=') + { + cur += 2; + m_cur_lexeme = lex_less_or_equal; + } + else + { + cur += 1; + m_cur_lexeme = lex_less; + } + break; + + case '!': + if (*(cur+1) == '=') + { + cur += 2; + m_cur_lexeme = lex_not_equal; + } + else + { + m_cur_lexeme = lex_none; + } + break; + + case '=': + cur += 1; + m_cur_lexeme = lex_equal; + + break; + + case '+': + cur += 1; + m_cur_lexeme = lex_plus; + + break; + + case '-': + cur += 1; + m_cur_lexeme = lex_minus; + + break; + + case '*': + cur += 1; + m_cur_lexeme = lex_multiply; + + break; + + case '|': + cur += 1; + m_cur_lexeme = lex_union; + + break; + + case '$': + cur += 1; + m_cur_lexeme = lex_var_ref; + + break; + + case '(': + cur += 1; + m_cur_lexeme = lex_open_brace; 
+ + break; + + case ')': + cur += 1; + m_cur_lexeme = lex_close_brace; + + break; + + case '[': + cur += 1; + m_cur_lexeme = lex_open_square_brace; + + break; + + case ']': + cur += 1; + m_cur_lexeme = lex_close_square_brace; + + break; + + case ',': + cur += 1; + m_cur_lexeme = lex_comma; + + break; + + case '/': + if (*(cur+1) == '/') + { + cur += 2; + m_cur_lexeme = lex_double_slash; + } + else + { + cur += 1; + m_cur_lexeme = lex_slash; + } + break; + + case '.': + if (*(cur+1) == '.') + { + cur += 2; + m_cur_lexeme = lex_double_dot; + } + else if (IS_CHARTYPEX(*(cur+1), ctx_digit)) + { + m_cur_lexeme_contents.begin = cur; // . + + ++cur; + + while (IS_CHARTYPEX(*cur, ctx_digit)) cur++; + + m_cur_lexeme_contents.end = cur; + + m_cur_lexeme = lex_number; + } + else + { + cur += 1; + m_cur_lexeme = lex_dot; + } + break; + + case '@': + cur += 1; + m_cur_lexeme = lex_axis_attribute; + + break; + + case '"': + case '\'': + { + char_t terminator = *cur; + + ++cur; + + m_cur_lexeme_contents.begin = cur; + while (*cur && *cur != terminator) cur++; + m_cur_lexeme_contents.end = cur; + + if (!*cur) + m_cur_lexeme = lex_none; + else + { + cur += 1; + m_cur_lexeme = lex_quoted_string; + } + + break; + } + + case ':': + if (*(cur+1) == ':') + { + cur += 2; + m_cur_lexeme = lex_double_colon; + } + else + { + m_cur_lexeme = lex_none; + } + break; + + default: + if (IS_CHARTYPEX(*cur, ctx_digit)) + { + m_cur_lexeme_contents.begin = cur; + + while (IS_CHARTYPEX(*cur, ctx_digit)) cur++; + + if (*cur == '.') + { + cur++; + + while (IS_CHARTYPEX(*cur, ctx_digit)) cur++; + } + + m_cur_lexeme_contents.end = cur; + + m_cur_lexeme = lex_number; + } + else if (IS_CHARTYPEX(*cur, ctx_start_symbol)) + { + m_cur_lexeme_contents.begin = cur; + + while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++; + + if (cur[0] == ':') + { + if (cur[1] == '*') // namespace test ncname:* + { + cur += 2; // :* + } + else if (IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname + { + cur++; // : + + while 
(IS_CHARTYPEX(*cur, ctx_symbol)) cur++; + } + } + + m_cur_lexeme_contents.end = cur; + + while (IS_CHARTYPEX(*cur, ctx_space)) ++cur; + + m_cur_lexeme = lex_string; + } + else + { + throw xpath_exception("Unrecognized token"); + } + } + + m_cur = cur; + } + + lexeme_t current() const + { + return m_cur_lexeme; + } + + const xpath_lexer_string& contents() const + { + return m_cur_lexeme_contents; + } + }; + + enum ast_type_t + { + ast_none, + ast_op_or, // left or right + ast_op_and, // left and right + ast_op_equal, // left = right + ast_op_not_equal, // left != right + ast_op_less, // left < right + ast_op_greater, // left > right + ast_op_less_or_equal, // left <= right + ast_op_greater_or_equal, // left >= right + ast_op_add, // left + right + ast_op_subtract, // left - right + ast_op_multiply, // left * right + ast_op_divide, // left / right + ast_op_mod, // left % right + ast_op_negate, // left - right + ast_op_union, // left | right + ast_predicate, // apply predicate to set; next points to next predicate + ast_filter, // select * from left where right + ast_filter_posinv, // select * from left where right; proximity position invariant + ast_string_constant, // string constant + ast_number_constant, // number constant + ast_func_last, // last() + ast_func_position, // position() + ast_func_count, // count(left) + ast_func_id, // id(left) + ast_func_local_name_0, // local-name() + ast_func_local_name_1, // local-name(left) + ast_func_namespace_uri_0, // namespace-uri() + ast_func_namespace_uri_1, // namespace-uri(left) + ast_func_name_0, // name() + ast_func_name_1, // name(left) + ast_func_string_0, // string() + ast_func_string_1, // string(left) + ast_func_concat, // concat(left, right, siblings) + ast_func_starts_with, // starts_with(left, right) + ast_func_contains, // contains(left, right) + ast_func_substring_before, // substring-before(left, right) + ast_func_substring_after, // substring-after(left, right) + ast_func_substring_2, // substring(left, 
right) + ast_func_substring_3, // substring(left, right, third) + ast_func_string_length_0, // string-length() + ast_func_string_length_1, // string-length(left) + ast_func_normalize_space_0, // normalize-space() + ast_func_normalize_space_1, // normalize-space(left) + ast_func_translate, // translate(left, right, third) + ast_func_boolean, // boolean(left) + ast_func_not, // not(left) + ast_func_true, // true() + ast_func_false, // false() + ast_func_lang, // lang(left) + ast_func_number_0, // number() + ast_func_number_1, // number(left) + ast_func_sum, // sum(left) + ast_func_floor, // floor(left) + ast_func_ceiling, // ceiling(left) + ast_func_round, // round(left) + ast_step, // process set left with step + ast_step_root // select root node + }; + + enum axis_t + { + axis_ancestor, + axis_ancestor_or_self, + axis_attribute, + axis_child, + axis_descendant, + axis_descendant_or_self, + axis_following, + axis_following_sibling, + axis_namespace, + axis_parent, + axis_preceding, + axis_preceding_sibling, + axis_self + }; + + enum nodetest_t + { + nodetest_none, + nodetest_name, + nodetest_type_node, + nodetest_type_comment, + nodetest_type_pi, + nodetest_type_text, + nodetest_pi, + nodetest_all, + nodetest_all_in_namespace + }; + + template struct axis_to_type + { + static const axis_t axis; + }; + + template const axis_t axis_to_type::axis = N; + + class xpath_ast_node + { + private: + // node type + char m_type; + char m_rettype; + + // for ast_step / ast_predicate + char m_axis; + char m_test; + + // tree node structure + xpath_ast_node* m_left; + xpath_ast_node* m_right; + xpath_ast_node* m_next; + + union + { + // value for ast_string_constant + const char_t* string; + // value for ast_number_constant + double number; + // node test for ast_step (node name/namespace/node type/pi target) + const char_t* nodetest; + } m_data; + + xpath_ast_node(const xpath_ast_node&); + xpath_ast_node& operator=(const xpath_ast_node&); + + template static bool 
compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const Comp& comp) + { + xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); + + if (lt != xpath_type_node_set && rt != xpath_type_node_set) + { + if (lt == xpath_type_boolean || rt == xpath_type_boolean) + return comp(lhs->eval_boolean(c), rhs->eval_boolean(c)); + else if (lt == xpath_type_number || rt == xpath_type_number) + return comp(lhs->eval_number(c), rhs->eval_number(c)); + else if (lt == xpath_type_string || rt == xpath_type_string) + return comp(lhs->eval_string(c), rhs->eval_string(c)); + } + else if (lt == xpath_type_node_set && rt == xpath_type_node_set) + { + xpath_node_set ls = lhs->eval_node_set(c); + xpath_node_set rs = rhs->eval_node_set(c); + + for (xpath_node_set::const_iterator li = ls.begin(); li != ls.end(); ++li) + for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) + { + if (comp(string_value(*li), string_value(*ri))) + return true; + } + + return false; + } + else + { + if (lt == xpath_type_node_set) + { + std::swap(lhs, rhs); + std::swap(lt, rt); + } + + if (lt == xpath_type_boolean) + return comp(lhs->eval_boolean(c), rhs->eval_boolean(c)); + else if (lt == xpath_type_number) + { + double l = lhs->eval_number(c); + xpath_node_set rs = rhs->eval_node_set(c); + + for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) + { + if (comp(l, convert_string_to_number(string_value(*ri).c_str()))) + return true; + } + + return false; + } + else if (lt == xpath_type_string) + { + string_t l = lhs->eval_string(c); + xpath_node_set rs = rhs->eval_node_set(c); + + for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) + { + if (comp(l, string_value(*ri))) + return true; + } + + return false; + } + } + + assert(!"Wrong types"); + return false; + } + + template static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const Comp& comp) + { + xpath_value_type lt = lhs->rettype(), 
rt = rhs->rettype(); + + if (lt != xpath_type_node_set && rt != xpath_type_node_set) + return comp(lhs->eval_number(c), rhs->eval_number(c)); + else if (lt == xpath_type_node_set && rt == xpath_type_node_set) + { + xpath_node_set ls = lhs->eval_node_set(c); + xpath_node_set rs = rhs->eval_node_set(c); + + for (xpath_node_set::const_iterator li = ls.begin(); li != ls.end(); ++li) + { + double l = convert_string_to_number(string_value(*li).c_str()); + + for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) + { + if (comp(l, convert_string_to_number(string_value(*ri).c_str()))) + return true; + } + } + + return false; + } + else if (lt != xpath_type_node_set && rt == xpath_type_node_set) + { + double l = lhs->eval_number(c); + xpath_node_set rs = rhs->eval_node_set(c); + + for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) + { + if (comp(l, convert_string_to_number(string_value(*ri).c_str()))) + return true; + } + + return false; + } + else if (lt == xpath_type_node_set && rt != xpath_type_node_set) + { + xpath_node_set ls = lhs->eval_node_set(c); + double r = rhs->eval_number(c); + + for (xpath_node_set::const_iterator li = ls.begin(); li != ls.end(); ++li) + { + if (comp(convert_string_to_number(string_value(*li).c_str()), r)) + return true; + } + + return false; + } + else + { + assert(!"Wrong types"); + return false; + } + } + + void apply_predicate(xpath_node_set& ns, size_t first, xpath_ast_node* expr, const xpath_context& context) + { + xpath_context c; + c.root = context.root; + + size_t i = 1; + size_t size = ns.size() - first; + + xpath_node_set::iterator last = ns.mut_begin() + first; + + // remove_if... 
or well, sort of + for (xpath_node_set::iterator it = last; it != ns.end(); ++it, ++i) + { + c.n = *it; + c.position = i; + c.size = size; + + if (expr->rettype() == xpath_type_number) + { + if (expr->eval_number(c) == i) + *last++ = *it; + } + else if (expr->eval_boolean(c)) + *last++ = *it; + } + + ns.truncate(last); + } + + void apply_predicates(xpath_node_set& ns, size_t first, const xpath_context& context) + { + if (ns.size() <= first) return; + + for (xpath_ast_node* pred = m_right; pred; pred = pred->m_next) + { + apply_predicate(ns, first, pred->m_left, context); + } + } + + void step_push(xpath_node_set& ns, const xml_attribute& a, const xml_node& parent) + { + if (!a) return; + + const char_t* name = a.name(); + + // There are no attribute nodes corresponding to attributes that declare namespaces + // That is, "xmlns:..." or "xmlns" + if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return; + + switch (m_test) + { + case nodetest_name: + if (impl::strequal(name, m_data.nodetest)) ns.push_back(xpath_node(a, parent)); + break; + + case nodetest_type_node: + case nodetest_all: + ns.push_back(xpath_node(a, parent)); + break; + + case nodetest_all_in_namespace: + if (starts_with(name, m_data.nodetest)) + ns.push_back(xpath_node(a, parent)); + break; + + default: + ; + } + } + + void step_push(xpath_node_set& ns, const xml_node& n) + { + if (!n) return; + + switch (m_test) + { + case nodetest_name: + if (n.type() == node_element && impl::strequal(n.name(), m_data.nodetest)) ns.push_back(n); + break; + + case nodetest_type_node: + ns.push_back(n); + break; + + case nodetest_type_comment: + if (n.type() == node_comment) + ns.push_back(n); + break; + + case nodetest_type_text: + if (n.type() == node_pcdata || n.type() == node_cdata) + ns.push_back(n); + break; + + case nodetest_type_pi: + if (n.type() == node_pi) + ns.push_back(n); + break; + + case nodetest_pi: + if (n.type() == node_pi && impl::strequal(n.name(), 
m_data.nodetest)) + ns.push_back(n); + break; + + case nodetest_all: + if (n.type() == node_element) + ns.push_back(n); + break; + + case nodetest_all_in_namespace: + if (n.type() == node_element && starts_with(n.name(), m_data.nodetest)) + ns.push_back(n); + break; + + default: + assert(!"Unknown axis"); + } + } + + template void step_fill(xpath_node_set& ns, const xml_node& n, T) + { + const axis_t axis = T::axis; + + switch (axis) + { + case axis_attribute: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; + + for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute()) + step_push(ns, a, n); + + break; + } + + case axis_child: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; + + for (xml_node c = n.first_child(); c; c = c.next_sibling()) + step_push(ns, c); + + break; + } + + case axis_descendant: + case axis_descendant_or_self: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; + + if (axis == axis_descendant_or_self) + step_push(ns, n); + + xml_node cur = n.first_child(); + + if (cur) + { + do + { + step_push(ns, cur); + + if (cur.first_child()) + cur = cur.first_child(); + else if (cur.next_sibling()) + cur = cur.next_sibling(); + else + { + // Borland C++ workaround + while (!cur.next_sibling() && cur != n && (bool)cur.parent()) + cur = cur.parent(); + + if (cur != n) + cur = cur.next_sibling(); + } + } + while (cur && cur != n); + } + + break; + } + + case axis_following_sibling: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; + + for (xml_node c = n.next_sibling(); c; c = c.next_sibling()) + step_push(ns, c); + + break; + } + + case axis_preceding_sibling: + { + ns.m_type = ns.empty() ? 
xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted; + + for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling()) + step_push(ns, c); + + break; + } + + case axis_following: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; + + xml_node cur = n; + + // exit from this node so that we don't include descendants + while (cur && !cur.next_sibling()) cur = cur.parent(); + cur = cur.next_sibling(); + + if (cur) + { + for (;;) + { + step_push(ns, cur); + + if (cur.first_child()) + cur = cur.first_child(); + else if (cur.next_sibling()) + cur = cur.next_sibling(); + else + { + while (cur && !cur.next_sibling()) cur = cur.parent(); + cur = cur.next_sibling(); + + if (!cur) break; + } + } + } + + break; + } + + case axis_preceding: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted; + + xml_node cur = n; + + while (cur && !cur.previous_sibling()) cur = cur.parent(); + cur = cur.previous_sibling(); + + if (cur) + { + for (;;) + { + if (cur.last_child()) + cur = cur.last_child(); + else + { + // leaf node, can't be ancestor + step_push(ns, cur); + + if (cur.previous_sibling()) + cur = cur.previous_sibling(); + else + { + do + { + cur = cur.parent(); + if (!cur) break; + + if (!node_is_ancestor(cur, n)) step_push(ns, cur); + } + while (!cur.previous_sibling()); + + cur = cur.previous_sibling(); + + if (!cur) break; + } + } + } + } + + break; + } + + case axis_ancestor: + case axis_ancestor_or_self: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted; + + if (axis == axis_ancestor_or_self) + step_push(ns, n); + + xml_node cur = n.parent(); + + while (cur) + { + step_push(ns, cur); + + cur = cur.parent(); + } + + break; + } + + case axis_self: + { + ns.m_type = ns.empty() ? 
xpath_node_set::type_sorted : xpath_node_set::type_unsorted; + + step_push(ns, n); + + break; + } + + case axis_parent: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; + + if (n.parent()) step_push(ns, n.parent()); + + break; + } + + default: + assert(!"Unimplemented axis"); + } + } + + template void step_fill(xpath_node_set& ns, const xml_attribute& a, const xml_node& p, T v) + { + const axis_t axis = T::axis; + + switch (axis) + { + case axis_ancestor: + case axis_ancestor_or_self: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted; + + if (axis == axis_ancestor_or_self && m_test == nodetest_type_node) // reject attributes based on principal node type test + step_push(ns, a, p); + + xml_node cur = p; + + while (cur) + { + step_push(ns, cur); + + cur = cur.parent(); + } + + break; + } + + case axis_descendant_or_self: + case axis_self: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; + + if (m_test == nodetest_type_node) // reject attributes based on principal node type test + step_push(ns, a, p); + + break; + } + + case axis_following: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; + + xml_node cur = p; + + for (;;) + { + if (cur.first_child()) + cur = cur.first_child(); + else if (cur.next_sibling()) + cur = cur.next_sibling(); + else + { + while (cur && !cur.next_sibling()) cur = cur.parent(); + cur = cur.next_sibling(); + + if (!cur) break; + } + + step_push(ns, cur); + } + + break; + } + + case axis_parent: + { + ns.m_type = ns.empty() ? 
xpath_node_set::type_sorted : xpath_node_set::type_unsorted; + + step_push(ns, p); + + break; + } + + case axis_preceding: + { + // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding + step_fill(ns, p, v); + break; + } + + default: + assert(!"Unimplemented axis"); + } + } + + template void step_do(xpath_node_set& ns, const xpath_context& c, T v) + { + const axis_t axis = T::axis; + + assert(ns.empty()); + + switch (axis) + { + case axis_ancestor: + case axis_ancestor_or_self: + case axis_descendant_or_self: + case axis_following: + case axis_parent: + case axis_preceding: + case axis_self: + if (m_left) + { + xpath_node_set s = m_left->eval_node_set(c); + + for (xpath_node_set::const_iterator it = s.begin(); it != s.end(); ++it) + { + size_t size = ns.size(); + + if (it->node()) + step_fill(ns, it->node(), v); + else + step_fill(ns, it->attribute(), it->parent(), v); + + apply_predicates(ns, size, c); + } + } + else + { + if (c.n.node()) step_fill(ns, c.n.node(), v); + else step_fill(ns, c.n.attribute(), c.n.parent(), v); + + apply_predicates(ns, 0, c); + } + + break; + + case axis_following_sibling: + case axis_preceding_sibling: + case axis_attribute: + case axis_child: + case axis_descendant: + if (m_left) + { + xpath_node_set s = m_left->eval_node_set(c); + + for (xpath_node_set::const_iterator it = s.begin(); it != s.end(); ++it) + { + size_t size = ns.size(); + + if (it->node()) + step_fill(ns, it->node(), v); + + apply_predicates(ns, size, c); + } + } + else if (c.n.node()) + { + step_fill(ns, c.n.node(), v); + + apply_predicates(ns, 0, c); + } + + break; + + case axis_namespace: + break; + + default: + assert(!"Unimplemented axis"); + } + } + + static const char_t* duplicate_string(const xpath_lexer_string& value, xpath_allocator& a) + { + if (value.begin) + { + size_t length = static_cast(value.end - value.begin); + + char_t* c = static_cast(a.alloc((length + 
1) * sizeof(char_t))); + memcpy(c, value.begin, length * sizeof(char_t)); + c[length] = 0; + + return c; + } + else return 0; + } + public: + xpath_ast_node(ast_type_t type, xpath_value_type rettype, const xpath_lexer_string& value, xpath_allocator& a): + m_type((char)type), m_rettype((char)rettype), m_axis(0), m_test(0), m_left(0), m_right(0), m_next(0) + { + assert(type == ast_string_constant); + m_data.string = duplicate_string(value, a); + } + + xpath_ast_node(ast_type_t type, xpath_value_type rettype, double value): + m_type((char)type), m_rettype((char)rettype), m_axis(0), m_test(0), m_left(0), m_right(0), m_next(0) + { + assert(type == ast_number_constant); + m_data.number = value; + } + + xpath_ast_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0): + m_type((char)type), m_rettype((char)rettype), m_axis(0), m_test(0), m_left(left), m_right(right), m_next(0) + { + } + + xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const xpath_lexer_string& contents, xpath_allocator& a): + m_type((char)type), m_rettype(xpath_type_node_set), m_axis((char)axis), m_test((char)test), m_left(left), m_right(0), m_next(0) + { + m_data.nodetest = duplicate_string(contents, a); + } + + void set_next(xpath_ast_node* value) + { + m_next = value; + } + + void set_right(xpath_ast_node* value) + { + m_right = value; + } + + bool eval_boolean(const xpath_context& c) + { + switch (m_type) + { + case ast_op_or: + if (m_left->eval_boolean(c)) return true; + else return m_right->eval_boolean(c); + + case ast_op_and: + if (!m_left->eval_boolean(c)) return false; + else return m_right->eval_boolean(c); + + case ast_op_equal: + return compare_eq(m_left, m_right, c, equal_to()); + + case ast_op_not_equal: + return compare_eq(m_left, m_right, c, not_equal_to()); + + case ast_op_less: + return compare_rel(m_left, m_right, c, less()); + + case ast_op_greater: + return compare_rel(m_right, m_left, c, less()); + + 
case ast_op_less_or_equal: + return compare_rel(m_left, m_right, c, less_equal()); + + case ast_op_greater_or_equal: + return compare_rel(m_right, m_left, c, less_equal()); + + case ast_func_starts_with: + return starts_with(m_left->eval_string(c).c_str(), m_right->eval_string(c).c_str()); + + case ast_func_contains: + { + string_t lr = m_left->eval_string(c); + string_t rr = m_right->eval_string(c); + + return rr.empty() || lr.find(rr) != string_t::npos; + } + + case ast_func_boolean: + return m_left->eval_boolean(c); + + case ast_func_not: + return !m_left->eval_boolean(c); + + case ast_func_true: + return true; + + case ast_func_false: + return false; + + case ast_func_lang: + { + if (c.n.attribute()) return false; + + string_t lang = m_left->eval_string(c); + + for (xml_node n = c.n.node(); n; n = n.parent()) + { + xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang")); + + if (a) + { + const char_t* value = a.value(); + + // strnicmp / strncasecmp is not portable + for (const char_t* lit = lang.c_str(); *lit; ++lit) + { + if (tolower(*lit) != tolower(*value)) return false; + ++value; + } + + return *value == 0 || *value == '-'; + } + } + + return false; + } + + default: + { + switch (m_rettype) + { + case xpath_type_number: + return convert_number_to_boolean(eval_number(c)); + + case xpath_type_string: + return !eval_string(c).empty(); + + case xpath_type_node_set: + return !eval_node_set(c).empty(); + + default: + assert(!"Wrong expression for return type boolean"); + return false; + } + } + } + } + + double eval_number(const xpath_context& c) + { + switch (m_type) + { + case ast_op_add: + return m_left->eval_number(c) + m_right->eval_number(c); + + case ast_op_subtract: + return m_left->eval_number(c) - m_right->eval_number(c); + + case ast_op_multiply: + return m_left->eval_number(c) * m_right->eval_number(c); + + case ast_op_divide: + return m_left->eval_number(c) / m_right->eval_number(c); + + case ast_op_mod: + return fmod(m_left->eval_number(c), 
m_right->eval_number(c)); + + case ast_op_negate: + return -m_left->eval_number(c); + + case ast_number_constant: + return m_data.number; + + case ast_func_last: + return (double)c.size; + + case ast_func_position: + return (double)c.position; + + case ast_func_count: + return (double)m_left->eval_node_set(c).size(); + + case ast_func_string_length_0: + return (double)string_value(c.n).size(); + + case ast_func_string_length_1: + return (double)m_left->eval_string(c).size(); + + case ast_func_number_0: + return convert_string_to_number(string_value(c.n).c_str()); + + case ast_func_number_1: + return m_left->eval_number(c); + + case ast_func_sum: + { + double r = 0; + + xpath_node_set ns = m_left->eval_node_set(c); + + for (xpath_node_set::const_iterator it = ns.begin(); it != ns.end(); ++it) + r += convert_string_to_number(string_value(*it).c_str()); + + return r; + } + + case ast_func_floor: + { + double r = m_left->eval_number(c); + + return r == r ? floor(r) : r; + } + + case ast_func_ceiling: + { + double r = m_left->eval_number(c); + + return r == r ? ceil(r) : r; + } + + case ast_func_round: + return round_nearest_nzero(m_left->eval_number(c)); + + default: + { + switch (m_rettype) + { + case xpath_type_boolean: + return eval_boolean(c) ? 
1 : 0; + + case xpath_type_string: + return convert_string_to_number(eval_string(c).c_str()); + + case xpath_type_node_set: + return convert_string_to_number(eval_string(c).c_str()); + + default: + assert(!"Wrong expression for return type number"); + return 0; + } + + } + } + } + + string_t eval_string(const xpath_context& c) + { + switch (m_type) + { + case ast_string_constant: + return m_data.string; + + case ast_func_local_name_0: + { + xpath_node na = c.n; + + if (na.attribute()) return local_name(na.attribute().name()); + else return local_name(na.node().name()); + } + + case ast_func_local_name_1: + { + xpath_node_set ns = m_left->eval_node_set(c); + if (ns.empty()) return string_t(); + + xpath_node na = ns.first(); + + if (na.attribute()) return local_name(na.attribute().name()); + else return local_name(na.node().name()); + } + + case ast_func_name_0: + { + xpath_node na = c.n; + + if (na.attribute()) return na.attribute().name(); + else return na.node().name(); + } + + case ast_func_name_1: + { + xpath_node_set ns = m_left->eval_node_set(c); + if (ns.empty()) return string_t(); + + xpath_node na = ns.first(); + + if (na.attribute()) return na.attribute().name(); + else return na.node().name(); + } + + case ast_func_namespace_uri_0: + { + xpath_node na = c.n; + + if (na.attribute()) return namespace_uri(na.attribute(), na.parent()); + else return namespace_uri(na.node()); + } + + case ast_func_namespace_uri_1: + { + xpath_node_set ns = m_left->eval_node_set(c); + if (ns.empty()) return string_t(); + + xpath_node na = ns.first(); + + if (na.attribute()) return namespace_uri(na.attribute(), na.parent()); + else return namespace_uri(na.node()); + } + + case ast_func_string_0: + return string_value(c.n); + + case ast_func_string_1: + return m_left->eval_string(c); + + case ast_func_concat: + { + string_t r = m_left->eval_string(c); + + for (xpath_ast_node* n = m_right; n; n = n->m_next) + r += n->eval_string(c); + + return r; + } + + case 
ast_func_substring_before: + { + string_t s = m_left->eval_string(c); + string_t::size_type pos = s.find(m_right->eval_string(c)); + + if (pos == string_t::npos) return string_t(); + else return string_t(s.begin(), s.begin() + pos); + } + + case ast_func_substring_after: + { + string_t s = m_left->eval_string(c); + string_t p = m_right->eval_string(c); + + string_t::size_type pos = s.find(p); + + if (pos == string_t::npos) return string_t(); + else return string_t(s.begin() + pos + p.length(), s.end()); + } + + case ast_func_substring_2: + { + string_t s = m_left->eval_string(c); + double first = round_nearest(m_right->eval_number(c)); + + if (is_nan(first)) return string_t(); // NaN + else if (first >= s.length() + 1) return string_t(); + + size_t pos = first < 1 ? 1 : (size_t)first; + + return s.substr(pos - 1); + } + + case ast_func_substring_3: + { + string_t s = m_left->eval_string(c); + double first = round_nearest(m_right->eval_number(c)); + double last = first + round_nearest(m_right->m_next->eval_number(c)); + + if (is_nan(first) || is_nan(last)) return string_t(); + else if (first >= s.length() + 1) return string_t(); + else if (first >= last) return string_t(); + + size_t pos = first < 1 ? 1 : (size_t)first; + size_t end = last >= s.length() + 1 ? s.length() + 1 : (size_t)last; + + size_t size_requested = end - pos; + size_t size_to_end = s.length() - pos + 1; + + return s.substr(pos - 1, size_requested < size_to_end ? size_requested : size_to_end); + } + + case ast_func_normalize_space_0: + case ast_func_normalize_space_1: + { + string_t s = m_type == ast_func_normalize_space_0 ? 
string_value(c.n) : m_left->eval_string(c); + + string_t r; + r.reserve(s.size()); + + for (string_t::const_iterator it = s.begin(); it != s.end(); ++it) + { + if (IS_CHARTYPEX(*it, ctx_space)) + { + if (!r.empty() && r[r.size() - 1] != ' ') + r += ' '; + } + else r += *it; + } + + string_t::size_type pos = r.find_last_not_of(' '); + if (pos == string_t::npos) r = string_t(); + else r.erase(r.begin() + pos + 1, r.end()); + + return r; + } + + case ast_func_translate: + { + string_t s = m_left->eval_string(c); + string_t from = m_right->eval_string(c); + string_t to = m_right->m_next->eval_string(c); + + for (string_t::iterator it = s.begin(); it != s.end(); ) + { + string_t::size_type pos = from.find(*it); + + if (pos == string_t::npos) + ++it; + else if (pos >= to.length()) + it = s.erase(it); + else + *it++ = to[pos]; + } + + return s; + } + + default: + { + switch (m_rettype) + { + case xpath_type_boolean: + return eval_boolean(c) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"); + + case xpath_type_number: + return convert_number_to_string(eval_number(c)); + + case xpath_type_node_set: + { + xpath_node_set ns = eval_node_set(c); + return ns.empty() ? 
string_t() : string_value(ns.first()); + } + + default: + assert(!"Wrong expression for return type string"); + return string_t(); + } + } + } + } + + xpath_node_set eval_node_set(const xpath_context& c) + { + switch (m_type) + { + case ast_op_union: + { + xpath_node_set ls = m_left->eval_node_set(c); + xpath_node_set rs = m_right->eval_node_set(c); + + // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother + ls.m_type = xpath_node_set::type_unsorted; + + ls.append(rs.begin(), rs.end()); + + ls.remove_duplicates(); + + return ls; + } + + case ast_filter: + case ast_filter_posinv: + { + xpath_node_set set = m_left->eval_node_set(c); + + // either expression is a number or it contains position() call; sort by document order + if (m_type == ast_filter) set.sort(); + + apply_predicate(set, 0, m_right, c); + + return set; + } + + case ast_func_id: + return xpath_node_set(); + + case ast_step: + { + xpath_node_set ns; + + switch (m_axis) + { + case axis_ancestor: + step_do(ns, c, axis_to_type()); + break; + + case axis_ancestor_or_self: + step_do(ns, c, axis_to_type()); + break; + + case axis_attribute: + step_do(ns, c, axis_to_type()); + break; + + case axis_child: + step_do(ns, c, axis_to_type()); + break; + + case axis_descendant: + step_do(ns, c, axis_to_type()); + break; + + case axis_descendant_or_self: + step_do(ns, c, axis_to_type()); + break; + + case axis_following: + step_do(ns, c, axis_to_type()); + break; + + case axis_following_sibling: + step_do(ns, c, axis_to_type()); + break; + + case axis_namespace: + step_do(ns, c, axis_to_type()); + break; + + case axis_parent: + step_do(ns, c, axis_to_type()); + break; + + case axis_preceding: + step_do(ns, c, axis_to_type()); + break; + + case axis_preceding_sibling: + step_do(ns, c, axis_to_type()); + break; + + case axis_self: + step_do(ns, c, axis_to_type()); + break; + } + + ns.remove_duplicates(); + + return ns; + } + + case ast_step_root: + { + xpath_node_set ns; + + if 
(c.root) + { + ns.push_back(c.root); + + apply_predicates(ns, 0, c); + } + + return ns; + } + + default: + assert(!"Wrong expression for return type node set"); + return xpath_node_set(); + } + } + + bool is_posinv() + { + switch (m_type) + { + case ast_func_position: + return false; + + case ast_string_constant: + case ast_number_constant: + // $$ case ast_variable: + return true; + + case ast_step: + case ast_step_root: + return true; + + case ast_predicate: + case ast_filter: + case ast_filter_posinv: + return true; + + default: + if (m_left && !m_left->is_posinv()) return false; + + for (xpath_ast_node* n = m_right; n; n = n->m_next) + if (!n->is_posinv()) return false; + + return true; + } + } + + xpath_value_type rettype() const + { + return static_cast(m_rettype); + } + }; + + void* xpath_allocator::node() + { + return alloc(sizeof(xpath_ast_node)); + } + + class xpath_parser + { + private: + xpath_allocator& m_alloc; + xpath_lexer m_lexer; + + xpath_parser(const xpath_parser&); + xpath_parser& operator=(const xpath_parser&); + + xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2]) + { + assert(argc <= 1); + + if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw xpath_exception("Function has to be applied to node set"); + + return new (m_alloc.node()) xpath_ast_node(argc == 0 ? 
type0 : type1, xpath_type_string, args[0]); + } + + xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) + { + switch (name.begin[0]) + { + case 'b': + if (name == PUGIXML_TEXT("boolean") && argc == 1) + return new (m_alloc.node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]); + + break; + + case 'c': + if (name == PUGIXML_TEXT("count") && argc == 1) + { + if (args[0]->rettype() != xpath_type_node_set) throw xpath_exception("count() has to be applied to node set"); + return new (m_alloc.node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]); + } + else if (name == PUGIXML_TEXT("contains") && argc == 2) + return new (m_alloc.node()) xpath_ast_node(ast_func_contains, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("concat") && argc >= 2) + return new (m_alloc.node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("ceiling") && argc == 1) + return new (m_alloc.node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]); + + break; + + case 'f': + if (name == PUGIXML_TEXT("false") && argc == 0) + return new (m_alloc.node()) xpath_ast_node(ast_func_false, xpath_type_boolean); + else if (name == PUGIXML_TEXT("floor") && argc == 1) + return new (m_alloc.node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]); + + break; + + case 'i': + if (name == PUGIXML_TEXT("id") && argc == 1) + return new (m_alloc.node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]); + + break; + + case 'l': + if (name == PUGIXML_TEXT("last") && argc == 0) + return new (m_alloc.node()) xpath_ast_node(ast_func_last, xpath_type_number); + else if (name == PUGIXML_TEXT("lang") && argc == 1) + return new (m_alloc.node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]); + else if (name == PUGIXML_TEXT("local-name") && argc <= 1) + return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, 
args); + + break; + + case 'n': + if (name == PUGIXML_TEXT("name") && argc <= 1) + return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args); + else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) + return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args); + else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) + return new (m_alloc.node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("not") && argc == 1) + return new (m_alloc.node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]); + else if (name == PUGIXML_TEXT("number") && argc <= 1) + return new (m_alloc.node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); + + break; + + case 'p': + if (name == PUGIXML_TEXT("position") && argc == 0) + return new (m_alloc.node()) xpath_ast_node(ast_func_position, xpath_type_number); + + break; + + case 'r': + if (name == PUGIXML_TEXT("round") && argc == 1) + return new (m_alloc.node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]); + + break; + + case 's': + if (name == PUGIXML_TEXT("string") && argc <= 1) + return new (m_alloc.node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); + else if (name == PUGIXML_TEXT("string-length") && argc <= 1) + return new (m_alloc.node()) xpath_ast_node(argc == 0 ? 
ast_func_string_length_0 : ast_func_string_length_1, xpath_type_string, args[0]); + else if (name == PUGIXML_TEXT("starts-with") && argc == 2) + return new (m_alloc.node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); + else if (name == PUGIXML_TEXT("substring-before") && argc == 2) + return new (m_alloc.node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("substring-after") && argc == 2) + return new (m_alloc.node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) + return new (m_alloc.node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("sum") && argc == 1) + { + if (args[0]->rettype() != xpath_type_node_set) throw xpath_exception("sum() has to be applied to node set"); + return new (m_alloc.node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]); + } + + break; + + case 't': + if (name == PUGIXML_TEXT("translate") && argc == 3) + return new (m_alloc.node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("true") && argc == 0) + return new (m_alloc.node()) xpath_ast_node(ast_func_true, xpath_type_boolean); + + break; + } + + throw xpath_exception("Unrecognized function or wrong parameter count"); + + #ifdef __DMC__ + return 0; // Digital Mars C++ + #endif + } + + axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) + { + specified = true; + + switch (name.begin[0]) + { + case 'a': + if (name == PUGIXML_TEXT("ancestor")) + return axis_ancestor; + else if (name == PUGIXML_TEXT("ancestor-or-self")) + return axis_ancestor_or_self; + else if (name == PUGIXML_TEXT("attribute")) + return axis_attribute; + + break; + + case 'c': + if (name == PUGIXML_TEXT("child")) + return axis_child; + + 
break; + + case 'd': + if (name == PUGIXML_TEXT("descendant")) + return axis_descendant; + else if (name == PUGIXML_TEXT("descendant-or-self")) + return axis_descendant_or_self; + + break; + + case 'f': + if (name == PUGIXML_TEXT("following")) + return axis_following; + else if (name == PUGIXML_TEXT("following-sibling")) + return axis_following_sibling; + + break; + + case 'n': + if (name == PUGIXML_TEXT("namespace")) + return axis_namespace; + + break; + + case 'p': + if (name == PUGIXML_TEXT("parent")) + return axis_parent; + else if (name == PUGIXML_TEXT("preceding")) + return axis_preceding; + else if (name == PUGIXML_TEXT("preceding-sibling")) + return axis_preceding_sibling; + + break; + + case 's': + if (name == PUGIXML_TEXT("self")) + return axis_self; + + break; + } + + specified = false; + return axis_child; + } + + nodetest_t parse_node_test_type(const xpath_lexer_string& name) + { + switch (name.begin[0]) + { + case 'c': + if (name == PUGIXML_TEXT("comment")) + return nodetest_type_comment; + + break; + + case 'n': + if (name == PUGIXML_TEXT("node")) + return nodetest_type_node; + + break; + + case 'p': + if (name == PUGIXML_TEXT("processing-instruction")) + return nodetest_type_pi; + + break; + + case 't': + if (name == PUGIXML_TEXT("text")) + return nodetest_type_text; + + break; + } + + return nodetest_none; + } + + // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall + xpath_ast_node* parse_primary_expression() + { + switch (m_lexer.current()) + { + case lex_var_ref: + { + throw xpath_exception("Variables are not supported"); + + #ifdef __DMC__ + return 0; // Digital Mars C++ + #endif + } + + case lex_open_brace: + { + m_lexer.next(); + + xpath_ast_node* n = parse_expression(); + + if (m_lexer.current() != lex_close_brace) + throw xpath_exception("Unmatched braces"); + + m_lexer.next(); + + return n; + } + + case lex_quoted_string: + { + xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_string_constant, 
xpath_type_string, m_lexer.contents(), m_alloc); + m_lexer.next(); + + return n; + } + + case lex_number: + { + double value = convert_string_to_number(m_lexer.contents().begin, m_lexer.contents().end); + + xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_number_constant, xpath_type_number, value); + m_lexer.next(); + + return n; + } + + case lex_string: + { + xpath_ast_node* args[2] = {0}; + size_t argc = 0; + + xpath_lexer_string function = m_lexer.contents(); + m_lexer.next(); + + xpath_ast_node* last_arg = 0; + + if (m_lexer.current() != lex_open_brace) + throw xpath_exception("Unrecognized function call"); + m_lexer.next(); + + if (m_lexer.current() != lex_close_brace) + args[argc++] = parse_expression(); + + while (m_lexer.current() != lex_close_brace) + { + if (m_lexer.current() != lex_comma) + throw xpath_exception("No comma between function arguments"); + m_lexer.next(); + + xpath_ast_node* n = parse_expression(); + + if (argc < 2) args[argc] = n; + else last_arg->set_next(n); + + argc++; + last_arg = n; + } + + m_lexer.next(); + + return parse_function(function, argc, args); + } + + default: + throw xpath_exception("Unrecognizable primary expression"); + #ifdef __DMC__ + return 0; // Digital Mars C++ + #endif + } + } + + // FilterExpr ::= PrimaryExpr | FilterExpr Predicate + // Predicate ::= '[' PredicateExpr ']' + // PredicateExpr ::= Expr + xpath_ast_node* parse_filter_expression() + { + xpath_ast_node* n = parse_primary_expression(); + + while (m_lexer.current() == lex_open_square_brace) + { + m_lexer.next(); + + xpath_ast_node* expr = parse_expression(); + + if (n->rettype() != xpath_type_node_set) throw xpath_exception("Predicate has to be applied to node set"); + + bool posinv = expr->rettype() != xpath_type_number && expr->is_posinv(); + + n = new (m_alloc.node()) xpath_ast_node(posinv ? 
ast_filter_posinv : ast_filter, xpath_type_node_set, n, expr); + + if (m_lexer.current() != lex_close_square_brace) + throw xpath_exception("Unmatched square brace"); + + m_lexer.next(); + } + + return n; + } + + // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep + // AxisSpecifier ::= AxisName '::' | '@'? + // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')' + // NameTest ::= '*' | NCName ':' '*' | QName + // AbbreviatedStep ::= '.' | '..' + xpath_ast_node* parse_step(xpath_ast_node* set) + { + if (set && set->rettype() != xpath_type_node_set) + throw xpath_exception("Step has to be applied to node set"); + + bool axis_specified = false; + axis_t axis = axis_child; // implied child axis + + if (m_lexer.current() == lex_axis_attribute) + { + axis = axis_attribute; + axis_specified = true; + + m_lexer.next(); + } + else if (m_lexer.current() == lex_dot) + { + m_lexer.next(); + + return new (m_alloc.node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, xpath_lexer_string(), m_alloc); + } + else if (m_lexer.current() == lex_double_dot) + { + m_lexer.next(); + + return new (m_alloc.node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, xpath_lexer_string(), m_alloc); + } + + nodetest_t nt_type = nodetest_none; + xpath_lexer_string nt_name; + + if (m_lexer.current() == lex_string) + { + // node name test + nt_name = m_lexer.contents(); + m_lexer.next(); + + // was it an axis name? 
+ if (m_lexer.current() == lex_double_colon) + { + // parse axis name + if (axis_specified) throw xpath_exception("Two axis specifiers in one step"); + + axis = parse_axis_name(nt_name, axis_specified); + + if (!axis_specified) throw xpath_exception("Unknown axis"); + + // read actual node test + m_lexer.next(); + + if (m_lexer.current() == lex_multiply) + { + nt_type = nodetest_all; + nt_name = xpath_lexer_string(); + m_lexer.next(); + } + else if (m_lexer.current() == lex_string) + { + nt_name = m_lexer.contents(); + m_lexer.next(); + } + else throw xpath_exception("Unrecognized node test"); + } + + if (nt_type == nodetest_none) + { + // node type test or processing-instruction + if (m_lexer.current() == lex_open_brace) + { + m_lexer.next(); + + if (m_lexer.current() == lex_close_brace) + { + m_lexer.next(); + + nt_type = parse_node_test_type(nt_name); + + if (nt_type == nodetest_none) throw xpath_exception("Unrecognized node type"); + + nt_name = xpath_lexer_string(); + } + else if (nt_name == PUGIXML_TEXT("processing-instruction")) + { + if (m_lexer.current() != lex_quoted_string) + throw xpath_exception("Only literals are allowed as arguments to processing-instruction()"); + + nt_type = nodetest_pi; + nt_name = m_lexer.contents(); + m_lexer.next(); + + if (m_lexer.current() != lex_close_brace) + throw xpath_exception("Unmatched brace near processing-instruction()"); + m_lexer.next(); + } + else + throw xpath_exception("Unmatched brace near node type test"); + + } + // QName or NCName:* + else + { + const char_t* colon_pos = std::char_traits::find(nt_name.begin, static_cast(nt_name.end - nt_name.begin), ':'); + + if (colon_pos && colon_pos + 2 == nt_name.end && colon_pos[1] == '*') // NCName:* + { + nt_name.end--; // erase * + + nt_type = nodetest_all_in_namespace; + } + else nt_type = nodetest_name; + } + } + } + else if (m_lexer.current() == lex_multiply) + { + nt_type = nodetest_all; + m_lexer.next(); + } + else throw xpath_exception("Unrecognized node 
test"); + + xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_step, set, axis, nt_type, nt_name, m_alloc); + + xpath_ast_node* last = 0; + + while (m_lexer.current() == lex_open_square_brace) + { + m_lexer.next(); + + xpath_ast_node* expr = parse_expression(); + + xpath_ast_node* pred = new (m_alloc.node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr); + + if (m_lexer.current() != lex_close_square_brace) + throw xpath_exception("Unmatched square brace"); + m_lexer.next(); + + if (last) last->set_next(pred); + else n->set_right(pred); + + last = pred; + } + + return n; + } + + // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step + xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) + { + xpath_ast_node* n = parse_step(set); + + while (m_lexer.current() == lex_slash || m_lexer.current() == lex_double_slash) + { + lexeme_t l = m_lexer.current(); + m_lexer.next(); + + if (l == lex_double_slash) + n = new (m_alloc.node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, xpath_lexer_string(), m_alloc); + + n = parse_step(n); + } + + return n; + } + + // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath + // AbsoluteLocationPath ::= '/' RelativeLocationPath? 
| '//' RelativeLocationPath + xpath_ast_node* parse_location_path() + { + if (m_lexer.current() == lex_slash) + { + m_lexer.next(); + + xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_step_root, xpath_type_node_set); + + // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path + lexeme_t l = m_lexer.current(); + + if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply) + return parse_relative_location_path(n); + else + return n; + } + else if (m_lexer.current() == lex_double_slash) + { + m_lexer.next(); + + xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_step_root, xpath_type_node_set); + n = new (m_alloc.node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, xpath_lexer_string(), m_alloc); + + return parse_relative_location_path(n); + } + else + { + return parse_relative_location_path(0); + } + } + + // PathExpr ::= LocationPath + // | FilterExpr + // | FilterExpr '/' RelativeLocationPath + // | FilterExpr '//' RelativeLocationPath + xpath_ast_node* parse_path_expression() + { + // Clarification. + // PathExpr begins with either LocationPath or FilterExpr. + // FilterExpr begins with PrimaryExpr + // PrimaryExpr begins with '$' in case of it being a variable reference, + // '(' in case of it being an expression, string literal, number constant or + // function call. 
+ + if (m_lexer.current() == lex_var_ref || m_lexer.current() == lex_open_brace || + m_lexer.current() == lex_quoted_string || m_lexer.current() == lex_number || + m_lexer.current() == lex_string) + { + if (m_lexer.current() == lex_string) + { + // This is either a function call, or not - if not, we shall proceed with location path + const char_t* state = m_lexer.state(); + + while (IS_CHARTYPEX(*state, ctx_space)) ++state; + + if (*state != '(') return parse_location_path(); + + // This looks like a function call; however this still can be a node-test. Check it. + if (parse_node_test_type(m_lexer.contents()) != nodetest_none) return parse_location_path(); + } + + xpath_ast_node* n = parse_filter_expression(); + + if (m_lexer.current() == lex_slash || m_lexer.current() == lex_double_slash) + { + lexeme_t l = m_lexer.current(); + m_lexer.next(); + + if (l == lex_double_slash) + { + if (n->rettype() != xpath_type_node_set) throw xpath_exception("Step has to be applied to node set"); + + n = new (m_alloc.node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, xpath_lexer_string(), m_alloc); + } + + // select from location path + return parse_relative_location_path(n); + } + + return n; + } + else return parse_location_path(); + } + + // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr + xpath_ast_node* parse_union_expression() + { + xpath_ast_node* n = parse_path_expression(); + + while (m_lexer.current() == lex_union) + { + m_lexer.next(); + + xpath_ast_node* expr = parse_union_expression(); + + if (n->rettype() != xpath_type_node_set || expr->rettype() != xpath_type_node_set) + throw xpath_exception("Union operator has to be applied to node sets"); + + n = new (m_alloc.node()) xpath_ast_node(ast_op_union, xpath_type_node_set, n, expr); + } + + return n; + } + + // UnaryExpr ::= UnionExpr | '-' UnaryExpr + xpath_ast_node* parse_unary_expression() + { + if (m_lexer.current() == lex_minus) + { + m_lexer.next(); + + xpath_ast_node* expr = 
parse_unary_expression(); + + return new (m_alloc.node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr); + } + else return parse_union_expression(); + } + + // MultiplicativeExpr ::= UnaryExpr + // | MultiplicativeExpr '*' UnaryExpr + // | MultiplicativeExpr 'div' UnaryExpr + // | MultiplicativeExpr 'mod' UnaryExpr + xpath_ast_node* parse_multiplicative_expression() + { + xpath_ast_node* n = parse_unary_expression(); + + while (m_lexer.current() == lex_multiply || (m_lexer.current() == lex_string && + (m_lexer.contents() == PUGIXML_TEXT("mod") || m_lexer.contents() == PUGIXML_TEXT("div")))) + { + ast_type_t op = m_lexer.current() == lex_multiply ? ast_op_multiply : + m_lexer.contents().begin[0] == 'd' ? ast_op_divide : ast_op_mod; + m_lexer.next(); + + xpath_ast_node* expr = parse_unary_expression(); + + n = new (m_alloc.node()) xpath_ast_node(op, xpath_type_number, n, expr); + } + + return n; + } + + // AdditiveExpr ::= MultiplicativeExpr + // | AdditiveExpr '+' MultiplicativeExpr + // | AdditiveExpr '-' MultiplicativeExpr + xpath_ast_node* parse_additive_expression() + { + xpath_ast_node* n = parse_multiplicative_expression(); + + while (m_lexer.current() == lex_plus || m_lexer.current() == lex_minus) + { + lexeme_t l = m_lexer.current(); + + m_lexer.next(); + + xpath_ast_node* expr = parse_multiplicative_expression(); + + n = new (m_alloc.node()) xpath_ast_node(l == lex_plus ? 
ast_op_add : ast_op_subtract, xpath_type_number, n, expr); + } + + return n; + } + + // RelationalExpr ::= AdditiveExpr + // | RelationalExpr '<' AdditiveExpr + // | RelationalExpr '>' AdditiveExpr + // | RelationalExpr '<=' AdditiveExpr + // | RelationalExpr '>=' AdditiveExpr + xpath_ast_node* parse_relational_expression() + { + xpath_ast_node* n = parse_additive_expression(); + + while (m_lexer.current() == lex_less || m_lexer.current() == lex_less_or_equal || + m_lexer.current() == lex_greater || m_lexer.current() == lex_greater_or_equal) + { + lexeme_t l = m_lexer.current(); + m_lexer.next(); + + xpath_ast_node* expr = parse_additive_expression(); + + n = new (m_alloc.node()) xpath_ast_node(l == lex_less ? ast_op_less : l == lex_greater ? ast_op_greater : + l == lex_less_or_equal ? ast_op_less_or_equal : ast_op_greater_or_equal, xpath_type_boolean, n, expr); + } + + return n; + } + + // EqualityExpr ::= RelationalExpr + // | EqualityExpr '=' RelationalExpr + // | EqualityExpr '!=' RelationalExpr + xpath_ast_node* parse_equality_expression() + { + xpath_ast_node* n = parse_relational_expression(); + + while (m_lexer.current() == lex_equal || m_lexer.current() == lex_not_equal) + { + lexeme_t l = m_lexer.current(); + + m_lexer.next(); + + xpath_ast_node* expr = parse_relational_expression(); + + n = new (m_alloc.node()) xpath_ast_node(l == lex_equal ? 
ast_op_equal : ast_op_not_equal, xpath_type_boolean, n, expr); + } + + return n; + } + + // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr + xpath_ast_node* parse_and_expression() + { + xpath_ast_node* n = parse_equality_expression(); + + while (m_lexer.current() == lex_string && m_lexer.contents() == PUGIXML_TEXT("and")) + { + m_lexer.next(); + + xpath_ast_node* expr = parse_equality_expression(); + + n = new (m_alloc.node()) xpath_ast_node(ast_op_and, xpath_type_boolean, n, expr); + } + + return n; + } + + // OrExpr ::= AndExpr | OrExpr 'or' AndExpr + xpath_ast_node* parse_or_expression() + { + xpath_ast_node* n = parse_and_expression(); + + while (m_lexer.current() == lex_string && m_lexer.contents() == PUGIXML_TEXT("or")) + { + m_lexer.next(); + + xpath_ast_node* expr = parse_and_expression(); + + n = new (m_alloc.node()) xpath_ast_node(ast_op_or, xpath_type_boolean, n, expr); + } + + return n; + } + + // Expr ::= OrExpr + xpath_ast_node* parse_expression() + { + return parse_or_expression(); + } + + public: + explicit xpath_parser(const char_t* query, xpath_allocator& alloc): m_alloc(alloc), m_lexer(query) + { + } + + xpath_ast_node* parse() + { + xpath_ast_node* result = parse_expression(); + + if (m_lexer.current() != lex_eof) + { + // there are still unparsed tokens left, error + throw xpath_exception("Incorrect query"); + } + + return result; + } + }; + + xpath_query::xpath_query(const char_t* query): m_alloc(0), m_root(0) + { + compile(query); + } + + xpath_query::~xpath_query() + { + delete m_alloc; + } + + void xpath_query::compile(const char_t* query) + { + delete m_alloc; + m_alloc = new xpath_allocator; + + xpath_parser p(query, *m_alloc); + + m_root = p.parse(); + } + + xpath_value_type xpath_query::return_type() const + { + if (!m_root) return xpath_type_none; + + return m_root->rettype(); + } + + bool xpath_query::evaluate_boolean(const xml_node& n) const + { + if (!m_root) return false; + + xpath_context c; + + c.root = n.root(); + c.n = 
n; + c.position = 1; + c.size = 1; + + return m_root->eval_boolean(c); + } + + double xpath_query::evaluate_number(const xml_node& n) const + { + if (!m_root) return gen_nan(); + + xpath_context c; + + c.root = n.root(); + c.n = n; + c.position = 1; + c.size = 1; + + return m_root->eval_number(c); + } + + string_t xpath_query::evaluate_string(const xml_node& n) const + { + if (!m_root) return string_t(); + + xpath_context c; + + c.root = n.root(); + c.n = n; + c.position = 1; + c.size = 1; + + return m_root->eval_string(c); + } + + xpath_node_set xpath_query::evaluate_node_set(const xml_node& n) const + { + if (!m_root) return xpath_node_set(); + if (m_root->rettype() != xpath_type_node_set) throw xpath_exception("Expression does not evaluate to node set"); + + xpath_context c; + + c.root = n.root(); + c.n = n; + c.position = 1; + c.size = 1; + + return m_root->eval_node_set(c); + } + + xpath_node xml_node::select_single_node(const char_t* query) const + { + xpath_query q(query); + return select_single_node(q); + } + + xpath_node xml_node::select_single_node(const xpath_query& query) const + { + xpath_node_set s = query.evaluate_node_set(*this); + return s.empty() ? 
xpath_node() : s.first(); + } + + xpath_node_set xml_node::select_nodes(const char_t* query) const + { + xpath_query q(query); + return select_nodes(q); + } + + xpath_node_set xml_node::select_nodes(const xpath_query& query) const + { + return query.evaluate_node_set(*this); + } +} + +#endif + +/** + * Copyright (c) 2006-2010 Arseny Kapoulkine + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ diff --git a/tests/allocator.cpp b/tests/allocator.cpp index e0efeef..234f95c 100644 --- a/tests/allocator.cpp +++ b/tests/allocator.cpp @@ -1,94 +1,94 @@ -#include "allocator.hpp" - -#include - -// Low-level allocation functions -#if defined(_WIN32) || defined(_WIN64) -# ifdef __MWERKS__ -# pragma ANSI_strict off // disable ANSI strictness to include windows.h -# pragma cpp_extensions on // enable some extensions to include windows.h -# endif - -# ifdef _XBOX_VER -# define NOD3D -# include -# else -# include -# endif - -namespace -{ - const size_t PAGE_SIZE = 4096; - - void* allocate(size_t size) - { - size_t aligned_size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); - - void* ptr = VirtualAlloc(0, aligned_size + PAGE_SIZE, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); - if (!ptr) return 0; - - void* end = (char*)ptr + aligned_size; - - DWORD old_flags; - VirtualProtect(end, PAGE_SIZE, PAGE_NOACCESS, &old_flags); - - return (char*)end - size; - } - - void deallocate(void* ptr, size_t size) - { - size_t aligned_size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); - - void* rptr = (char*)ptr + size - aligned_size; - - DWORD old_flags; - VirtualProtect(rptr, aligned_size + PAGE_SIZE, PAGE_NOACCESS, &old_flags); - } -} -#else -# include - -namespace -{ - void* allocate(size_t size) - { - return malloc(size); - } - - void deallocate(void* ptr, size_t size) - { - (void)size; - - free(ptr); - } -} -#endif - -// High-level allocation functions -void* memory_allocate(size_t size) -{ - void* result = allocate(size + sizeof(size_t)); - if (!result) return 0; - - memcpy(result, &size, sizeof(size_t)); - - return (size_t*)result + 1; -} - -size_t memory_size(void* ptr) -{ - size_t result; - memcpy(&result, (size_t*)ptr - 1, sizeof(size_t)); - - return result; -} - -void memory_deallocate(void* ptr) -{ - if (!ptr) return; - - size_t size = memory_size(ptr); - - deallocate((size_t*)ptr - 1, size + sizeof(size_t)); -} - +#include "allocator.hpp" + +#include + +// Low-level 
allocation functions +#if defined(_WIN32) || defined(_WIN64) +# ifdef __MWERKS__ +# pragma ANSI_strict off // disable ANSI strictness to include windows.h +# pragma cpp_extensions on // enable some extensions to include windows.h +# endif + +# ifdef _XBOX_VER +# define NOD3D +# include +# else +# include +# endif + +namespace +{ + const size_t PAGE_SIZE = 4096; + + void* allocate(size_t size) + { + size_t aligned_size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); + + void* ptr = VirtualAlloc(0, aligned_size + PAGE_SIZE, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + if (!ptr) return 0; + + void* end = (char*)ptr + aligned_size; + + DWORD old_flags; + VirtualProtect(end, PAGE_SIZE, PAGE_NOACCESS, &old_flags); + + return (char*)end - size; + } + + void deallocate(void* ptr, size_t size) + { + size_t aligned_size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); + + void* rptr = (char*)ptr + size - aligned_size; + + DWORD old_flags; + VirtualProtect(rptr, aligned_size + PAGE_SIZE, PAGE_NOACCESS, &old_flags); + } +} +#else +# include + +namespace +{ + void* allocate(size_t size) + { + return malloc(size); + } + + void deallocate(void* ptr, size_t size) + { + (void)size; + + free(ptr); + } +} +#endif + +// High-level allocation functions +void* memory_allocate(size_t size) +{ + void* result = allocate(size + sizeof(size_t)); + if (!result) return 0; + + memcpy(result, &size, sizeof(size_t)); + + return (size_t*)result + 1; +} + +size_t memory_size(void* ptr) +{ + size_t result; + memcpy(&result, (size_t*)ptr - 1, sizeof(size_t)); + + return result; +} + +void memory_deallocate(void* ptr) +{ + if (!ptr) return; + + size_t size = memory_size(ptr); + + deallocate((size_t*)ptr - 1, size + sizeof(size_t)); +} + diff --git a/tests/allocator.hpp b/tests/allocator.hpp index 677fbe4..cb52c91 100644 --- a/tests/allocator.hpp +++ b/tests/allocator.hpp @@ -1,10 +1,10 @@ -#ifndef HEADER_TEST_ALLOCATOR_HPP -#define HEADER_TEST_ALLOCATOR_HPP - -#include - -void* memory_allocate(size_t size); 
-size_t memory_size(void* ptr); -void memory_deallocate(void* ptr); - -#endif +#ifndef HEADER_TEST_ALLOCATOR_HPP +#define HEADER_TEST_ALLOCATOR_HPP + +#include + +void* memory_allocate(size_t size); +size_t memory_size(void* ptr); +void memory_deallocate(void* ptr); + +#endif diff --git a/tests/archive.pl b/tests/archive.pl index 240dd02..4ede302 100644 --- a/tests/archive.pl +++ b/tests/archive.pl @@ -1,60 +1,60 @@ -#!/usr/bin/perl - -use Archive::Tar; -use Archive::Zip; - -my $target = shift @ARGV; -my @sources = @ARGV; - -my $zip = $target =~ /\.zip$/; - -my $arch = $zip ? Archive::Zip->new : Archive::Tar->new; - -for $source (sort {$a cmp $b} @sources) -{ - my $contents = &readfile_contents($source); - my $meta = &readfile_meta($source); - - if ($zip) - { - my $path = $source; - $arch->addDirectory($path) if $path =~ s/\/[^\/]+$/\// && !defined($arch->memberNamed($path)); - - my $member = $arch->addString($contents, $source); - - $member->desiredCompressionMethod(COMPRESSION_DEFLATED); - $member->desiredCompressionLevel(9); - - $member->setLastModFileDateTimeFromUnix($$meta{mtime}); - } - else - { - # tgz releases are for Unix people, Unix people like Unix newlines - $contents =~ s/\r//g if (-T $source); - - $arch->add_data($source, $contents, $meta); - } -} - -$zip ? $arch->overwriteAs($target) : $arch->write($target, 9); - -sub readfile_contents -{ - my $file = shift; - - open FILE, $file or die "Can't open $file: $!"; - binmode FILE; - my @contents = ; - close FILE; - - return join('', @contents); -} - -sub readfile_meta -{ - my $file = shift; - - my ($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size, $atime, $mtime, $ctime, $blksize, $blocks) = stat($file); - - return {mtime => $mtime}; -} +#!/usr/bin/perl + +use Archive::Tar; +use Archive::Zip; + +my $target = shift @ARGV; +my @sources = @ARGV; + +my $zip = $target =~ /\.zip$/; + +my $arch = $zip ? 
Archive::Zip->new : Archive::Tar->new; + +for $source (sort {$a cmp $b} @sources) +{ + my $contents = &readfile_contents($source); + my $meta = &readfile_meta($source); + + if ($zip) + { + my $path = $source; + $arch->addDirectory($path) if $path =~ s/\/[^\/]+$/\// && !defined($arch->memberNamed($path)); + + my $member = $arch->addString($contents, $source); + + $member->desiredCompressionMethod(COMPRESSION_DEFLATED); + $member->desiredCompressionLevel(9); + + $member->setLastModFileDateTimeFromUnix($$meta{mtime}); + } + else + { + # tgz releases are for Unix people, Unix people like Unix newlines + $contents =~ s/\r//g if (-T $source); + + $arch->add_data($source, $contents, $meta); + } +} + +$zip ? $arch->overwriteAs($target) : $arch->write($target, 9); + +sub readfile_contents +{ + my $file = shift; + + open FILE, $file or die "Can't open $file: $!"; + binmode FILE; + my @contents = ; + close FILE; + + return join('', @contents); +} + +sub readfile_meta +{ + my $file = shift; + + my ($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size, $atime, $mtime, $ctime, $blksize, $blocks) = stat($file); + + return {mtime => $mtime}; +} diff --git a/tests/autotest-local.pl b/tests/autotest-local.pl index 86e5114..b0e9a6c 100644 --- a/tests/autotest-local.pl +++ b/tests/autotest-local.pl @@ -1,126 +1,126 @@ -#!/usr/bin/perl - -use Config; - -sub permute -{ - my @defines = @_; - my @result = (''); - - foreach $define (@defines) - { - push @result, map { length($_) == 0 ? $define : "$_,$define" } @result; - } - - @result; -} - -sub gcctoolset -{ - my $gccversion = `gcc -dumpversion`; - chomp($gccversion); - - my $gcc = "gcc$gccversion"; - - return ($^O =~ /darwin/) ? ($gcc, "${gcc}_x64", "${gcc}_ppc") : (`uname -m` =~ /64/) ? ("${gcc}_x64") : ($gcc); -} - -$fast = (shift eq 'fast'); -@toolsets = ($^O =~ /MSWin/) ? 
(bcc, cw, dmc, ic8, ic9, ic9_x64, ic10, ic10_x64, ic11, ic11_x64, mingw34, mingw44, mingw45, mingw45_0x, mingw46_x64, msvc6, msvc7, msvc71, msvc8, msvc8_x64, msvc9, msvc9_x64, msvc10, msvc10_x64, xbox360, ps3_gcc, ps3_snc) : ($^O =~ /solaris/) ? (suncc, suncc_x64) : &gcctoolset(); -@configurations = (debug, release); -@defines = (PUGIXML_NO_XPATH, PUGIXML_NO_EXCEPTIONS, PUGIXML_NO_STL, PUGIXML_WCHAR_MODE); -$stddefine = 'PUGIXML_STANDARD'; - -if ($fast) -{ - @defines = (PUGIXML_WCHAR_MODE); - @configurations = (debug); -} - -@definesets = permute(@defines); - -print "### autotest begin " . scalar localtime() . "\n"; - -# print SVN revision info -print "### autotest revision $1\n" if (`svn info` =~ /Revision:\s+(\d+)/); - -# build all configurations -%results = (); - -foreach $toolset (@toolsets) -{ - my $cmdline = "jam"; - - # parallel build on non-windows platforms (since jam can't detect processor count) - $cmdline .= " -j6" if ($^O !~ /MSWin/); - - # add toolset - $cmdline .= " toolset=$toolset"; - - # add configurations - $cmdline .= " configuration=" . join(',', @configurations); - - # add definesets - $cmdline .= " defines=$stddefine"; - - foreach $defineset (@definesets) - { - if ($defineset !~ /NO_XPATH/ && $defineset =~ /NO_EXCEPTIONS/) { next; } - if ($defineset !~ /NO_XPATH/ && $defineset =~ /NO_STL/) { next; } - - $cmdline .= ":$defineset" if ($defineset ne ''); - - # any configuration with prepare but without result is treated as failed - foreach $configuration (@configurations) - { - print "### autotest $Config{archname} $toolset $configuration [$defineset] prepare\n"; - } - } - - print STDERR "*** testing $toolset... ***\n"; - - # launch command - print "### autotest launch $cmdline\n"; - - open PIPE, "$cmdline autotest=on coverage |" || die "$cmdline failed: $!\n"; - - # parse build output - while () - { - # ... autotest release [wchar] success - if (/^\.\.\. 
autotest (\S+) \[(.*?)\] success/) - { - my $configuration = $1; - my $defineset = ($2 eq $stddefine) ? '' : $2; - - print "### autotest $Config{archname} $toolset $configuration [$defineset] success\n"; - } - # ... autotest release [wchar] gcov - elsif (/^\.\.\. autotest (\S+) \[(.*?)\] gcov/) - { - my $configuration = $1; - my $defineset = ($2 eq $stddefine) ? '' : $2; - my $file; - - $file = "pugixml $1" if (/pugixml\.cpp' executed:([^%]+)%/); - $file = "pugixpath $1" if (/pugixpath\.cpp' executed:([^%]+)%/); - - if (defined($file)) - { - print "### autotest $Config{archname} $toolset $configuration [$defineset] coverage $file\n"; - } - else - { - print; - } - } - else - { - print; - } - } - - close PIPE; -} - -print "### autotest end " . scalar localtime() . "\n"; +#!/usr/bin/perl + +use Config; + +sub permute +{ + my @defines = @_; + my @result = (''); + + foreach $define (@defines) + { + push @result, map { length($_) == 0 ? $define : "$_,$define" } @result; + } + + @result; +} + +sub gcctoolset +{ + my $gccversion = `gcc -dumpversion`; + chomp($gccversion); + + my $gcc = "gcc$gccversion"; + + return ($^O =~ /darwin/) ? ($gcc, "${gcc}_x64", "${gcc}_ppc") : (`uname -m` =~ /64/) ? ("${gcc}_x64") : ($gcc); +} + +$fast = (shift eq 'fast'); +@toolsets = ($^O =~ /MSWin/) ? (bcc, cw, dmc, ic8, ic9, ic9_x64, ic10, ic10_x64, ic11, ic11_x64, mingw34, mingw44, mingw45, mingw45_0x, mingw46_x64, msvc6, msvc7, msvc71, msvc8, msvc8_x64, msvc9, msvc9_x64, msvc10, msvc10_x64, xbox360, ps3_gcc, ps3_snc) : ($^O =~ /solaris/) ? (suncc, suncc_x64) : &gcctoolset(); +@configurations = (debug, release); +@defines = (PUGIXML_NO_XPATH, PUGIXML_NO_EXCEPTIONS, PUGIXML_NO_STL, PUGIXML_WCHAR_MODE); +$stddefine = 'PUGIXML_STANDARD'; + +if ($fast) +{ + @defines = (PUGIXML_WCHAR_MODE); + @configurations = (debug); +} + +@definesets = permute(@defines); + +print "### autotest begin " . scalar localtime() . 
"\n"; + +# print SVN revision info +print "### autotest revision $1\n" if (`svn info` =~ /Revision:\s+(\d+)/); + +# build all configurations +%results = (); + +foreach $toolset (@toolsets) +{ + my $cmdline = "jam"; + + # parallel build on non-windows platforms (since jam can't detect processor count) + $cmdline .= " -j6" if ($^O !~ /MSWin/); + + # add toolset + $cmdline .= " toolset=$toolset"; + + # add configurations + $cmdline .= " configuration=" . join(',', @configurations); + + # add definesets + $cmdline .= " defines=$stddefine"; + + foreach $defineset (@definesets) + { + if ($defineset !~ /NO_XPATH/ && $defineset =~ /NO_EXCEPTIONS/) { next; } + if ($defineset !~ /NO_XPATH/ && $defineset =~ /NO_STL/) { next; } + + $cmdline .= ":$defineset" if ($defineset ne ''); + + # any configuration with prepare but without result is treated as failed + foreach $configuration (@configurations) + { + print "### autotest $Config{archname} $toolset $configuration [$defineset] prepare\n"; + } + } + + print STDERR "*** testing $toolset... ***\n"; + + # launch command + print "### autotest launch $cmdline\n"; + + open PIPE, "$cmdline autotest=on coverage |" || die "$cmdline failed: $!\n"; + + # parse build output + while () + { + # ... autotest release [wchar] success + if (/^\.\.\. autotest (\S+) \[(.*?)\] success/) + { + my $configuration = $1; + my $defineset = ($2 eq $stddefine) ? '' : $2; + + print "### autotest $Config{archname} $toolset $configuration [$defineset] success\n"; + } + # ... autotest release [wchar] gcov + elsif (/^\.\.\. autotest (\S+) \[(.*?)\] gcov/) + { + my $configuration = $1; + my $defineset = ($2 eq $stddefine) ? 
'' : $2; + my $file; + + $file = "pugixml $1" if (/pugixml\.cpp' executed:([^%]+)%/); + $file = "pugixpath $1" if (/pugixpath\.cpp' executed:([^%]+)%/); + + if (defined($file)) + { + print "### autotest $Config{archname} $toolset $configuration [$defineset] coverage $file\n"; + } + else + { + print; + } + } + else + { + print; + } + } + + close PIPE; +} + +print "### autotest end " . scalar localtime() . "\n"; diff --git a/tests/autotest-report.pl b/tests/autotest-report.pl index a01a907..993674a 100644 --- a/tests/autotest-report.pl +++ b/tests/autotest-report.pl @@ -1,199 +1,199 @@ -#!/usr/bin/perl - -# pretty-printing -sub prettysuffix -{ - my $suffix = shift; - - return " C++0x" if ($suffix eq '_0x'); - return " x64" if ($suffix eq '_x64'); - return " PPC" if ($suffix eq '_ppc'); - - return ""; -} - -sub prettytoolset -{ - my $toolset = shift; - - return "Borland C++ 5.82" if ($toolset eq 'bcc'); - return "Metrowerks CodeWarrior 8" if ($toolset eq 'cw'); - return "Digital Mars C++ 8.51" if ($toolset eq 'dmc'); - return "Sun C++ 5.10" . prettysuffix($1) if ($toolset =~ /^suncc(.*)$/); - - return "Intel C++ Compiler $1.0" . prettysuffix($2) if ($toolset =~ /^ic(\d+)(.*)$/); - return "MinGW (GCC $1.$2)" . prettysuffix($3) if ($toolset =~ /^mingw(\d)(\d)(.*)$/); - return "Microsoft Visual C++ 7.1" if ($toolset eq 'msvc71'); - return "Microsoft Visual C++ $1.0" . prettysuffix($2) if ($toolset =~ /^msvc(\d+)(.*)$/); - return "GNU C++ Compiler $1" . 
prettysuffix($2) if ($toolset =~ /^gcc([\d.]*)(.*)$/); - - return "Microsoft Xbox360 Compiler" if ($toolset =~ /^xbox360/); - return "Sony PlayStation3 GCC" if ($toolset =~ /^ps3_gcc/); - return "Sony PlayStation3 SNC" if ($toolset =~ /^ps3_snc/); - - $toolset; -} - -sub prettyplatform -{ - my ($platform, $toolset) = @_; - - return "solaris" if ($platform =~ /solaris/); - - return "macos" if ($platform =~ /darwin/); - - return "linux64" if ($platform =~ /64-linux/); - return "linux32" if ($platform =~ /86-linux/); - - return "fbsd64" if ($platform =~ /64-freebsd/); - return "fbsd32" if ($platform =~ /86-freebsd/); - - return "x360" if ($toolset =~ /^xbox360/); - return "ps3" if ($toolset =~ /^ps3/); - - return "win64" if ($platform =~ /MSWin32-x64/); - return "win32" if ($platform =~ /MSWin32/); - - $platform; -} - -# parse build log -%results = (); -%toolsets = (); -%defines = (); -%configurations = (); - -sub insertindex -{ - my ($hash, $key) = @_; - - $$hash{$key} = scalar(keys %$hash) unless defined $$hash{$key}; -} - -while (<>) -{ - ### autotest i386-freebsd-64int gcc release [wchar] result 0 97.78 98.85 - if (/^### autotest (\S+) (\S+) (\S+) \[(.*?)\] (.*)/) - { - my ($platform, $toolset, $configuration, $defineset, $info) = ($1, $2, $3, $4, $5); - - my $fulltool = &prettyplatform($platform, $toolset) . ' ' . 
&prettytoolset($toolset); - my $fullconf = "$configuration $defineset"; - - if ($info =~ /^prepare/) - { - $results{$fulltool}{$fullconf}{result} = 1; - } - elsif ($info =~ /^success/) - { - $results{$fulltool}{$fullconf}{result} = 0; - } - elsif ($info =~ /^coverage (\S+) (\S+)/) - { - $results{$fulltool}{$fullconf}{"coverage_$1"} = $2; - } - else - { - print STDERR "Unrecognized autotest infoline $_"; - } - - &insertindex(\%toolsets, $fulltool); - - $defines{$_} = 1 foreach (split /,/, $defineset); - &insertindex(\%configurations, $fullconf); - } - elsif (/^### autotest revision (\d+)/) - { - if (defined $revision && $revision != $1) - { - print STDERR "Autotest build report contains several revisions: $revision, $1\n"; - } - else - { - $revision = $1; - } - } -} - -# make arrays of toolsets and configurations -@toolsetarray = (); -@configurationarray = (); - -$toolsetarray[$toolsets{$_}] = $_ foreach (keys %toolsets); -$configurationarray[$configurations{$_}] = $_ foreach (keys %configurations); - -# print header -$stylesheet = <pugixml autotest report -

pugixml autotest report

- -END - -# print configuration header (release/debug) -print ""; -print "" foreach (@configurationarray); -print "\n"; - -# print defines header (one row for each define) -foreach $define (sort {$a cmp $b} keys %defines) -{ - print ""; - - foreach (@configurationarray) - { - my $present = ($_ =~ /\b$define\b/); - my $color = $present ? "#cccccc" : "#ffffff"; - print ""; - } - print "\n"; -} - -# print data (one row for each toolset) -foreach $tool (@toolsetarray) -{ - my ($platform, $toolset) = split(/\s+/, $tool, 2); - print ""; - - foreach (@configurationarray) - { - my $info = $results{$tool}{$_}; - - if (!defined $$info{result}) - { - print ""; - } - elsif ($$info{result} == 0) - { - my ($coverage_pugixml, $coverage_pugixpath) = ($$info{coverage_pugixml}, $$info{coverage_pugixpath}); - - print ""; - } - else - { - print "" - } - } - - print "\n"; -} - -# print footer -$date = localtime; - -print <
-Generated on $date from Subversion r$revision - -END +#!/usr/bin/perl + +# pretty-printing +sub prettysuffix +{ + my $suffix = shift; + + return " C++0x" if ($suffix eq '_0x'); + return " x64" if ($suffix eq '_x64'); + return " PPC" if ($suffix eq '_ppc'); + + return ""; +} + +sub prettytoolset +{ + my $toolset = shift; + + return "Borland C++ 5.82" if ($toolset eq 'bcc'); + return "Metrowerks CodeWarrior 8" if ($toolset eq 'cw'); + return "Digital Mars C++ 8.51" if ($toolset eq 'dmc'); + return "Sun C++ 5.10" . prettysuffix($1) if ($toolset =~ /^suncc(.*)$/); + + return "Intel C++ Compiler $1.0" . prettysuffix($2) if ($toolset =~ /^ic(\d+)(.*)$/); + return "MinGW (GCC $1.$2)" . prettysuffix($3) if ($toolset =~ /^mingw(\d)(\d)(.*)$/); + return "Microsoft Visual C++ 7.1" if ($toolset eq 'msvc71'); + return "Microsoft Visual C++ $1.0" . prettysuffix($2) if ($toolset =~ /^msvc(\d+)(.*)$/); + return "GNU C++ Compiler $1" . prettysuffix($2) if ($toolset =~ /^gcc([\d.]*)(.*)$/); + + return "Microsoft Xbox360 Compiler" if ($toolset =~ /^xbox360/); + return "Sony PlayStation3 GCC" if ($toolset =~ /^ps3_gcc/); + return "Sony PlayStation3 SNC" if ($toolset =~ /^ps3_snc/); + + $toolset; +} + +sub prettyplatform +{ + my ($platform, $toolset) = @_; + + return "solaris" if ($platform =~ /solaris/); + + return "macos" if ($platform =~ /darwin/); + + return "linux64" if ($platform =~ /64-linux/); + return "linux32" if ($platform =~ /86-linux/); + + return "fbsd64" if ($platform =~ /64-freebsd/); + return "fbsd32" if ($platform =~ /86-freebsd/); + + return "x360" if ($toolset =~ /^xbox360/); + return "ps3" if ($toolset =~ /^ps3/); + + return "win64" if ($platform =~ /MSWin32-x64/); + return "win32" if ($platform =~ /MSWin32/); + + $platform; +} + +# parse build log +%results = (); +%toolsets = (); +%defines = (); +%configurations = (); + +sub insertindex +{ + my ($hash, $key) = @_; + + $$hash{$key} = scalar(keys %$hash) unless defined $$hash{$key}; +} + +while (<>) +{ + ### 
autotest i386-freebsd-64int gcc release [wchar] result 0 97.78 98.85 + if (/^### autotest (\S+) (\S+) (\S+) \[(.*?)\] (.*)/) + { + my ($platform, $toolset, $configuration, $defineset, $info) = ($1, $2, $3, $4, $5); + + my $fulltool = &prettyplatform($platform, $toolset) . ' ' . &prettytoolset($toolset); + my $fullconf = "$configuration $defineset"; + + if ($info =~ /^prepare/) + { + $results{$fulltool}{$fullconf}{result} = 1; + } + elsif ($info =~ /^success/) + { + $results{$fulltool}{$fullconf}{result} = 0; + } + elsif ($info =~ /^coverage (\S+) (\S+)/) + { + $results{$fulltool}{$fullconf}{"coverage_$1"} = $2; + } + else + { + print STDERR "Unrecognized autotest infoline $_"; + } + + &insertindex(\%toolsets, $fulltool); + + $defines{$_} = 1 foreach (split /,/, $defineset); + &insertindex(\%configurations, $fullconf); + } + elsif (/^### autotest revision (\d+)/) + { + if (defined $revision && $revision != $1) + { + print STDERR "Autotest build report contains several revisions: $revision, $1\n"; + } + else + { + $revision = $1; + } + } +} + +# make arrays of toolsets and configurations +@toolsetarray = (); +@configurationarray = (); + +$toolsetarray[$toolsets{$_}] = $_ foreach (keys %toolsets); +$configurationarray[$configurations{$_}] = $_ foreach (keys %configurations); + +# print header +$stylesheet = <pugixml autotest report +

pugixml autotest report

+
configuration".(split /\s+/)[0]."
$define" . ($present ? "+" : " ") . "
$platform$toolset pass"; - - if ($coverage_pugixml > 0 || $coverage_pugixpath > 0) - { - print "
" . ($coverage_pugixml + 0) . "%
" . ($coverage_pugixpath + 0) . "%
"; - } - - print "
fail
+END + +# print configuration header (release/debug) +print ""; +print "" foreach (@configurationarray); +print "\n"; + +# print defines header (one row for each define) +foreach $define (sort {$a cmp $b} keys %defines) +{ + print ""; + + foreach (@configurationarray) + { + my $present = ($_ =~ /\b$define\b/); + my $color = $present ? "#cccccc" : "#ffffff"; + print ""; + } + print "\n"; +} + +# print data (one row for each toolset) +foreach $tool (@toolsetarray) +{ + my ($platform, $toolset) = split(/\s+/, $tool, 2); + print ""; + + foreach (@configurationarray) + { + my $info = $results{$tool}{$_}; + + if (!defined $$info{result}) + { + print ""; + } + elsif ($$info{result} == 0) + { + my ($coverage_pugixml, $coverage_pugixpath) = ($$info{coverage_pugixml}, $$info{coverage_pugixpath}); + + print ""; + } + else + { + print "" + } + } + + print "\n"; +} + +# print footer +$date = localtime; + +print <
+Generated on $date from Subversion r$revision + +END diff --git a/tests/common.hpp b/tests/common.hpp index b466c09..35e4717 100644 --- a/tests/common.hpp +++ b/tests/common.hpp @@ -1,8 +1,8 @@ -#ifndef HEADER_TEST_COMMON_HPP -#define HEADER_TEST_COMMON_HPP - -#include "test.hpp" - -using namespace pugi; - -#endif +#ifndef HEADER_TEST_COMMON_HPP +#define HEADER_TEST_COMMON_HPP + +#include "test.hpp" + +using namespace pugi; + +#endif diff --git a/tests/data/multiline.xml b/tests/data/multiline.xml index 3607e7f..0f0fe3c 100644 --- a/tests/data/multiline.xml +++ b/tests/data/multiline.xml @@ -1,3 +1,3 @@ - - - + + + diff --git a/tests/gcov-filter.pl b/tests/gcov-filter.pl index 8cbccc5..c68aa1f 100644 --- a/tests/gcov-filter.pl +++ b/tests/gcov-filter.pl @@ -1,13 +1,13 @@ -#!/usr/bin/perl - -$prefix = join(' ', @ARGV); -$prefix .= ' ' if ($prefix ne ''); - -$lines = join('', ); -$lines =~ s/File (.+)\nLines (.+)\n(.+\n)*\n/$1 $2\n/g; -$lines =~ s/.+include\/c\+\+.+\n//g; - -foreach $line (split /\n/, $lines) -{ - print "$prefix$line\n"; -} +#!/usr/bin/perl + +$prefix = join(' ', @ARGV); +$prefix .= ' ' if ($prefix ne ''); + +$lines = join('', ); +$lines =~ s/File (.+)\nLines (.+)\n(.+\n)*\n/$1 $2\n/g; +$lines =~ s/.+include\/c\+\+.+\n//g; + +foreach $line (split /\n/, $lines) +{ + print "$prefix$line\n"; +} diff --git a/tests/helpers.hpp b/tests/helpers.hpp index b160a85..abe6626 100644 --- a/tests/helpers.hpp +++ b/tests/helpers.hpp @@ -1,97 +1,97 @@ -#ifndef HEADER_TEST_HELPERS_HPP -#define HEADER_TEST_HELPERS_HPP - -#include "common.hpp" - -#include - -template static void generic_bool_ops_test(const T& obj) -{ - T null; - - CHECK(!null); - CHECK(obj); - CHECK(!!obj); - - bool b1 = null, b2 = obj; - - CHECK(!b1); - CHECK(b2); - - CHECK(obj && b2); - CHECK(obj || b2); - CHECK(obj && obj); - CHECK(obj || obj); -} - -template static void generic_eq_ops_test(const T& obj1, const T& obj2) -{ - T null = T(); - - // operator== - CHECK(null == null); - CHECK(obj1 == 
obj1); - CHECK(!(null == obj1)); - CHECK(!(null == obj2)); - CHECK(T(null) == null); - CHECK(T(obj1) == obj1); - - // operator!= - CHECK(!(null != null)); - CHECK(!(obj1 != obj1)); - CHECK(null != obj1); - CHECK(null != obj2); - CHECK(!(T(null) != null)); - CHECK(!(T(obj1) != obj1)); -} - -template static void generic_rel_ops_test(T obj1, T obj2) -{ - T null = T(); - - // obj1 < obj2 (we use operator<, but there is no other choice - if (obj1 > obj2) - { - T temp = obj1; - obj1 = obj2; - obj2 = temp; - } - - // operator< - CHECK(null < obj1); - CHECK(null < obj2); - CHECK(obj1 < obj2); - CHECK(!(null < null)); - CHECK(!(obj1 < obj1)); - CHECK(!(obj1 < null)); - CHECK(!(obj2 < obj1)); - - // operator<= - CHECK(null <= obj1); - CHECK(null <= obj2); - CHECK(obj1 <= obj2); - CHECK(null <= null); - CHECK(obj1 <= obj1); - CHECK(!(obj1 <= null)); - CHECK(!(obj2 <= obj1)); - - // operator> - CHECK(obj1 > null); - CHECK(obj2 > null); - CHECK(obj2 > obj1); - CHECK(!(null > null)); - CHECK(!(obj1 > obj1)); - CHECK(!(null > obj1)); - CHECK(!(obj1 > obj2)); - - // operator>= - CHECK(obj1 >= null); - CHECK(obj2 >= null); - CHECK(obj2 >= obj1); - CHECK(null >= null); - CHECK(obj1 >= obj1); - CHECK(!(null >= obj1)); - CHECK(!(obj1 >= obj2)); -} - -#endif +#ifndef HEADER_TEST_HELPERS_HPP +#define HEADER_TEST_HELPERS_HPP + +#include "common.hpp" + +#include + +template static void generic_bool_ops_test(const T& obj) +{ + T null; + + CHECK(!null); + CHECK(obj); + CHECK(!!obj); + + bool b1 = null, b2 = obj; + + CHECK(!b1); + CHECK(b2); + + CHECK(obj && b2); + CHECK(obj || b2); + CHECK(obj && obj); + CHECK(obj || obj); +} + +template static void generic_eq_ops_test(const T& obj1, const T& obj2) +{ + T null = T(); + + // operator== + CHECK(null == null); + CHECK(obj1 == obj1); + CHECK(!(null == obj1)); + CHECK(!(null == obj2)); + CHECK(T(null) == null); + CHECK(T(obj1) == obj1); + + // operator!= + CHECK(!(null != null)); + CHECK(!(obj1 != obj1)); + CHECK(null != obj1); + CHECK(null != 
obj2); + CHECK(!(T(null) != null)); + CHECK(!(T(obj1) != obj1)); +} + +template static void generic_rel_ops_test(T obj1, T obj2) +{ + T null = T(); + + // obj1 < obj2 (we use operator<, but there is no other choice + if (obj1 > obj2) + { + T temp = obj1; + obj1 = obj2; + obj2 = temp; + } + + // operator< + CHECK(null < obj1); + CHECK(null < obj2); + CHECK(obj1 < obj2); + CHECK(!(null < null)); + CHECK(!(obj1 < obj1)); + CHECK(!(obj1 < null)); + CHECK(!(obj2 < obj1)); + + // operator<= + CHECK(null <= obj1); + CHECK(null <= obj2); + CHECK(obj1 <= obj2); + CHECK(null <= null); + CHECK(obj1 <= obj1); + CHECK(!(obj1 <= null)); + CHECK(!(obj2 <= obj1)); + + // operator> + CHECK(obj1 > null); + CHECK(obj2 > null); + CHECK(obj2 > obj1); + CHECK(!(null > null)); + CHECK(!(obj1 > obj1)); + CHECK(!(null > obj1)); + CHECK(!(obj1 > obj2)); + + // operator>= + CHECK(obj1 >= null); + CHECK(obj2 >= null); + CHECK(obj2 >= obj1); + CHECK(null >= null); + CHECK(obj1 >= obj1); + CHECK(!(null >= obj1)); + CHECK(!(obj1 >= obj2)); +} + +#endif diff --git a/tests/main.cpp b/tests/main.cpp index 4330009..021c253 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -1,149 +1,149 @@ -#include "test.hpp" -#include "allocator.hpp" - -#include -#include -#include - -test_runner* test_runner::_tests = 0; -size_t test_runner::_memory_fail_threshold = 0; -jmp_buf test_runner::_failure_buffer; -const char* test_runner::_failure_message; - -static size_t g_memory_total_size = 0; -static size_t g_memory_total_count = 0; - -static void* custom_allocate(size_t size) -{ - if (test_runner::_memory_fail_threshold > 0 && test_runner::_memory_fail_threshold < g_memory_total_size + size) - return 0; - else - { - void* ptr = memory_allocate(size); - - g_memory_total_size += memory_size(ptr); - g_memory_total_count++; - - return ptr; - } -} - -static void custom_deallocate(void* ptr) -{ - if (ptr) - { - g_memory_total_size -= memory_size(ptr); - g_memory_total_count--; - - memory_deallocate(ptr); - } -} - 
-static void replace_memory_management() -{ - // create some document to touch original functions - { - pugi::xml_document doc; - doc.append_child().set_name(STR("node")); - } - - // replace functions - pugi::set_memory_management_functions(custom_allocate, custom_deallocate); -} - -#if defined(_MSC_VER) && _MSC_VER > 1200 && _MSC_VER < 1400 && !defined(__INTEL_COMPILER) && !defined(__DMC__) -namespace std -{ - _CRTIMP2 _Prhand _Raise_handler; - _CRTIMP2 void __cdecl _Throw(const exception&) {} -} -#endif - -static bool run_test(test_runner* test) -{ -#ifndef PUGIXML_NO_EXCEPTIONS - try - { -#endif - g_memory_total_size = 0; - g_memory_total_count = 0; - test_runner::_memory_fail_threshold = 0; - -#ifdef _MSC_VER -# pragma warning(push) -# pragma warning(disable: 4611) // interaction between _setjmp and C++ object destruction is non-portable -#endif - - volatile int result = setjmp(test_runner::_failure_buffer); - -#ifdef _MSC_VER -# pragma warning(pop) -#endif - - if (result) - { - printf("Test %s failed: %s\n", test->_name, test_runner::_failure_message); - return false; - } - - test->run(); - - if (g_memory_total_size != 0 || g_memory_total_count != 0) - { - printf("Test %s failed: memory leaks found (%u bytes in %u allocations)\n", test->_name, (unsigned int)g_memory_total_size, (unsigned int)g_memory_total_count); - return false; - } - - return true; -#ifndef PUGIXML_NO_EXCEPTIONS - } - catch (const std::exception& e) - { - printf("Test %s failed: exception %s\n", test->_name, e.what()); - return false; - } - catch (...) 
- { - printf("Test %s failed for unknown reason\n", test->_name); - return false; - } -#endif -} - -#if defined(__CELLOS_LV2__) && defined(PUGIXML_NO_EXCEPTIONS) && !defined(__SNC__) -#include - -void std::exception::_Raise() const -{ - abort(); -} -#endif - -int main() -{ -#ifdef __BORLANDC__ - _control87(MCW_EM | PC_53, MCW_EM | MCW_PC); -#endif - - replace_memory_management(); - - unsigned int total = 0; - unsigned int passed = 0; - - test_runner* test = 0; // gcc3 "variable might be used uninitialized in this function" bug workaround - - for (test = test_runner::_tests; test; test = test->_next) - { - total++; - passed += run_test(test); - } - - unsigned int failed = total - passed; - - if (failed != 0) - printf("FAILURE: %u out of %u tests failed.\n", failed, total); - else - printf("Success: %u tests passed.\n", total); - - return failed; -} +#include "test.hpp" +#include "allocator.hpp" + +#include +#include +#include + +test_runner* test_runner::_tests = 0; +size_t test_runner::_memory_fail_threshold = 0; +jmp_buf test_runner::_failure_buffer; +const char* test_runner::_failure_message; + +static size_t g_memory_total_size = 0; +static size_t g_memory_total_count = 0; + +static void* custom_allocate(size_t size) +{ + if (test_runner::_memory_fail_threshold > 0 && test_runner::_memory_fail_threshold < g_memory_total_size + size) + return 0; + else + { + void* ptr = memory_allocate(size); + + g_memory_total_size += memory_size(ptr); + g_memory_total_count++; + + return ptr; + } +} + +static void custom_deallocate(void* ptr) +{ + if (ptr) + { + g_memory_total_size -= memory_size(ptr); + g_memory_total_count--; + + memory_deallocate(ptr); + } +} + +static void replace_memory_management() +{ + // create some document to touch original functions + { + pugi::xml_document doc; + doc.append_child().set_name(STR("node")); + } + + // replace functions + pugi::set_memory_management_functions(custom_allocate, custom_deallocate); +} + +#if defined(_MSC_VER) && _MSC_VER > 
1200 && _MSC_VER < 1400 && !defined(__INTEL_COMPILER) && !defined(__DMC__) +namespace std +{ + _CRTIMP2 _Prhand _Raise_handler; + _CRTIMP2 void __cdecl _Throw(const exception&) {} +} +#endif + +static bool run_test(test_runner* test) +{ +#ifndef PUGIXML_NO_EXCEPTIONS + try + { +#endif + g_memory_total_size = 0; + g_memory_total_count = 0; + test_runner::_memory_fail_threshold = 0; + +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable: 4611) // interaction between _setjmp and C++ object destruction is non-portable +#endif + + volatile int result = setjmp(test_runner::_failure_buffer); + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + + if (result) + { + printf("Test %s failed: %s\n", test->_name, test_runner::_failure_message); + return false; + } + + test->run(); + + if (g_memory_total_size != 0 || g_memory_total_count != 0) + { + printf("Test %s failed: memory leaks found (%u bytes in %u allocations)\n", test->_name, (unsigned int)g_memory_total_size, (unsigned int)g_memory_total_count); + return false; + } + + return true; +#ifndef PUGIXML_NO_EXCEPTIONS + } + catch (const std::exception& e) + { + printf("Test %s failed: exception %s\n", test->_name, e.what()); + return false; + } + catch (...) 
+ { + printf("Test %s failed for unknown reason\n", test->_name); + return false; + } +#endif +} + +#if defined(__CELLOS_LV2__) && defined(PUGIXML_NO_EXCEPTIONS) && !defined(__SNC__) +#include + +void std::exception::_Raise() const +{ + abort(); +} +#endif + +int main() +{ +#ifdef __BORLANDC__ + _control87(MCW_EM | PC_53, MCW_EM | MCW_PC); +#endif + + replace_memory_management(); + + unsigned int total = 0; + unsigned int passed = 0; + + test_runner* test = 0; // gcc3 "variable might be used uninitialized in this function" bug workaround + + for (test = test_runner::_tests; test; test = test->_next) + { + total++; + passed += run_test(test); + } + + unsigned int failed = total - passed; + + if (failed != 0) + printf("FAILURE: %u out of %u tests failed.\n", failed, total); + else + printf("Success: %u tests passed.\n", total); + + return failed; +} diff --git a/tests/test.cpp b/tests/test.cpp index 862a0ea..29d74c1 100644 --- a/tests/test.cpp +++ b/tests/test.cpp @@ -1,181 +1,181 @@ -#define _SCL_SECURE_NO_WARNINGS - -#include "test.hpp" - -#include "writer_string.hpp" - -#include -#include - -#include -#include - -#ifndef PUGIXML_NO_XPATH -static void build_document_order(std::vector& result, pugi::xml_node root) -{ - result.push_back(pugi::xpath_node()); - - pugi::xml_node cur = root; - - for (;;) - { - result.push_back(cur); - - for (pugi::xml_attribute a = cur.first_attribute(); a; a = a.next_attribute()) - result.push_back(pugi::xpath_node(a, cur)); - - if (cur.first_child()) - cur = cur.first_child(); - else if (cur.next_sibling()) - cur = cur.next_sibling(); - else - { - while (cur && !cur.next_sibling()) cur = cur.parent(); - cur = cur.next_sibling(); - - if (!cur) break; - } - } -} -#endif - -bool test_string_equal(const pugi::char_t* lhs, const pugi::char_t* rhs) -{ - return (!lhs || !rhs) ? 
lhs == rhs : pugi::impl::strequal(lhs, rhs); -} - -bool test_node(const pugi::xml_node& node, const pugi::char_t* contents, const pugi::char_t* indent, unsigned int flags) -{ - xml_writer_string writer; - - node.print(writer, indent, flags, get_native_encoding()); - - return writer.as_string() == contents; -} - -#ifndef PUGIXML_NO_XPATH -bool test_xpath_string(const pugi::xml_node& node, const pugi::char_t* query, const pugi::char_t* expected) -{ - pugi::xpath_query q(query); - - return q.evaluate_string(node) == expected; -} - -bool test_xpath_boolean(const pugi::xml_node& node, const pugi::char_t* query, bool expected) -{ - pugi::xpath_query q(query); - - return q.evaluate_boolean(node) == expected; -} - -#include - -bool test_xpath_number(const pugi::xml_node& node, const pugi::char_t* query, double expected) -{ - pugi::xpath_query q(query); - - double value = q.evaluate_number(node); - double absolute_error = fabs(value - expected); - - const double tolerance = 1e-15f; - return absolute_error < tolerance || absolute_error < fabs(expected) * tolerance; -} - -bool test_xpath_number_nan(const pugi::xml_node& node, const pugi::char_t* query) -{ - pugi::xpath_query q(query); - - double r = q.evaluate_number(node); - -#if defined(_MSC_VER) || defined(__BORLANDC__) - return _isnan(r) != 0; -#else - return r != r; -#endif -} - -bool test_xpath_fail_compile(const pugi::char_t* query) -{ - try - { - pugi::xpath_query q(query); - return false; - } - catch (const pugi::xpath_exception&) - { - return true; - } -} - -void xpath_node_set_tester::check(bool condition) -{ - if (!condition) - { - test_runner::_failure_message = message; - longjmp(test_runner::_failure_buffer, 1); - } -} - -xpath_node_set_tester::xpath_node_set_tester(const pugi::xpath_node_set& set, const char* message): last(0), message(message) -{ - result = set; - - // only sort unsorted sets so that we're able to verify reverse order for some axes - if (result.type() == pugi::xpath_node_set::type_unsorted) 
result.sort(); - - if (result.empty()) - { - document_order = 0; - document_size = 0; - } - else - { - std::vector order; - build_document_order(order, (result[0].attribute() ? result[0].parent() : result[0].node()).root()); - - document_order = new pugi::xpath_node[order.size()]; - std::copy(order.begin(), order.end(), document_order); - - document_size = order.size(); - } -} - -xpath_node_set_tester::~xpath_node_set_tester() -{ - // check that we processed everything - check(last == result.size()); - - delete[] document_order; -} - -xpath_node_set_tester& xpath_node_set_tester::operator%(unsigned int expected) -{ - // check element count - check(last < result.size()); - - // check document order - check(expected < document_size); - check(result.begin()[last] == document_order[expected]); - - // continue to the next element - last++; - - return *this; -} - -#endif - -bool is_little_endian() -{ - unsigned int ui = 1; - return *reinterpret_cast(&ui) == 1; -} - -pugi::xml_encoding get_native_encoding() -{ -#ifdef PUGIXML_WCHAR_MODE - return pugi::encoding_wchar; -#else - return pugi::encoding_utf8; -#endif -} +#define _SCL_SECURE_NO_WARNINGS + +#include "test.hpp" + +#include "writer_string.hpp" + +#include +#include + +#include +#include + +#ifndef PUGIXML_NO_XPATH +static void build_document_order(std::vector& result, pugi::xml_node root) +{ + result.push_back(pugi::xpath_node()); + + pugi::xml_node cur = root; + + for (;;) + { + result.push_back(cur); + + for (pugi::xml_attribute a = cur.first_attribute(); a; a = a.next_attribute()) + result.push_back(pugi::xpath_node(a, cur)); + + if (cur.first_child()) + cur = cur.first_child(); + else if (cur.next_sibling()) + cur = cur.next_sibling(); + else + { + while (cur && !cur.next_sibling()) cur = cur.parent(); + cur = cur.next_sibling(); + + if (!cur) break; + } + } +} +#endif + +bool test_string_equal(const pugi::char_t* lhs, const pugi::char_t* rhs) +{ + return (!lhs || !rhs) ? 
lhs == rhs : pugi::impl::strequal(lhs, rhs); +} + +bool test_node(const pugi::xml_node& node, const pugi::char_t* contents, const pugi::char_t* indent, unsigned int flags) +{ + xml_writer_string writer; + + node.print(writer, indent, flags, get_native_encoding()); + + return writer.as_string() == contents; +} + +#ifndef PUGIXML_NO_XPATH +bool test_xpath_string(const pugi::xml_node& node, const pugi::char_t* query, const pugi::char_t* expected) +{ + pugi::xpath_query q(query); + + return q.evaluate_string(node) == expected; +} + +bool test_xpath_boolean(const pugi::xml_node& node, const pugi::char_t* query, bool expected) +{ + pugi::xpath_query q(query); + + return q.evaluate_boolean(node) == expected; +} + +#include + +bool test_xpath_number(const pugi::xml_node& node, const pugi::char_t* query, double expected) +{ + pugi::xpath_query q(query); + + double value = q.evaluate_number(node); + double absolute_error = fabs(value - expected); + + const double tolerance = 1e-15f; + return absolute_error < tolerance || absolute_error < fabs(expected) * tolerance; +} + +bool test_xpath_number_nan(const pugi::xml_node& node, const pugi::char_t* query) +{ + pugi::xpath_query q(query); + + double r = q.evaluate_number(node); + +#if defined(_MSC_VER) || defined(__BORLANDC__) + return _isnan(r) != 0; +#else + return r != r; +#endif +} + +bool test_xpath_fail_compile(const pugi::char_t* query) +{ + try + { + pugi::xpath_query q(query); + return false; + } + catch (const pugi::xpath_exception&) + { + return true; + } +} + +void xpath_node_set_tester::check(bool condition) +{ + if (!condition) + { + test_runner::_failure_message = message; + longjmp(test_runner::_failure_buffer, 1); + } +} + +xpath_node_set_tester::xpath_node_set_tester(const pugi::xpath_node_set& set, const char* message): last(0), message(message) +{ + result = set; + + // only sort unsorted sets so that we're able to verify reverse order for some axes + if (result.type() == pugi::xpath_node_set::type_unsorted) 
result.sort(); + + if (result.empty()) + { + document_order = 0; + document_size = 0; + } + else + { + std::vector order; + build_document_order(order, (result[0].attribute() ? result[0].parent() : result[0].node()).root()); + + document_order = new pugi::xpath_node[order.size()]; + std::copy(order.begin(), order.end(), document_order); + + document_size = order.size(); + } +} + +xpath_node_set_tester::~xpath_node_set_tester() +{ + // check that we processed everything + check(last == result.size()); + + delete[] document_order; +} + +xpath_node_set_tester& xpath_node_set_tester::operator%(unsigned int expected) +{ + // check element count + check(last < result.size()); + + // check document order + check(expected < document_size); + check(result.begin()[last] == document_order[expected]); + + // continue to the next element + last++; + + return *this; +} + +#endif + +bool is_little_endian() +{ + unsigned int ui = 1; + return *reinterpret_cast(&ui) == 1; +} + +pugi::xml_encoding get_native_encoding() +{ +#ifdef PUGIXML_WCHAR_MODE + return pugi::encoding_wchar; +#else + return pugi::encoding_utf8; +#endif +} diff --git a/tests/test.hpp b/tests/test.hpp index d4b5879..c269fb5 100644 --- a/tests/test.hpp +++ b/tests/test.hpp @@ -1,151 +1,151 @@ -#ifndef HEADER_TEST_TEST_HPP -#define HEADER_TEST_TEST_HPP - -#include "../src/pugixml.hpp" - -#include - -struct test_runner -{ - test_runner(const char* name) - { - _name = name; - _next = _tests; - _tests = this; - } - - virtual ~test_runner() {} - - virtual void run() = 0; - - const char* _name; - test_runner* _next; - - static test_runner* _tests; - static size_t _memory_fail_threshold; - static jmp_buf _failure_buffer; - static const char* _failure_message; -}; - -bool test_string_equal(const pugi::char_t* lhs, const pugi::char_t* rhs); - -template inline bool test_node_name_value(const Node& node, const pugi::char_t* name, const pugi::char_t* value) -{ - return test_string_equal(node.name(), name) && 
test_string_equal(node.value(), value); -} - -bool test_node(const pugi::xml_node& node, const pugi::char_t* contents, const pugi::char_t* indent, unsigned int flags); - -#ifndef PUGIXML_NO_XPATH -bool test_xpath_string(const pugi::xml_node& node, const pugi::char_t* query, const pugi::char_t* expected); -bool test_xpath_boolean(const pugi::xml_node& node, const pugi::char_t* query, bool expected); -bool test_xpath_number(const pugi::xml_node& node, const pugi::char_t* query, double expected); -bool test_xpath_number_nan(const pugi::xml_node& node, const pugi::char_t* query); -bool test_xpath_fail_compile(const pugi::char_t* query); - -struct xpath_node_set_tester -{ - pugi::xpath_node* document_order; - size_t document_size; - - pugi::xpath_node_set result; - unsigned int last; - const char* message; - - void check(bool condition); - - xpath_node_set_tester(const pugi::xpath_node_set& set, const char* message); - ~xpath_node_set_tester(); - - xpath_node_set_tester& operator%(unsigned int expected); -}; - -#endif - -struct dummy_fixture {}; - -#define TEST_FIXTURE(name, fixture) \ - struct test_runner_helper_##name: fixture \ - { \ - void run(); \ - }; \ - static struct test_runner_##name: test_runner \ - { \ - test_runner_##name(): test_runner(#name) {} \ - \ - virtual void run() \ - { \ - test_runner_helper_##name helper; \ - helper.run(); \ - } \ - } test_runner_instance_##name; \ - void test_runner_helper_##name::run() - -#define TEST(name) TEST_FIXTURE(name, dummy_fixture) - -#define TEST_XML_FLAGS(name, xml, flags) \ - struct test_fixture_##name \ - { \ - pugi::xml_document doc; \ - \ - test_fixture_##name() \ - { \ - CHECK(doc.load(PUGIXML_TEXT(xml), flags)); \ - } \ - \ - private: \ - test_fixture_##name(const test_fixture_##name&); \ - test_fixture_##name& operator=(const test_fixture_##name&); \ - }; \ - \ - TEST_FIXTURE(name, test_fixture_##name) - -#define TEST_XML(name, xml) TEST_XML_FLAGS(name, xml, pugi::parse_default) - -#define CHECK_JOIN(text, 
file, line) text file #line -#define CHECK_JOIN2(text, file, line) CHECK_JOIN(text, file, line) -#define CHECK_TEXT(condition, text) if (condition) ; else test_runner::_failure_message = CHECK_JOIN2(text, " at "__FILE__ ":", __LINE__), longjmp(test_runner::_failure_buffer, 1) - -#if (defined(_MSC_VER) && _MSC_VER == 1200) || defined(__MWERKS__) -# define STRINGIZE(value) "??" // MSVC 6.0 and CodeWarrior have troubles stringizing stuff with strings w/escaping inside -#else -# define STRINGIZE(value) #value -#endif - -#define CHECK(condition) CHECK_TEXT(condition, STRINGIZE(condition) " is false") -#define CHECK_STRING(value, expected) CHECK_TEXT(test_string_equal(value, expected), STRINGIZE(value) " is not equal to " STRINGIZE(expected)) -#define CHECK_DOUBLE(value, expected) CHECK_TEXT((value > expected ? value - expected : expected - value) < 1e-6, STRINGIZE(value) " is not equal to " STRINGIZE(expected)) -#define CHECK_NAME_VALUE(node, name, value) CHECK_TEXT(test_node_name_value(node, name, value), STRINGIZE(node) " name/value do not match " STRINGIZE(name) " and " STRINGIZE(value)) -#define CHECK_NODE_EX(node, expected, indent, flags) CHECK_TEXT(test_node(node, expected, indent, flags), STRINGIZE(node) " contents does not match " STRINGIZE(expected)) -#define CHECK_NODE(node, expected) CHECK_NODE_EX(node, expected, PUGIXML_TEXT(""), pugi::format_raw) - -#ifndef PUGIXML_NO_XPATH -#define CHECK_XPATH_STRING(node, query, expected) CHECK_TEXT(test_xpath_string(node, query, expected), STRINGIZE(query) " does not evaluate to " STRINGIZE(expected) " in context " STRINGIZE(node)) -#define CHECK_XPATH_BOOLEAN(node, query, expected) CHECK_TEXT(test_xpath_boolean(node, query, expected), STRINGIZE(query) " does not evaluate to " STRINGIZE(expected) " in context " STRINGIZE(node)) -#define CHECK_XPATH_NUMBER(node, query, expected) CHECK_TEXT(test_xpath_number(node, query, expected), STRINGIZE(query) " does not evaluate to " STRINGIZE(expected) " in context " 
STRINGIZE(node)) -#define CHECK_XPATH_NUMBER_NAN(node, query) CHECK_TEXT(test_xpath_number_nan(node, query), STRINGIZE(query) " does not evaluate to NaN in context " STRINGIZE(node)) -#define CHECK_XPATH_FAIL(query) CHECK_TEXT(test_xpath_fail_compile(query), STRINGIZE(query) " should not compile") -#define CHECK_XPATH_NODESET(node, query) xpath_node_set_tester(node.select_nodes(query), CHECK_JOIN2(STRINGIZE(query) " does not evaluate to expected set in context " STRINGIZE(node), " at "__FILE__ ":", __LINE__)) -#endif - -#define STR(text) PUGIXML_TEXT(text) - -#ifdef __DMC__ -#define U_LITERALS // DMC does not understand \x01234 (it parses first three digits), but understands \u01234 -#endif - -#if (defined(_MSC_VER) && _MSC_VER == 1200) || (defined(__INTEL_COMPILER) && __INTEL_COMPILER == 800) || defined(__BORLANDC__) -// NaN comparison on MSVC6 is incorrect, see http://www.nabble.com/assertDoubleEquals,-NaN---Microsoft-Visual-Studio-6-td9137859.html -// IC8 and BCC are also affected by the same bug -# define MSVC6_NAN_BUG -#endif - -inline wchar_t wchar_cast(unsigned int value) -{ - return static_cast(value); // to avoid C4310 on MSVC -} - -bool is_little_endian(); -pugi::xml_encoding get_native_encoding(); - -#endif +#ifndef HEADER_TEST_TEST_HPP +#define HEADER_TEST_TEST_HPP + +#include "../src/pugixml.hpp" + +#include + +struct test_runner +{ + test_runner(const char* name) + { + _name = name; + _next = _tests; + _tests = this; + } + + virtual ~test_runner() {} + + virtual void run() = 0; + + const char* _name; + test_runner* _next; + + static test_runner* _tests; + static size_t _memory_fail_threshold; + static jmp_buf _failure_buffer; + static const char* _failure_message; +}; + +bool test_string_equal(const pugi::char_t* lhs, const pugi::char_t* rhs); + +template inline bool test_node_name_value(const Node& node, const pugi::char_t* name, const pugi::char_t* value) +{ + return test_string_equal(node.name(), name) && test_string_equal(node.value(), value); +} 
+ +bool test_node(const pugi::xml_node& node, const pugi::char_t* contents, const pugi::char_t* indent, unsigned int flags); + +#ifndef PUGIXML_NO_XPATH +bool test_xpath_string(const pugi::xml_node& node, const pugi::char_t* query, const pugi::char_t* expected); +bool test_xpath_boolean(const pugi::xml_node& node, const pugi::char_t* query, bool expected); +bool test_xpath_number(const pugi::xml_node& node, const pugi::char_t* query, double expected); +bool test_xpath_number_nan(const pugi::xml_node& node, const pugi::char_t* query); +bool test_xpath_fail_compile(const pugi::char_t* query); + +struct xpath_node_set_tester +{ + pugi::xpath_node* document_order; + size_t document_size; + + pugi::xpath_node_set result; + unsigned int last; + const char* message; + + void check(bool condition); + + xpath_node_set_tester(const pugi::xpath_node_set& set, const char* message); + ~xpath_node_set_tester(); + + xpath_node_set_tester& operator%(unsigned int expected); +}; + +#endif + +struct dummy_fixture {}; + +#define TEST_FIXTURE(name, fixture) \ + struct test_runner_helper_##name: fixture \ + { \ + void run(); \ + }; \ + static struct test_runner_##name: test_runner \ + { \ + test_runner_##name(): test_runner(#name) {} \ + \ + virtual void run() \ + { \ + test_runner_helper_##name helper; \ + helper.run(); \ + } \ + } test_runner_instance_##name; \ + void test_runner_helper_##name::run() + +#define TEST(name) TEST_FIXTURE(name, dummy_fixture) + +#define TEST_XML_FLAGS(name, xml, flags) \ + struct test_fixture_##name \ + { \ + pugi::xml_document doc; \ + \ + test_fixture_##name() \ + { \ + CHECK(doc.load(PUGIXML_TEXT(xml), flags)); \ + } \ + \ + private: \ + test_fixture_##name(const test_fixture_##name&); \ + test_fixture_##name& operator=(const test_fixture_##name&); \ + }; \ + \ + TEST_FIXTURE(name, test_fixture_##name) + +#define TEST_XML(name, xml) TEST_XML_FLAGS(name, xml, pugi::parse_default) + +#define CHECK_JOIN(text, file, line) text file #line +#define 
CHECK_JOIN2(text, file, line) CHECK_JOIN(text, file, line) +#define CHECK_TEXT(condition, text) if (condition) ; else test_runner::_failure_message = CHECK_JOIN2(text, " at "__FILE__ ":", __LINE__), longjmp(test_runner::_failure_buffer, 1) + +#if (defined(_MSC_VER) && _MSC_VER == 1200) || defined(__MWERKS__) +# define STRINGIZE(value) "??" // MSVC 6.0 and CodeWarrior have troubles stringizing stuff with strings w/escaping inside +#else +# define STRINGIZE(value) #value +#endif + +#define CHECK(condition) CHECK_TEXT(condition, STRINGIZE(condition) " is false") +#define CHECK_STRING(value, expected) CHECK_TEXT(test_string_equal(value, expected), STRINGIZE(value) " is not equal to " STRINGIZE(expected)) +#define CHECK_DOUBLE(value, expected) CHECK_TEXT((value > expected ? value - expected : expected - value) < 1e-6, STRINGIZE(value) " is not equal to " STRINGIZE(expected)) +#define CHECK_NAME_VALUE(node, name, value) CHECK_TEXT(test_node_name_value(node, name, value), STRINGIZE(node) " name/value do not match " STRINGIZE(name) " and " STRINGIZE(value)) +#define CHECK_NODE_EX(node, expected, indent, flags) CHECK_TEXT(test_node(node, expected, indent, flags), STRINGIZE(node) " contents does not match " STRINGIZE(expected)) +#define CHECK_NODE(node, expected) CHECK_NODE_EX(node, expected, PUGIXML_TEXT(""), pugi::format_raw) + +#ifndef PUGIXML_NO_XPATH +#define CHECK_XPATH_STRING(node, query, expected) CHECK_TEXT(test_xpath_string(node, query, expected), STRINGIZE(query) " does not evaluate to " STRINGIZE(expected) " in context " STRINGIZE(node)) +#define CHECK_XPATH_BOOLEAN(node, query, expected) CHECK_TEXT(test_xpath_boolean(node, query, expected), STRINGIZE(query) " does not evaluate to " STRINGIZE(expected) " in context " STRINGIZE(node)) +#define CHECK_XPATH_NUMBER(node, query, expected) CHECK_TEXT(test_xpath_number(node, query, expected), STRINGIZE(query) " does not evaluate to " STRINGIZE(expected) " in context " STRINGIZE(node)) +#define 
CHECK_XPATH_NUMBER_NAN(node, query) CHECK_TEXT(test_xpath_number_nan(node, query), STRINGIZE(query) " does not evaluate to NaN in context " STRINGIZE(node)) +#define CHECK_XPATH_FAIL(query) CHECK_TEXT(test_xpath_fail_compile(query), STRINGIZE(query) " should not compile") +#define CHECK_XPATH_NODESET(node, query) xpath_node_set_tester(node.select_nodes(query), CHECK_JOIN2(STRINGIZE(query) " does not evaluate to expected set in context " STRINGIZE(node), " at "__FILE__ ":", __LINE__)) +#endif + +#define STR(text) PUGIXML_TEXT(text) + +#ifdef __DMC__ +#define U_LITERALS // DMC does not understand \x01234 (it parses first three digits), but understands \u01234 +#endif + +#if (defined(_MSC_VER) && _MSC_VER == 1200) || (defined(__INTEL_COMPILER) && __INTEL_COMPILER == 800) || defined(__BORLANDC__) +// NaN comparison on MSVC6 is incorrect, see http://www.nabble.com/assertDoubleEquals,-NaN---Microsoft-Visual-Studio-6-td9137859.html +// IC8 and BCC are also affected by the same bug +# define MSVC6_NAN_BUG +#endif + +inline wchar_t wchar_cast(unsigned int value) +{ + return static_cast(value); // to avoid C4310 on MSVC +} + +bool is_little_endian(); +pugi::xml_encoding get_native_encoding(); + +#endif diff --git a/tests/test_deprecated.cpp b/tests/test_deprecated.cpp index d81810e..4d97b2a 100644 --- a/tests/test_deprecated.cpp +++ b/tests/test_deprecated.cpp @@ -1,203 +1,203 @@ -// This file includes all tests for deprecated functionality; this is going away in the next release! - -#ifdef _MSC_VER -# pragma warning(disable: 4996) -#endif - -#ifdef __GNUC__ -# if __GNUC__ >= 4 && __GNUC_MINOR__ >= 2 -# pragma GCC diagnostic ignored "-Wdeprecated-declarations" -# else -# define PUGIXML_DEPRECATED -# endif -#endif - -#ifdef __INTEL_COMPILER -# pragma warning(disable: 1478) -#endif - -#include - -#include "common.hpp" - -#include "writer_string.hpp" - -#include -#include - -// format_write_bom_utf8 - it's now format_write_bom! 
-TEST_XML(document_save_bom_utf8, "") -{ - xml_writer_string writer; - - CHECK(test_save_narrow(doc, pugi::format_no_declaration | pugi::format_raw | pugi::format_write_bom_utf8, encoding_utf8, "\xef\xbb\xbf", 11)); -} - -// parse - it's now load_buffer_inplace -TEST(document_parse) -{ - char text[] = ""; - - pugi::xml_document doc; - - CHECK(doc.parse(text)); - CHECK_NODE(doc, STR("")); -} - -// parse with transfer_ownership_tag attribute - it's now load_buffer_inplace_own -TEST(document_parse_transfer_ownership) -{ - allocation_function alloc = get_memory_allocation_function(); - - char* text = static_cast(alloc(strlen("") + 1)); - CHECK(text); - - memcpy(text, "", strlen("") + 1); - - pugi::xml_document doc; - - CHECK(doc.parse(transfer_ownership_tag(), text)); - CHECK_NODE(doc, STR("")); -} - -#ifndef PUGIXML_NO_STL -// as_utf16 - it's now as_wide -TEST(as_utf16) -{ - CHECK(as_utf16("") == L""); - - // valid 1-byte, 2-byte and 3-byte inputs -#ifdef U_LITERALS - CHECK(as_utf16("?\xd0\x80\xe2\x80\xbd") == L"?\u0400\u203D"); -#else - CHECK(as_utf16("?\xd0\x80\xe2\x80\xbd") == L"?\x0400\x203D"); -#endif -} -#endif - -// wildcard functions -TEST_XML(dom_node_child_w, "") -{ - CHECK(doc.child_w(STR("n?de")) == doc.child(STR("node"))); - CHECK(doc.child_w(STR("n[az]de")) == xml_node()); - CHECK(doc.child_w(STR("n[aoz]de")) == doc.child(STR("node"))); - CHECK(doc.child_w(STR("*e")) == doc.child(STR("node"))); - CHECK(doc.child(STR("node")).child_w(STR("*l?[23456789]*")) == doc.child(STR("node")).child(STR("child2"))); -} - -TEST_XML(dom_node_attribute_w, "") -{ - xml_node node = doc.child(STR("node")); - - CHECK(node.attribute_w(STR("*tt?[23456789]*")) == node.attribute(STR("attr2"))); - CHECK(node.attribute_w(STR("?")) == xml_attribute()); -} - -TEST_XML(dom_node_next_previous_sibling_w, "") -{ - CHECK(xml_node().next_sibling_w(STR("n")) == xml_node()); - CHECK(xml_node().previous_sibling_w(STR("n")) == xml_node()); - - xml_node child1 = 
doc.child(STR("node")).child(STR("child1")); - xml_node child3 = doc.child(STR("node")).child(STR("child3")); - - CHECK(child1.next_sibling_w(STR("*[3456789]")) == child3); - CHECK(child1.next_sibling_w(STR("?")) == xml_node()); - CHECK(child3.previous_sibling_w(STR("*[3456789]")) == xml_node()); - CHECK(child3.previous_sibling_w(STR("?")) == xml_node()); - CHECK(child3.previous_sibling_w(STR("*1")) == child1); -} - -TEST_XML(dom_node_child_value_w, "value1value2value4") -{ - CHECK_STRING(xml_node().child_value_w(STR("n")), STR("")); - - xml_node node = doc.child(STR("node")); - - CHECK_STRING(node.child_value_w(STR("c*[23456789]")), STR("value2")); - CHECK_STRING(node.child_value_w(STR("*")), STR("")); // child_value(name) and child_value_w(pattern) do not continue the search if a node w/out value is found first - CHECK_STRING(node.child_value_w(STR("nothing*here")), STR("")); -} - -TEST_XML(dom_node_find_child_by_attribute_w, "") -{ - CHECK(xml_node().find_child_by_attribute_w(STR("name"), STR("attr"), STR("value")) == xml_node()); - CHECK(xml_node().find_child_by_attribute_w(STR("attr"), STR("value")) == xml_node()); - - xml_node node = doc.child(STR("node")); - - CHECK(node.find_child_by_attribute_w(STR("*"), STR("att?"), STR("val*[0123456789]")) == node.child(STR("child1"))); - CHECK(node.find_child_by_attribute_w(STR("*"), STR("attr3"), STR("val*[0123456789]")) == xml_node()); - CHECK(node.find_child_by_attribute_w(STR("att?"), STR("val*[0123456789]")) == node.child(STR("child1"))); - CHECK(node.find_child_by_attribute_w(STR("attr3"), STR("val*[0123456789]")) == xml_node()); -} - -TEST_XML(dom_node_all_elements_by_name, "") -{ - std::vector v; - - v.clear(); - xml_node().all_elements_by_name(STR("node"), std::back_inserter(v)); - CHECK(v.empty()); - - v.clear(); - doc.all_elements_by_name(STR("node"), std::back_inserter(v)); - CHECK(v.size() == 1 && v[0] == doc.child(STR("node"))); - - v.clear(); - doc.all_elements_by_name(STR("child"), 
std::back_inserter(v)); - CHECK(v.size() == 3); - CHECK(v[0] == doc.child(STR("node")).child(STR("child"))); - CHECK(v[1] == doc.child(STR("node")).child(STR("child")).first_child()); - CHECK(v[2] == doc.child(STR("node")).child(STR("child")).last_child()); -} - -TEST_XML(dom_node_all_elements_by_name_w, "") -{ - std::vector v; - - v.clear(); - xml_node().all_elements_by_name_w(STR("*"), std::back_inserter(v)); - CHECK(v.empty()); - - v.clear(); - doc.all_elements_by_name_w(STR("*"), std::back_inserter(v)); - CHECK(v.size() == 4); - CHECK(v[0] == doc.child(STR("node"))); - CHECK(v[1] == doc.child(STR("node")).child(STR("child"))); - CHECK(v[2] == doc.child(STR("node")).child(STR("child")).first_child()); - CHECK(v[3] == doc.child(STR("node")).child(STR("child")).last_child()); -} - -TEST_XML(dom_node_wildcard_cset, "") -{ - xml_node node = doc.child(STR("node")); - - CHECK(node.attribute_w(STR("[A-Z]")).as_int() == 0); - CHECK(node.attribute_w(STR("[a-z]")).as_int() == 1); - CHECK(node.attribute_w(STR("[A-z]")).as_int() == 1); - CHECK(node.attribute_w(STR("[z-a]")).as_int() == 0); - CHECK(node.attribute_w(STR("[a-zA-Z]")).as_int() == 1); - CHECK(node.attribute_w(STR("[!A-Z]")).as_int() == 1); - CHECK(node.attribute_w(STR("[!A-Za-z]")).as_int() == 0); -} - -TEST_XML(dom_node_wildcard_star, "") -{ - xml_node node = doc.child(STR("node")); - - CHECK(node.attribute_w(STR("*")).as_int() == 1); - CHECK(node.attribute_w(STR("?d*")).as_int() == 1); - CHECK(node.attribute_w(STR("?c*")).as_int() == 0); - CHECK(node.attribute_w(STR("*?*c*")).as_int() == 0); - CHECK(node.attribute_w(STR("*?*d*")).as_int() == 1); -} - -// document order -TEST_XML(document_order_coverage, "") -{ - doc.precompute_document_order(); - - CHECK(doc.child(STR("node")).document_order() == 0); - CHECK(doc.child(STR("node")).attribute(STR("id")).document_order() == 0); -} +// This file includes all tests for deprecated functionality; this is going away in the next release! 
+ +#ifdef _MSC_VER +# pragma warning(disable: 4996) +#endif + +#ifdef __GNUC__ +# if __GNUC__ >= 4 && __GNUC_MINOR__ >= 2 +# pragma GCC diagnostic ignored "-Wdeprecated-declarations" +# else +# define PUGIXML_DEPRECATED +# endif +#endif + +#ifdef __INTEL_COMPILER +# pragma warning(disable: 1478) +#endif + +#include + +#include "common.hpp" + +#include "writer_string.hpp" + +#include +#include + +// format_write_bom_utf8 - it's now format_write_bom! +TEST_XML(document_save_bom_utf8, "") +{ + xml_writer_string writer; + + CHECK(test_save_narrow(doc, pugi::format_no_declaration | pugi::format_raw | pugi::format_write_bom_utf8, encoding_utf8, "\xef\xbb\xbf", 11)); +} + +// parse - it's now load_buffer_inplace +TEST(document_parse) +{ + char text[] = ""; + + pugi::xml_document doc; + + CHECK(doc.parse(text)); + CHECK_NODE(doc, STR("")); +} + +// parse with transfer_ownership_tag attribute - it's now load_buffer_inplace_own +TEST(document_parse_transfer_ownership) +{ + allocation_function alloc = get_memory_allocation_function(); + + char* text = static_cast(alloc(strlen("") + 1)); + CHECK(text); + + memcpy(text, "", strlen("") + 1); + + pugi::xml_document doc; + + CHECK(doc.parse(transfer_ownership_tag(), text)); + CHECK_NODE(doc, STR("")); +} + +#ifndef PUGIXML_NO_STL +// as_utf16 - it's now as_wide +TEST(as_utf16) +{ + CHECK(as_utf16("") == L""); + + // valid 1-byte, 2-byte and 3-byte inputs +#ifdef U_LITERALS + CHECK(as_utf16("?\xd0\x80\xe2\x80\xbd") == L"?\u0400\u203D"); +#else + CHECK(as_utf16("?\xd0\x80\xe2\x80\xbd") == L"?\x0400\x203D"); +#endif +} +#endif + +// wildcard functions +TEST_XML(dom_node_child_w, "") +{ + CHECK(doc.child_w(STR("n?de")) == doc.child(STR("node"))); + CHECK(doc.child_w(STR("n[az]de")) == xml_node()); + CHECK(doc.child_w(STR("n[aoz]de")) == doc.child(STR("node"))); + CHECK(doc.child_w(STR("*e")) == doc.child(STR("node"))); + CHECK(doc.child(STR("node")).child_w(STR("*l?[23456789]*")) == doc.child(STR("node")).child(STR("child2"))); +} + 
+TEST_XML(dom_node_attribute_w, "") +{ + xml_node node = doc.child(STR("node")); + + CHECK(node.attribute_w(STR("*tt?[23456789]*")) == node.attribute(STR("attr2"))); + CHECK(node.attribute_w(STR("?")) == xml_attribute()); +} + +TEST_XML(dom_node_next_previous_sibling_w, "") +{ + CHECK(xml_node().next_sibling_w(STR("n")) == xml_node()); + CHECK(xml_node().previous_sibling_w(STR("n")) == xml_node()); + + xml_node child1 = doc.child(STR("node")).child(STR("child1")); + xml_node child3 = doc.child(STR("node")).child(STR("child3")); + + CHECK(child1.next_sibling_w(STR("*[3456789]")) == child3); + CHECK(child1.next_sibling_w(STR("?")) == xml_node()); + CHECK(child3.previous_sibling_w(STR("*[3456789]")) == xml_node()); + CHECK(child3.previous_sibling_w(STR("?")) == xml_node()); + CHECK(child3.previous_sibling_w(STR("*1")) == child1); +} + +TEST_XML(dom_node_child_value_w, "value1value2value4") +{ + CHECK_STRING(xml_node().child_value_w(STR("n")), STR("")); + + xml_node node = doc.child(STR("node")); + + CHECK_STRING(node.child_value_w(STR("c*[23456789]")), STR("value2")); + CHECK_STRING(node.child_value_w(STR("*")), STR("")); // child_value(name) and child_value_w(pattern) do not continue the search if a node w/out value is found first + CHECK_STRING(node.child_value_w(STR("nothing*here")), STR("")); +} + +TEST_XML(dom_node_find_child_by_attribute_w, "") +{ + CHECK(xml_node().find_child_by_attribute_w(STR("name"), STR("attr"), STR("value")) == xml_node()); + CHECK(xml_node().find_child_by_attribute_w(STR("attr"), STR("value")) == xml_node()); + + xml_node node = doc.child(STR("node")); + + CHECK(node.find_child_by_attribute_w(STR("*"), STR("att?"), STR("val*[0123456789]")) == node.child(STR("child1"))); + CHECK(node.find_child_by_attribute_w(STR("*"), STR("attr3"), STR("val*[0123456789]")) == xml_node()); + CHECK(node.find_child_by_attribute_w(STR("att?"), STR("val*[0123456789]")) == node.child(STR("child1"))); + CHECK(node.find_child_by_attribute_w(STR("attr3"), 
STR("val*[0123456789]")) == xml_node()); +} + +TEST_XML(dom_node_all_elements_by_name, "") +{ + std::vector v; + + v.clear(); + xml_node().all_elements_by_name(STR("node"), std::back_inserter(v)); + CHECK(v.empty()); + + v.clear(); + doc.all_elements_by_name(STR("node"), std::back_inserter(v)); + CHECK(v.size() == 1 && v[0] == doc.child(STR("node"))); + + v.clear(); + doc.all_elements_by_name(STR("child"), std::back_inserter(v)); + CHECK(v.size() == 3); + CHECK(v[0] == doc.child(STR("node")).child(STR("child"))); + CHECK(v[1] == doc.child(STR("node")).child(STR("child")).first_child()); + CHECK(v[2] == doc.child(STR("node")).child(STR("child")).last_child()); +} + +TEST_XML(dom_node_all_elements_by_name_w, "") +{ + std::vector v; + + v.clear(); + xml_node().all_elements_by_name_w(STR("*"), std::back_inserter(v)); + CHECK(v.empty()); + + v.clear(); + doc.all_elements_by_name_w(STR("*"), std::back_inserter(v)); + CHECK(v.size() == 4); + CHECK(v[0] == doc.child(STR("node"))); + CHECK(v[1] == doc.child(STR("node")).child(STR("child"))); + CHECK(v[2] == doc.child(STR("node")).child(STR("child")).first_child()); + CHECK(v[3] == doc.child(STR("node")).child(STR("child")).last_child()); +} + +TEST_XML(dom_node_wildcard_cset, "") +{ + xml_node node = doc.child(STR("node")); + + CHECK(node.attribute_w(STR("[A-Z]")).as_int() == 0); + CHECK(node.attribute_w(STR("[a-z]")).as_int() == 1); + CHECK(node.attribute_w(STR("[A-z]")).as_int() == 1); + CHECK(node.attribute_w(STR("[z-a]")).as_int() == 0); + CHECK(node.attribute_w(STR("[a-zA-Z]")).as_int() == 1); + CHECK(node.attribute_w(STR("[!A-Z]")).as_int() == 1); + CHECK(node.attribute_w(STR("[!A-Za-z]")).as_int() == 0); +} + +TEST_XML(dom_node_wildcard_star, "") +{ + xml_node node = doc.child(STR("node")); + + CHECK(node.attribute_w(STR("*")).as_int() == 1); + CHECK(node.attribute_w(STR("?d*")).as_int() == 1); + CHECK(node.attribute_w(STR("?c*")).as_int() == 0); + CHECK(node.attribute_w(STR("*?*c*")).as_int() == 0); + 
CHECK(node.attribute_w(STR("*?*d*")).as_int() == 1); +} + +// document order +TEST_XML(document_order_coverage, "") +{ + doc.precompute_document_order(); + + CHECK(doc.child(STR("node")).document_order() == 0); + CHECK(doc.child(STR("node")).attribute(STR("id")).document_order() == 0); +} diff --git a/tests/test_document.cpp b/tests/test_document.cpp index 9a83a6d..1f781e2 100644 --- a/tests/test_document.cpp +++ b/tests/test_document.cpp @@ -1,710 +1,710 @@ -#define _CRT_SECURE_NO_WARNINGS -#define _CRT_NONSTDC_NO_DEPRECATE 0 - -#include // because Borland's STL is braindead, we have to include _before_ in order to get memcpy - -#include "common.hpp" - -#include "writer_string.hpp" - -#include -#include - -#include -#include - -#include - -#ifdef __MINGW32__ -# include // for unlink in C++0x mode -#endif - -#if defined(__CELLOS_LV2__) -# include // for unlink -#endif - -TEST(document_create_empty) -{ - pugi::xml_document doc; - CHECK_NODE(doc, STR("")); -} - -TEST(document_create) -{ - pugi::xml_document doc; - doc.append_child().set_name(STR("node")); - CHECK_NODE(doc, STR("")); -} - -#ifndef PUGIXML_NO_STL -TEST(document_load_stream) -{ - pugi::xml_document doc; - - std::istringstream iss(""); - CHECK(doc.load(iss)); - CHECK_NODE(doc, STR("")); -} - -TEST(document_load_stream_offset) -{ - pugi::xml_document doc; - - std::istringstream iss(" "); - - std::string s; - iss >> s; - - CHECK(doc.load(iss)); - CHECK_NODE(doc, STR("")); -} - -TEST(document_load_stream_text) -{ - pugi::xml_document doc; - - std::ifstream iss("tests/data/multiline.xml"); - CHECK(doc.load(iss)); - CHECK_NODE(doc, STR("")); -} - -TEST(document_load_stream_error) -{ - pugi::xml_document doc; - - std::ifstream fs1("filedoesnotexist"); - CHECK(doc.load(fs1).status == status_io_error); - -#ifndef __DMC__ // Digital Mars CRT does not like 'con' pseudo-file - std::ifstream fs2("con"); - CHECK(doc.load(fs2).status == status_io_error); -#endif - - test_runner::_memory_fail_threshold = 1; - 
std::istringstream iss(""); - CHECK(doc.load(iss).status == status_out_of_memory); -} - -TEST(document_load_stream_empty) -{ - std::istringstream iss; - - pugi::xml_document doc; - doc.load(iss); // parse result depends on STL implementation - CHECK(!doc.first_child()); -} - -TEST(document_load_stream_wide) -{ - pugi::xml_document doc; - - std::basic_istringstream iss(L""); - CHECK(doc.load(iss)); - CHECK_NODE(doc, STR("")); -} -#endif - -TEST(document_load_string) -{ - pugi::xml_document doc; - - CHECK(doc.load(STR(""))); - CHECK_NODE(doc, STR("")); -} - -TEST(document_load_file) -{ - pugi::xml_document doc; - - CHECK(doc.load_file("tests/data/small.xml")); - CHECK_NODE(doc, STR("")); -} - -TEST(document_load_file_empty) -{ - pugi::xml_document doc; - - CHECK(doc.load_file("tests/data/empty.xml")); - CHECK(!doc.first_child()); -} - -TEST(document_load_file_large) -{ - pugi::xml_document doc; - - CHECK(doc.load_file("tests/data/large.xml")); - - std::basic_string str; - str += STR(""); - for (int i = 0; i < 10000; ++i) str += STR(""); - str += STR(""); - - CHECK_NODE(doc, str.c_str()); -} - -TEST(document_load_file_error) -{ - pugi::xml_document doc; - - CHECK(doc.load_file("filedoesnotexist").status == status_file_not_found); - -#ifdef _WIN32 -#ifndef __DMC__ // Digital Mars CRT does not like 'con' pseudo-file - CHECK(doc.load_file("con").status == status_io_error); -#endif -#endif - - test_runner::_memory_fail_threshold = 1; - CHECK(doc.load_file("tests/data/small.xml").status == status_out_of_memory); -} - -TEST_XML(document_save, "") -{ - xml_writer_string writer; - - doc.save(writer, STR(""), pugi::format_no_declaration | pugi::format_raw, get_native_encoding()); - - CHECK(writer.as_string() == STR("")); -} - -#ifndef PUGIXML_NO_STL -TEST_XML(document_save_stream, "") -{ - std::ostringstream oss; - - doc.save(oss, STR(""), pugi::format_no_declaration | pugi::format_raw); - - CHECK(oss.str() == ""); -} - -TEST_XML(document_save_stream_wide, "") -{ - 
std::basic_ostringstream oss; - - doc.save(oss, STR(""), pugi::format_no_declaration | pugi::format_raw); - - CHECK(oss.str() == L""); -} -#endif - -TEST_XML(document_save_bom, "") -{ - unsigned int flags = format_no_declaration | format_raw | format_write_bom; - - // specific encodings - CHECK(test_save_narrow(doc, flags, encoding_utf8, "\xef\xbb\xbf", 8)); - CHECK(test_save_narrow(doc, flags, encoding_utf16_be, "\xfe\xff\x00<\x00n\x00 \x00/\x00>", 12)); - CHECK(test_save_narrow(doc, flags, encoding_utf16_le, "\xff\xfe<\x00n\x00 \x00/\x00>\x00", 12)); - CHECK(test_save_narrow(doc, flags, encoding_utf32_be, "\x00\x00\xfe\xff\x00\x00\x00<\x00\x00\x00n\x00\x00\x00 \x00\x00\x00/\x00\x00\x00>", 24)); - CHECK(test_save_narrow(doc, flags, encoding_utf32_le, "\xff\xfe\x00\x00<\x00\x00\x00n\x00\x00\x00 \x00\x00\x00/\x00\x00\x00>\x00\x00\x00", 24)); - - // encodings synonyms - CHECK(save_narrow(doc, flags, encoding_utf16) == save_narrow(doc, flags, (is_little_endian() ? encoding_utf16_le : encoding_utf16_be))); - CHECK(save_narrow(doc, flags, encoding_utf32) == save_narrow(doc, flags, (is_little_endian() ? encoding_utf32_le : encoding_utf32_be))); - - size_t wcharsize = sizeof(wchar_t); - CHECK(save_narrow(doc, flags, encoding_wchar) == save_narrow(doc, flags, (wcharsize == 2 ? 
encoding_utf16 : encoding_utf32))); -} - -TEST_XML(document_save_declaration, "") -{ - xml_writer_string writer; - - doc.save(writer, STR(""), pugi::format_default, get_native_encoding()); - - CHECK(writer.as_string() == STR("\n\n")); -} - -TEST_XML(document_save_declaration_present_first, "") -{ - doc.insert_child_before(node_declaration, doc.first_child()).append_attribute(STR("encoding")) = STR("utf8"); - - xml_writer_string writer; - - doc.save(writer, STR(""), pugi::format_default, get_native_encoding()); - - CHECK(writer.as_string() == STR("\n\n")); -} - -TEST_XML(document_save_declaration_present_second, "") -{ - doc.insert_child_before(node_declaration, doc.first_child()).append_attribute(STR("encoding")) = STR("utf8"); - doc.insert_child_before(node_comment, doc.first_child()).set_value(STR("text")); - - xml_writer_string writer; - - doc.save(writer, STR(""), pugi::format_default, get_native_encoding()); - - CHECK(writer.as_string() == STR("\n\n\n")); -} - -TEST_XML(document_save_declaration_present_last, "") -{ - doc.append_child(node_declaration).append_attribute(STR("encoding")) = STR("utf8"); - - xml_writer_string writer; - - doc.save(writer, STR(""), pugi::format_default, get_native_encoding()); - - // node writer only looks for declaration before the first element child - CHECK(writer.as_string() == STR("\n\n\n")); -} - -TEST_XML(document_save_file, "") -{ -#ifdef __unix - char path[] = "/tmp/pugiXXXXXX"; - - int fd = mkstemp(path); - CHECK(fd != -1); -#elif defined(__CELLOS_LV2__) - const char* path = ""; // no temporary file support -#else - const char* path = tmpnam(0); -#endif - - CHECK(doc.save_file(path)); - - CHECK(doc.load_file(path, pugi::parse_default | pugi::parse_declaration)); - CHECK_NODE(doc, STR("")); - - CHECK(unlink(path) == 0); - -#ifdef __unix - CHECK(close(fd) == 0); -#endif -} - -TEST_XML(document_save_file_error, "") -{ - CHECK(!doc.save_file("tests/data/unknown/output.xml")); -} - -TEST(document_load_buffer) -{ - const 
pugi::char_t text[] = STR(""); - - pugi::xml_document doc; - - CHECK(doc.load_buffer(text, sizeof(text))); - CHECK_NODE(doc, STR("")); -} - -TEST(document_load_buffer_inplace) -{ - pugi::char_t text[] = STR(""); - - pugi::xml_document doc; - - CHECK(doc.load_buffer_inplace(text, sizeof(text))); - CHECK_NODE(doc, STR("")); -} - -TEST(document_load_buffer_inplace_own) -{ - allocation_function alloc = get_memory_allocation_function(); - - size_t size = strlen("") * sizeof(pugi::char_t); - - pugi::char_t* text = static_cast(alloc(size)); - CHECK(text); - - memcpy(text, STR(""), size); - - pugi::xml_document doc; - - CHECK(doc.load_buffer_inplace_own(text, size)); - CHECK_NODE(doc, STR("")); -} - -TEST(document_parse_result_bool) -{ - xml_parse_result result; - - result.status = status_ok; - CHECK(result); - CHECK(!!result); - CHECK(result == true); - - for (int i = 1; i < 20; ++i) - { - result.status = (xml_parse_status)i; - CHECK(!result); - CHECK(result == false); - } -} - -TEST(document_parse_result_description) -{ - xml_parse_result result; - - for (int i = 0; i < 20; ++i) - { - result.status = (xml_parse_status)i; - - CHECK(result.description() != 0); - CHECK(result.description()[0] != 0); - } -} - -TEST(document_load_fail) -{ - xml_document doc; - CHECK(!doc.load(STR(""))); - CHECK(doc.child(STR("foo")).child(STR("bar"))); -} - -inline void check_utftest_document(const xml_document& doc) -{ - // ascii text - CHECK_STRING(doc.last_child().first_child().name(), STR("English")); - - // check that we have parsed some non-ascii text - CHECK((unsigned)doc.last_child().last_child().name()[0] >= 0x80); - - // check magic string - const pugi::char_t* v = doc.last_child().child(STR("Heavy")).previous_sibling().child_value(); - -#ifdef PUGIXML_WCHAR_MODE - CHECK(v[0] == 0x4e16 && v[1] == 0x754c && v[2] == 0x6709 && v[3] == 0x5f88 && v[4] == 0x591a && v[5] == 0x8bed && v[6] == 0x8a00); - - // last character is a surrogate pair - unsigned int v7 = v[7]; - size_t wcharsize = 
sizeof(wchar_t); - - CHECK(wcharsize == 2 ? (v[7] == 0xd852 && v[8] == 0xdf62) : (v7 == 0x24b62)); -#else - // unicode string - CHECK_STRING(v, "\xe4\xb8\x96\xe7\x95\x8c\xe6\x9c\x89\xe5\xbe\x88\xe5\xa4\x9a\xe8\xaf\xad\xe8\xa8\x80\xf0\xa4\xad\xa2"); -#endif -} - -TEST(document_load_file_convert_auto) -{ - const char* files[] = - { - "tests/data/utftest_utf16_be.xml", - "tests/data/utftest_utf16_be_bom.xml", - "tests/data/utftest_utf16_be_nodecl.xml", - "tests/data/utftest_utf16_le.xml", - "tests/data/utftest_utf16_le_bom.xml", - "tests/data/utftest_utf16_le_nodecl.xml", - "tests/data/utftest_utf32_be.xml", - "tests/data/utftest_utf32_be_bom.xml", - "tests/data/utftest_utf32_be_nodecl.xml", - "tests/data/utftest_utf32_le.xml", - "tests/data/utftest_utf32_le_bom.xml", - "tests/data/utftest_utf32_le_nodecl.xml", - "tests/data/utftest_utf8.xml", - "tests/data/utftest_utf8_bom.xml", - "tests/data/utftest_utf8_nodecl.xml" - }; - - xml_encoding encodings[] = - { - encoding_utf16_be, encoding_utf16_be, encoding_utf16_be, - encoding_utf16_le, encoding_utf16_le, encoding_utf16_le, - encoding_utf32_be, encoding_utf32_be, encoding_utf32_be, - encoding_utf32_le, encoding_utf32_le, encoding_utf32_le, - encoding_utf8, encoding_utf8, encoding_utf8 - }; - - for (unsigned int i = 0; i < sizeof(files) / sizeof(files[0]); ++i) - { - xml_document doc; - xml_parse_result res = doc.load_file(files[i]); - - CHECK(res); - CHECK(res.encoding == encodings[i]); - check_utftest_document(doc); - } -} - -TEST(document_load_file_convert_specific) -{ - const char* files[] = - { - "tests/data/utftest_utf16_be.xml", - "tests/data/utftest_utf16_be_bom.xml", - "tests/data/utftest_utf16_be_nodecl.xml", - "tests/data/utftest_utf16_le.xml", - "tests/data/utftest_utf16_le_bom.xml", - "tests/data/utftest_utf16_le_nodecl.xml", - "tests/data/utftest_utf32_be.xml", - "tests/data/utftest_utf32_be_bom.xml", - "tests/data/utftest_utf32_be_nodecl.xml", - "tests/data/utftest_utf32_le.xml", - 
"tests/data/utftest_utf32_le_bom.xml", - "tests/data/utftest_utf32_le_nodecl.xml", - "tests/data/utftest_utf8.xml", - "tests/data/utftest_utf8_bom.xml", - "tests/data/utftest_utf8_nodecl.xml" - }; - - xml_encoding encodings[] = - { - encoding_utf16_be, encoding_utf16_be, encoding_utf16_be, - encoding_utf16_le, encoding_utf16_le, encoding_utf16_le, - encoding_utf32_be, encoding_utf32_be, encoding_utf32_be, - encoding_utf32_le, encoding_utf32_le, encoding_utf32_le, - encoding_utf8, encoding_utf8, encoding_utf8 - }; - - for (unsigned int i = 0; i < sizeof(files) / sizeof(files[0]); ++i) - { - for (unsigned int j = 0; j < sizeof(files) / sizeof(files[0]); ++j) - { - xml_encoding encoding = encodings[j]; - - xml_document doc; - xml_parse_result res = doc.load_file(files[i], parse_default, encoding); - - if (encoding == encodings[i]) - { - CHECK(res); - CHECK(res.encoding == encoding); - check_utftest_document(doc); - } - else - { - // should not get past first tag - CHECK(!doc.first_child()); - } - } - } -} - -TEST(document_load_file_convert_native_endianness) -{ - const char* files[2][6] = - { - { - "tests/data/utftest_utf16_be.xml", - "tests/data/utftest_utf16_be_bom.xml", - "tests/data/utftest_utf16_be_nodecl.xml", - "tests/data/utftest_utf32_be.xml", - "tests/data/utftest_utf32_be_bom.xml", - "tests/data/utftest_utf32_be_nodecl.xml", - }, - { - "tests/data/utftest_utf16_le.xml", - "tests/data/utftest_utf16_le_bom.xml", - "tests/data/utftest_utf16_le_nodecl.xml", - "tests/data/utftest_utf32_le.xml", - "tests/data/utftest_utf32_le_bom.xml", - "tests/data/utftest_utf32_le_nodecl.xml", - } - }; - - xml_encoding encodings[] = - { - encoding_utf16, encoding_utf16, encoding_utf16, - encoding_utf32, encoding_utf32, encoding_utf32 - }; - - for (unsigned int i = 0; i < sizeof(files[0]) / sizeof(files[0][0]); ++i) - { - const char* right_file = files[is_little_endian()][i]; - const char* wrong_file = files[!is_little_endian()][i]; - - for (unsigned int j = 0; j < 
sizeof(encodings) / sizeof(encodings[0]); ++j) - { - xml_encoding encoding = encodings[j]; - - // check file with right endianness - { - xml_document doc; - xml_parse_result res = doc.load_file(right_file, parse_default, encoding); - - if (encoding == encodings[i]) - { - CHECK(res); - check_utftest_document(doc); - } - else - { - // should not get past first tag - CHECK(!doc.first_child()); - } - } - - // check file with wrong endianness - { - xml_document doc; - doc.load_file(wrong_file, parse_default, encoding); - CHECK(!doc.first_child()); - } - } - } -} - -static bool load_file_in_memory(const char* path, char*& data, size_t& size) -{ - FILE* file = fopen(path, "rb"); - if (!file) return false; - - fseek(file, 0, SEEK_END); - size = (size_t)ftell(file); - fseek(file, 0, SEEK_SET); - - data = new char[size]; - - CHECK(fread(data, 1, size, file) == size); - fclose(file); - - return true; -} - -TEST(document_contents_preserve) -{ - struct file_t - { - const char* path; - xml_encoding encoding; - - char* data; - size_t size; - }; - - file_t files[] = - { - {"tests/data/utftest_utf16_be_clean.xml", encoding_utf16_be, 0, 0}, - {"tests/data/utftest_utf16_le_clean.xml", encoding_utf16_le, 0, 0}, - {"tests/data/utftest_utf32_be_clean.xml", encoding_utf32_be, 0, 0}, - {"tests/data/utftest_utf32_le_clean.xml", encoding_utf32_le, 0, 0}, - {"tests/data/utftest_utf8_clean.xml", encoding_utf8, 0, 0} - }; - - // load files in memory - for (unsigned int i = 0; i < sizeof(files) / sizeof(files[0]); ++i) - { - CHECK(load_file_in_memory(files[i].path, files[i].data, files[i].size)); - } - - // convert each file to each format and compare bitwise - for (unsigned int src = 0; src < sizeof(files) / sizeof(files[0]); ++src) - { - for (unsigned int dst = 0; dst < sizeof(files) / sizeof(files[0]); ++dst) - { - // parse into document (preserve comments, declaration and whitespace pcdata) - xml_document doc; - CHECK(doc.load_buffer(files[src].data, files[src].size, parse_default | 
parse_ws_pcdata | parse_declaration | parse_comments)); - - // compare saved document with the original (raw formatting, without extra declaration, write bom if it was in original file) - CHECK(test_save_narrow(doc, format_raw | format_no_declaration | format_write_bom, files[dst].encoding, files[dst].data, files[dst].size)); - } - } - - // cleanup - for (unsigned int j = 0; j < sizeof(files) / sizeof(files[0]); ++j) - { - delete[] files[j].data; - } -} - -static bool test_parse_fail(const void* buffer, size_t size, xml_encoding encoding = encoding_utf8) -{ - // copy buffer to heap (to enable out-of-bounds checks) - void* temp = malloc(size); - memcpy(temp, buffer, size); - - // check that this parses without buffer overflows (yielding an error) - xml_document doc; - bool result = doc.load_buffer_inplace(temp, size, parse_default, encoding); - - free(temp); - - return !result; -} - -TEST(document_convert_invalid_utf8) -{ - // invalid 1-byte input - CHECK(test_parse_fail("<\xb0", 2)); - - // invalid 2-byte input - CHECK(test_parse_fail("<\xc0", 2)); - CHECK(test_parse_fail("<\xd0", 2)); - - // invalid 3-byte input - CHECK(test_parse_fail("<\xe2\x80", 3)); - CHECK(test_parse_fail("<\xe2", 2)); - - // invalid 4-byte input - CHECK(test_parse_fail("<\xf2\x97\x98", 4)); - CHECK(test_parse_fail("<\xf2\x97", 3)); - CHECK(test_parse_fail("<\xf2", 2)); - - // invalid 5-byte input - CHECK(test_parse_fail("<\xf8", 2)); -} - -TEST(document_convert_invalid_utf16) -{ - // check non-terminated degenerate handling - CHECK(test_parse_fail("\x00<\xda\x1d", 4, encoding_utf16_be)); - CHECK(test_parse_fail("<\x00\x1d\xda", 4, encoding_utf16_le)); - - // check incorrect leading code - CHECK(test_parse_fail("\x00<\xde\x24", 4, encoding_utf16_be)); - CHECK(test_parse_fail("<\x00\x24\xde", 4, encoding_utf16_le)); -} - -TEST(document_load_buffer_empty) -{ - xml_encoding encodings[] = - { - encoding_auto, - encoding_utf8, - encoding_utf16_le, - encoding_utf16_be, - encoding_utf16, - 
encoding_utf32_le, - encoding_utf32_be, - encoding_utf32, - encoding_wchar - }; - - char buffer[1]; - - for (unsigned int i = 0; i < sizeof(encodings) / sizeof(encodings[0]); ++i) - { - xml_encoding encoding = encodings[i]; - - xml_document doc; - CHECK(doc.load_buffer(buffer, 0, parse_default, encoding) && !doc.first_child()); - CHECK(doc.load_buffer(0, 0, parse_default, encoding) && !doc.first_child()); - - CHECK(doc.load_buffer_inplace(buffer, 0, parse_default, encoding) && !doc.first_child()); - CHECK(doc.load_buffer_inplace(0, 0, parse_default, encoding) && !doc.first_child()); - - void* own_buffer = pugi::get_memory_allocation_function()(1); - - CHECK(doc.load_buffer_inplace_own(own_buffer, 0, parse_default, encoding) && !doc.first_child()); - CHECK(doc.load_buffer_inplace_own(0, 0, parse_default, encoding) && !doc.first_child()); - } -} - -TEST(document_progressive_truncation) -{ - char* original_data; - size_t original_size; - - CHECK(load_file_in_memory("tests/data/utftest_utf8.xml", original_data, original_size)); - - for (size_t i = 1; i < original_size; ++i) - { - char* truncated_data = new char[i]; - memcpy(truncated_data, original_data, i); - - xml_document doc; - bool result = doc.load_buffer(truncated_data, i); - - // some truncate locations are parseable - those that come after declaration, declaration + doctype, declaration + doctype + comment and eof - CHECK(((i - 21) < 3 || (i - 66) < 3 || (i - 95) < 3 || i >= 3325) ? 
result : !result); - - delete[] truncated_data; - } - - delete[] original_data; -} +#define _CRT_SECURE_NO_WARNINGS +#define _CRT_NONSTDC_NO_DEPRECATE 0 + +#include // because Borland's STL is braindead, we have to include _before_ in order to get memcpy + +#include "common.hpp" + +#include "writer_string.hpp" + +#include +#include + +#include +#include + +#include + +#ifdef __MINGW32__ +# include // for unlink in C++0x mode +#endif + +#if defined(__CELLOS_LV2__) +# include // for unlink +#endif + +TEST(document_create_empty) +{ + pugi::xml_document doc; + CHECK_NODE(doc, STR("")); +} + +TEST(document_create) +{ + pugi::xml_document doc; + doc.append_child().set_name(STR("node")); + CHECK_NODE(doc, STR("")); +} + +#ifndef PUGIXML_NO_STL +TEST(document_load_stream) +{ + pugi::xml_document doc; + + std::istringstream iss(""); + CHECK(doc.load(iss)); + CHECK_NODE(doc, STR("")); +} + +TEST(document_load_stream_offset) +{ + pugi::xml_document doc; + + std::istringstream iss(" "); + + std::string s; + iss >> s; + + CHECK(doc.load(iss)); + CHECK_NODE(doc, STR("")); +} + +TEST(document_load_stream_text) +{ + pugi::xml_document doc; + + std::ifstream iss("tests/data/multiline.xml"); + CHECK(doc.load(iss)); + CHECK_NODE(doc, STR("")); +} + +TEST(document_load_stream_error) +{ + pugi::xml_document doc; + + std::ifstream fs1("filedoesnotexist"); + CHECK(doc.load(fs1).status == status_io_error); + +#ifndef __DMC__ // Digital Mars CRT does not like 'con' pseudo-file + std::ifstream fs2("con"); + CHECK(doc.load(fs2).status == status_io_error); +#endif + + test_runner::_memory_fail_threshold = 1; + std::istringstream iss(""); + CHECK(doc.load(iss).status == status_out_of_memory); +} + +TEST(document_load_stream_empty) +{ + std::istringstream iss; + + pugi::xml_document doc; + doc.load(iss); // parse result depends on STL implementation + CHECK(!doc.first_child()); +} + +TEST(document_load_stream_wide) +{ + pugi::xml_document doc; + + std::basic_istringstream iss(L""); + 
CHECK(doc.load(iss)); + CHECK_NODE(doc, STR("")); +} +#endif + +TEST(document_load_string) +{ + pugi::xml_document doc; + + CHECK(doc.load(STR(""))); + CHECK_NODE(doc, STR("")); +} + +TEST(document_load_file) +{ + pugi::xml_document doc; + + CHECK(doc.load_file("tests/data/small.xml")); + CHECK_NODE(doc, STR("")); +} + +TEST(document_load_file_empty) +{ + pugi::xml_document doc; + + CHECK(doc.load_file("tests/data/empty.xml")); + CHECK(!doc.first_child()); +} + +TEST(document_load_file_large) +{ + pugi::xml_document doc; + + CHECK(doc.load_file("tests/data/large.xml")); + + std::basic_string str; + str += STR(""); + for (int i = 0; i < 10000; ++i) str += STR(""); + str += STR(""); + + CHECK_NODE(doc, str.c_str()); +} + +TEST(document_load_file_error) +{ + pugi::xml_document doc; + + CHECK(doc.load_file("filedoesnotexist").status == status_file_not_found); + +#ifdef _WIN32 +#ifndef __DMC__ // Digital Mars CRT does not like 'con' pseudo-file + CHECK(doc.load_file("con").status == status_io_error); +#endif +#endif + + test_runner::_memory_fail_threshold = 1; + CHECK(doc.load_file("tests/data/small.xml").status == status_out_of_memory); +} + +TEST_XML(document_save, "") +{ + xml_writer_string writer; + + doc.save(writer, STR(""), pugi::format_no_declaration | pugi::format_raw, get_native_encoding()); + + CHECK(writer.as_string() == STR("")); +} + +#ifndef PUGIXML_NO_STL +TEST_XML(document_save_stream, "") +{ + std::ostringstream oss; + + doc.save(oss, STR(""), pugi::format_no_declaration | pugi::format_raw); + + CHECK(oss.str() == ""); +} + +TEST_XML(document_save_stream_wide, "") +{ + std::basic_ostringstream oss; + + doc.save(oss, STR(""), pugi::format_no_declaration | pugi::format_raw); + + CHECK(oss.str() == L""); +} +#endif + +TEST_XML(document_save_bom, "") +{ + unsigned int flags = format_no_declaration | format_raw | format_write_bom; + + // specific encodings + CHECK(test_save_narrow(doc, flags, encoding_utf8, "\xef\xbb\xbf", 8)); + CHECK(test_save_narrow(doc, 
flags, encoding_utf16_be, "\xfe\xff\x00<\x00n\x00 \x00/\x00>", 12)); + CHECK(test_save_narrow(doc, flags, encoding_utf16_le, "\xff\xfe<\x00n\x00 \x00/\x00>\x00", 12)); + CHECK(test_save_narrow(doc, flags, encoding_utf32_be, "\x00\x00\xfe\xff\x00\x00\x00<\x00\x00\x00n\x00\x00\x00 \x00\x00\x00/\x00\x00\x00>", 24)); + CHECK(test_save_narrow(doc, flags, encoding_utf32_le, "\xff\xfe\x00\x00<\x00\x00\x00n\x00\x00\x00 \x00\x00\x00/\x00\x00\x00>\x00\x00\x00", 24)); + + // encodings synonyms + CHECK(save_narrow(doc, flags, encoding_utf16) == save_narrow(doc, flags, (is_little_endian() ? encoding_utf16_le : encoding_utf16_be))); + CHECK(save_narrow(doc, flags, encoding_utf32) == save_narrow(doc, flags, (is_little_endian() ? encoding_utf32_le : encoding_utf32_be))); + + size_t wcharsize = sizeof(wchar_t); + CHECK(save_narrow(doc, flags, encoding_wchar) == save_narrow(doc, flags, (wcharsize == 2 ? encoding_utf16 : encoding_utf32))); +} + +TEST_XML(document_save_declaration, "") +{ + xml_writer_string writer; + + doc.save(writer, STR(""), pugi::format_default, get_native_encoding()); + + CHECK(writer.as_string() == STR("\n\n")); +} + +TEST_XML(document_save_declaration_present_first, "") +{ + doc.insert_child_before(node_declaration, doc.first_child()).append_attribute(STR("encoding")) = STR("utf8"); + + xml_writer_string writer; + + doc.save(writer, STR(""), pugi::format_default, get_native_encoding()); + + CHECK(writer.as_string() == STR("\n\n")); +} + +TEST_XML(document_save_declaration_present_second, "") +{ + doc.insert_child_before(node_declaration, doc.first_child()).append_attribute(STR("encoding")) = STR("utf8"); + doc.insert_child_before(node_comment, doc.first_child()).set_value(STR("text")); + + xml_writer_string writer; + + doc.save(writer, STR(""), pugi::format_default, get_native_encoding()); + + CHECK(writer.as_string() == STR("\n\n\n")); +} + +TEST_XML(document_save_declaration_present_last, "") +{ + 
doc.append_child(node_declaration).append_attribute(STR("encoding")) = STR("utf8"); + + xml_writer_string writer; + + doc.save(writer, STR(""), pugi::format_default, get_native_encoding()); + + // node writer only looks for declaration before the first element child + CHECK(writer.as_string() == STR("\n\n\n")); +} + +TEST_XML(document_save_file, "") +{ +#ifdef __unix + char path[] = "/tmp/pugiXXXXXX"; + + int fd = mkstemp(path); + CHECK(fd != -1); +#elif defined(__CELLOS_LV2__) + const char* path = ""; // no temporary file support +#else + const char* path = tmpnam(0); +#endif + + CHECK(doc.save_file(path)); + + CHECK(doc.load_file(path, pugi::parse_default | pugi::parse_declaration)); + CHECK_NODE(doc, STR("")); + + CHECK(unlink(path) == 0); + +#ifdef __unix + CHECK(close(fd) == 0); +#endif +} + +TEST_XML(document_save_file_error, "") +{ + CHECK(!doc.save_file("tests/data/unknown/output.xml")); +} + +TEST(document_load_buffer) +{ + const pugi::char_t text[] = STR(""); + + pugi::xml_document doc; + + CHECK(doc.load_buffer(text, sizeof(text))); + CHECK_NODE(doc, STR("")); +} + +TEST(document_load_buffer_inplace) +{ + pugi::char_t text[] = STR(""); + + pugi::xml_document doc; + + CHECK(doc.load_buffer_inplace(text, sizeof(text))); + CHECK_NODE(doc, STR("")); +} + +TEST(document_load_buffer_inplace_own) +{ + allocation_function alloc = get_memory_allocation_function(); + + size_t size = strlen("") * sizeof(pugi::char_t); + + pugi::char_t* text = static_cast(alloc(size)); + CHECK(text); + + memcpy(text, STR(""), size); + + pugi::xml_document doc; + + CHECK(doc.load_buffer_inplace_own(text, size)); + CHECK_NODE(doc, STR("")); +} + +TEST(document_parse_result_bool) +{ + xml_parse_result result; + + result.status = status_ok; + CHECK(result); + CHECK(!!result); + CHECK(result == true); + + for (int i = 1; i < 20; ++i) + { + result.status = (xml_parse_status)i; + CHECK(!result); + CHECK(result == false); + } +} + +TEST(document_parse_result_description) +{ + 
xml_parse_result result; + + for (int i = 0; i < 20; ++i) + { + result.status = (xml_parse_status)i; + + CHECK(result.description() != 0); + CHECK(result.description()[0] != 0); + } +} + +TEST(document_load_fail) +{ + xml_document doc; + CHECK(!doc.load(STR(""))); + CHECK(doc.child(STR("foo")).child(STR("bar"))); +} + +inline void check_utftest_document(const xml_document& doc) +{ + // ascii text + CHECK_STRING(doc.last_child().first_child().name(), STR("English")); + + // check that we have parsed some non-ascii text + CHECK((unsigned)doc.last_child().last_child().name()[0] >= 0x80); + + // check magic string + const pugi::char_t* v = doc.last_child().child(STR("Heavy")).previous_sibling().child_value(); + +#ifdef PUGIXML_WCHAR_MODE + CHECK(v[0] == 0x4e16 && v[1] == 0x754c && v[2] == 0x6709 && v[3] == 0x5f88 && v[4] == 0x591a && v[5] == 0x8bed && v[6] == 0x8a00); + + // last character is a surrogate pair + unsigned int v7 = v[7]; + size_t wcharsize = sizeof(wchar_t); + + CHECK(wcharsize == 2 ? 
(v[7] == 0xd852 && v[8] == 0xdf62) : (v7 == 0x24b62)); +#else + // unicode string + CHECK_STRING(v, "\xe4\xb8\x96\xe7\x95\x8c\xe6\x9c\x89\xe5\xbe\x88\xe5\xa4\x9a\xe8\xaf\xad\xe8\xa8\x80\xf0\xa4\xad\xa2"); +#endif +} + +TEST(document_load_file_convert_auto) +{ + const char* files[] = + { + "tests/data/utftest_utf16_be.xml", + "tests/data/utftest_utf16_be_bom.xml", + "tests/data/utftest_utf16_be_nodecl.xml", + "tests/data/utftest_utf16_le.xml", + "tests/data/utftest_utf16_le_bom.xml", + "tests/data/utftest_utf16_le_nodecl.xml", + "tests/data/utftest_utf32_be.xml", + "tests/data/utftest_utf32_be_bom.xml", + "tests/data/utftest_utf32_be_nodecl.xml", + "tests/data/utftest_utf32_le.xml", + "tests/data/utftest_utf32_le_bom.xml", + "tests/data/utftest_utf32_le_nodecl.xml", + "tests/data/utftest_utf8.xml", + "tests/data/utftest_utf8_bom.xml", + "tests/data/utftest_utf8_nodecl.xml" + }; + + xml_encoding encodings[] = + { + encoding_utf16_be, encoding_utf16_be, encoding_utf16_be, + encoding_utf16_le, encoding_utf16_le, encoding_utf16_le, + encoding_utf32_be, encoding_utf32_be, encoding_utf32_be, + encoding_utf32_le, encoding_utf32_le, encoding_utf32_le, + encoding_utf8, encoding_utf8, encoding_utf8 + }; + + for (unsigned int i = 0; i < sizeof(files) / sizeof(files[0]); ++i) + { + xml_document doc; + xml_parse_result res = doc.load_file(files[i]); + + CHECK(res); + CHECK(res.encoding == encodings[i]); + check_utftest_document(doc); + } +} + +TEST(document_load_file_convert_specific) +{ + const char* files[] = + { + "tests/data/utftest_utf16_be.xml", + "tests/data/utftest_utf16_be_bom.xml", + "tests/data/utftest_utf16_be_nodecl.xml", + "tests/data/utftest_utf16_le.xml", + "tests/data/utftest_utf16_le_bom.xml", + "tests/data/utftest_utf16_le_nodecl.xml", + "tests/data/utftest_utf32_be.xml", + "tests/data/utftest_utf32_be_bom.xml", + "tests/data/utftest_utf32_be_nodecl.xml", + "tests/data/utftest_utf32_le.xml", + "tests/data/utftest_utf32_le_bom.xml", + 
"tests/data/utftest_utf32_le_nodecl.xml", + "tests/data/utftest_utf8.xml", + "tests/data/utftest_utf8_bom.xml", + "tests/data/utftest_utf8_nodecl.xml" + }; + + xml_encoding encodings[] = + { + encoding_utf16_be, encoding_utf16_be, encoding_utf16_be, + encoding_utf16_le, encoding_utf16_le, encoding_utf16_le, + encoding_utf32_be, encoding_utf32_be, encoding_utf32_be, + encoding_utf32_le, encoding_utf32_le, encoding_utf32_le, + encoding_utf8, encoding_utf8, encoding_utf8 + }; + + for (unsigned int i = 0; i < sizeof(files) / sizeof(files[0]); ++i) + { + for (unsigned int j = 0; j < sizeof(files) / sizeof(files[0]); ++j) + { + xml_encoding encoding = encodings[j]; + + xml_document doc; + xml_parse_result res = doc.load_file(files[i], parse_default, encoding); + + if (encoding == encodings[i]) + { + CHECK(res); + CHECK(res.encoding == encoding); + check_utftest_document(doc); + } + else + { + // should not get past first tag + CHECK(!doc.first_child()); + } + } + } +} + +TEST(document_load_file_convert_native_endianness) +{ + const char* files[2][6] = + { + { + "tests/data/utftest_utf16_be.xml", + "tests/data/utftest_utf16_be_bom.xml", + "tests/data/utftest_utf16_be_nodecl.xml", + "tests/data/utftest_utf32_be.xml", + "tests/data/utftest_utf32_be_bom.xml", + "tests/data/utftest_utf32_be_nodecl.xml", + }, + { + "tests/data/utftest_utf16_le.xml", + "tests/data/utftest_utf16_le_bom.xml", + "tests/data/utftest_utf16_le_nodecl.xml", + "tests/data/utftest_utf32_le.xml", + "tests/data/utftest_utf32_le_bom.xml", + "tests/data/utftest_utf32_le_nodecl.xml", + } + }; + + xml_encoding encodings[] = + { + encoding_utf16, encoding_utf16, encoding_utf16, + encoding_utf32, encoding_utf32, encoding_utf32 + }; + + for (unsigned int i = 0; i < sizeof(files[0]) / sizeof(files[0][0]); ++i) + { + const char* right_file = files[is_little_endian()][i]; + const char* wrong_file = files[!is_little_endian()][i]; + + for (unsigned int j = 0; j < sizeof(encodings) / sizeof(encodings[0]); ++j) + { + 
xml_encoding encoding = encodings[j]; + + // check file with right endianness + { + xml_document doc; + xml_parse_result res = doc.load_file(right_file, parse_default, encoding); + + if (encoding == encodings[i]) + { + CHECK(res); + check_utftest_document(doc); + } + else + { + // should not get past first tag + CHECK(!doc.first_child()); + } + } + + // check file with wrong endianness + { + xml_document doc; + doc.load_file(wrong_file, parse_default, encoding); + CHECK(!doc.first_child()); + } + } + } +} + +static bool load_file_in_memory(const char* path, char*& data, size_t& size) +{ + FILE* file = fopen(path, "rb"); + if (!file) return false; + + fseek(file, 0, SEEK_END); + size = (size_t)ftell(file); + fseek(file, 0, SEEK_SET); + + data = new char[size]; + + CHECK(fread(data, 1, size, file) == size); + fclose(file); + + return true; +} + +TEST(document_contents_preserve) +{ + struct file_t + { + const char* path; + xml_encoding encoding; + + char* data; + size_t size; + }; + + file_t files[] = + { + {"tests/data/utftest_utf16_be_clean.xml", encoding_utf16_be, 0, 0}, + {"tests/data/utftest_utf16_le_clean.xml", encoding_utf16_le, 0, 0}, + {"tests/data/utftest_utf32_be_clean.xml", encoding_utf32_be, 0, 0}, + {"tests/data/utftest_utf32_le_clean.xml", encoding_utf32_le, 0, 0}, + {"tests/data/utftest_utf8_clean.xml", encoding_utf8, 0, 0} + }; + + // load files in memory + for (unsigned int i = 0; i < sizeof(files) / sizeof(files[0]); ++i) + { + CHECK(load_file_in_memory(files[i].path, files[i].data, files[i].size)); + } + + // convert each file to each format and compare bitwise + for (unsigned int src = 0; src < sizeof(files) / sizeof(files[0]); ++src) + { + for (unsigned int dst = 0; dst < sizeof(files) / sizeof(files[0]); ++dst) + { + // parse into document (preserve comments, declaration and whitespace pcdata) + xml_document doc; + CHECK(doc.load_buffer(files[src].data, files[src].size, parse_default | parse_ws_pcdata | parse_declaration | parse_comments)); + + 
// compare saved document with the original (raw formatting, without extra declaration, write bom if it was in original file) + CHECK(test_save_narrow(doc, format_raw | format_no_declaration | format_write_bom, files[dst].encoding, files[dst].data, files[dst].size)); + } + } + + // cleanup + for (unsigned int j = 0; j < sizeof(files) / sizeof(files[0]); ++j) + { + delete[] files[j].data; + } +} + +static bool test_parse_fail(const void* buffer, size_t size, xml_encoding encoding = encoding_utf8) +{ + // copy buffer to heap (to enable out-of-bounds checks) + void* temp = malloc(size); + memcpy(temp, buffer, size); + + // check that this parses without buffer overflows (yielding an error) + xml_document doc; + bool result = doc.load_buffer_inplace(temp, size, parse_default, encoding); + + free(temp); + + return !result; +} + +TEST(document_convert_invalid_utf8) +{ + // invalid 1-byte input + CHECK(test_parse_fail("<\xb0", 2)); + + // invalid 2-byte input + CHECK(test_parse_fail("<\xc0", 2)); + CHECK(test_parse_fail("<\xd0", 2)); + + // invalid 3-byte input + CHECK(test_parse_fail("<\xe2\x80", 3)); + CHECK(test_parse_fail("<\xe2", 2)); + + // invalid 4-byte input + CHECK(test_parse_fail("<\xf2\x97\x98", 4)); + CHECK(test_parse_fail("<\xf2\x97", 3)); + CHECK(test_parse_fail("<\xf2", 2)); + + // invalid 5-byte input + CHECK(test_parse_fail("<\xf8", 2)); +} + +TEST(document_convert_invalid_utf16) +{ + // check non-terminated degenerate handling + CHECK(test_parse_fail("\x00<\xda\x1d", 4, encoding_utf16_be)); + CHECK(test_parse_fail("<\x00\x1d\xda", 4, encoding_utf16_le)); + + // check incorrect leading code + CHECK(test_parse_fail("\x00<\xde\x24", 4, encoding_utf16_be)); + CHECK(test_parse_fail("<\x00\x24\xde", 4, encoding_utf16_le)); +} + +TEST(document_load_buffer_empty) +{ + xml_encoding encodings[] = + { + encoding_auto, + encoding_utf8, + encoding_utf16_le, + encoding_utf16_be, + encoding_utf16, + encoding_utf32_le, + encoding_utf32_be, + encoding_utf32, + 
encoding_wchar + }; + + char buffer[1]; + + for (unsigned int i = 0; i < sizeof(encodings) / sizeof(encodings[0]); ++i) + { + xml_encoding encoding = encodings[i]; + + xml_document doc; + CHECK(doc.load_buffer(buffer, 0, parse_default, encoding) && !doc.first_child()); + CHECK(doc.load_buffer(0, 0, parse_default, encoding) && !doc.first_child()); + + CHECK(doc.load_buffer_inplace(buffer, 0, parse_default, encoding) && !doc.first_child()); + CHECK(doc.load_buffer_inplace(0, 0, parse_default, encoding) && !doc.first_child()); + + void* own_buffer = pugi::get_memory_allocation_function()(1); + + CHECK(doc.load_buffer_inplace_own(own_buffer, 0, parse_default, encoding) && !doc.first_child()); + CHECK(doc.load_buffer_inplace_own(0, 0, parse_default, encoding) && !doc.first_child()); + } +} + +TEST(document_progressive_truncation) +{ + char* original_data; + size_t original_size; + + CHECK(load_file_in_memory("tests/data/utftest_utf8.xml", original_data, original_size)); + + for (size_t i = 1; i < original_size; ++i) + { + char* truncated_data = new char[i]; + memcpy(truncated_data, original_data, i); + + xml_document doc; + bool result = doc.load_buffer(truncated_data, i); + + // some truncate locations are parseable - those that come after declaration, declaration + doctype, declaration + doctype + comment and eof + CHECK(((i - 21) < 3 || (i - 66) < 3 || (i - 95) < 3 || i >= 3325) ? 
result : !result); + + delete[] truncated_data; + } + + delete[] original_data; +} diff --git a/tests/test_dom_modify.cpp b/tests/test_dom_modify.cpp index 1e38d95..38cc89e 100644 --- a/tests/test_dom_modify.cpp +++ b/tests/test_dom_modify.cpp @@ -1,659 +1,659 @@ -#include "common.hpp" - -#include - -TEST_XML(dom_attr_assign, "") -{ - xml_node node = doc.child(STR("node")); - - node.append_attribute(STR("attr1")) = STR("v1"); - xml_attribute() = STR("v1"); - - node.append_attribute(STR("attr2")) = -2147483647; - node.append_attribute(STR("attr3")) = -2147483647 - 1; - xml_attribute() = -2147483647 - 1; - - node.append_attribute(STR("attr4")) = 4294967295u; - node.append_attribute(STR("attr5")) = 4294967294u; - xml_attribute() = 2147483647; - - node.append_attribute(STR("attr6")) = 0.5; - xml_attribute() = 0.5; - - node.append_attribute(STR("attr7")) = true; - xml_attribute() = true; - - CHECK_NODE(node, STR("")); -} - -TEST_XML(dom_attr_set_value, "") -{ - xml_node node = doc.child(STR("node")); - - CHECK(node.append_attribute(STR("attr1")).set_value(STR("v1"))); - CHECK(!xml_attribute().set_value(STR("v1"))); - - CHECK(node.append_attribute(STR("attr2")).set_value(-2147483647)); - CHECK(node.append_attribute(STR("attr3")).set_value(-2147483647 - 1)); - CHECK(!xml_attribute().set_value(-2147483647)); - - CHECK(node.append_attribute(STR("attr4")).set_value(4294967295u)); - CHECK(node.append_attribute(STR("attr5")).set_value(4294967294u)); - CHECK(!xml_attribute().set_value(4294967295u)); - - CHECK(node.append_attribute(STR("attr6")).set_value(0.5)); - CHECK(!xml_attribute().set_value(0.5)); - - CHECK(node.append_attribute(STR("attr7")).set_value(true)); - CHECK(!xml_attribute().set_value(true)); - - CHECK_NODE(node, STR("")); -} - -TEST_XML(dom_node_set_name, "text") -{ - CHECK(doc.child(STR("node")).set_name(STR("n"))); - CHECK(!doc.child(STR("node")).first_child().set_name(STR("n"))); - CHECK(!xml_node().set_name(STR("n"))); - - CHECK_NODE(doc, STR("text")); -} - 
-TEST_XML(dom_node_set_value, "text") -{ - CHECK(doc.child(STR("node")).first_child().set_value(STR("no text"))); - CHECK(!doc.child(STR("node")).set_value(STR("no text"))); - CHECK(!xml_node().set_value(STR("no text"))); - - CHECK_NODE(doc, STR("no text")); -} - -TEST_XML(dom_node_set_value_allocated, "text") -{ - CHECK(doc.child(STR("node")).first_child().set_value(STR("no text"))); - CHECK(!doc.child(STR("node")).set_value(STR("no text"))); - CHECK(!xml_node().set_value(STR("no text"))); - CHECK(doc.child(STR("node")).first_child().set_value(STR("no text at all"))); - - CHECK_NODE(doc, STR("no text at all")); -} - -TEST_XML(dom_node_append_attribute, "") -{ - CHECK(xml_node().append_attribute(STR("a")) == xml_attribute()); - CHECK(doc.append_attribute(STR("a")) == xml_attribute()); - - xml_attribute a1 = doc.child(STR("node")).append_attribute(STR("a1")); - CHECK(a1); - a1 = STR("v1"); - - xml_attribute a2 = doc.child(STR("node")).append_attribute(STR("a2")); - CHECK(a2 && a1 != a2); - a2 = STR("v2"); - - xml_attribute a3 = doc.child(STR("node")).child(STR("child")).append_attribute(STR("a3")); - CHECK(a3 && a1 != a3 && a2 != a3); - a3 = STR("v3"); - - CHECK_NODE(doc, STR("")); -} - -TEST_XML(dom_node_insert_attribute_after, "") -{ - CHECK(xml_node().insert_attribute_after(STR("a"), xml_attribute()) == xml_attribute()); - - xml_node node = doc.child(STR("node")); - xml_node child = node.child(STR("child")); - - xml_attribute a1 = node.attribute(STR("a1")); - xml_attribute a2 = child.attribute(STR("a2")); - - CHECK(node.insert_attribute_after(STR("a"), xml_attribute()) == xml_attribute()); - CHECK(node.insert_attribute_after(STR("a"), a2) == xml_attribute()); - - xml_attribute a3 = node.insert_attribute_after(STR("a3"), a1); - CHECK(a3 && a3 != a2 && a3 != a1); - a3 = STR("v3"); - - xml_attribute a4 = node.insert_attribute_after(STR("a4"), a1); - CHECK(a4 && a4 != a3 && a4 != a2 && a4 != a1); - a4 = STR("v4"); - - xml_attribute a5 = 
node.insert_attribute_after(STR("a5"), a3); - CHECK(a5 && a5 != a4 && a5 != a3 && a5 != a2 && a5 != a1); - a5 = STR("v5"); - - CHECK(child.insert_attribute_after(STR("a"), a4) == xml_attribute()); - - CHECK_NODE(doc, STR("")); -} - -TEST_XML(dom_node_insert_attribute_before, "") -{ - CHECK(xml_node().insert_attribute_before(STR("a"), xml_attribute()) == xml_attribute()); - - xml_node node = doc.child(STR("node")); - xml_node child = node.child(STR("child")); - - xml_attribute a1 = node.attribute(STR("a1")); - xml_attribute a2 = child.attribute(STR("a2")); - - CHECK(node.insert_attribute_before(STR("a"), xml_attribute()) == xml_attribute()); - CHECK(node.insert_attribute_before(STR("a"), a2) == xml_attribute()); - - xml_attribute a3 = node.insert_attribute_before(STR("a3"), a1); - CHECK(a3 && a3 != a2 && a3 != a1); - a3 = STR("v3"); - - xml_attribute a4 = node.insert_attribute_before(STR("a4"), a1); - CHECK(a4 && a4 != a3 && a4 != a2 && a4 != a1); - a4 = STR("v4"); - - xml_attribute a5 = node.insert_attribute_before(STR("a5"), a3); - CHECK(a5 && a5 != a4 && a5 != a3 && a5 != a2 && a5 != a1); - a5 = STR("v5"); - - CHECK(child.insert_attribute_before(STR("a"), a4) == xml_attribute()); - - CHECK_NODE(doc, STR("")); -} - -TEST_XML(dom_node_append_copy_attribute, "") -{ - CHECK(xml_node().append_copy(xml_attribute()) == xml_attribute()); - CHECK(xml_node().append_copy(doc.child(STR("node")).attribute(STR("a1"))) == xml_attribute()); - CHECK(doc.append_copy(doc.child(STR("node")).attribute(STR("a1"))) == xml_attribute()); - - xml_node node = doc.child(STR("node")); - xml_node child = node.child(STR("child")); - - xml_attribute a1 = node.attribute(STR("a1")); - xml_attribute a2 = child.attribute(STR("a2")); - - xml_attribute a3 = node.append_copy(a1); - CHECK(a3 && a3 != a2 && a3 != a1); - - xml_attribute a4 = node.append_copy(a2); - CHECK(a4 && a4 != a3 && a4 != a2 && a4 != a1); - - xml_attribute a5 = node.last_child().append_copy(a1); - CHECK(a5 && a5 != a4 && a5 != a3 
&& a5 != a2 && a5 != a1); - - CHECK_NODE(doc, STR("")); - - a3.set_name(STR("a3")); - a3 = STR("v3"); - - a4.set_name(STR("a4")); - a4 = STR("v4"); - - a5.set_name(STR("a5")); - a5 = STR("v5"); - - CHECK_NODE(doc, STR("")); -} - -TEST_XML(dom_node_insert_copy_after_attribute, "") -{ - CHECK(xml_node().insert_copy_after(xml_attribute(), xml_attribute()) == xml_attribute()); - - xml_node node = doc.child(STR("node")); - xml_node child = node.child(STR("child")); - - xml_attribute a1 = node.attribute(STR("a1")); - xml_attribute a2 = child.attribute(STR("a2")); - - CHECK(node.insert_copy_after(a1, xml_attribute()) == xml_attribute()); - CHECK(node.insert_copy_after(xml_attribute(), a1) == xml_attribute()); - CHECK(node.insert_copy_after(a2, a2) == xml_attribute()); - - xml_attribute a3 = node.insert_copy_after(a1, a1); - CHECK(a3 && a3 != a2 && a3 != a1); - - xml_attribute a4 = node.insert_copy_after(a2, a1); - CHECK(a4 && a4 != a3 && a4 != a2 && a4 != a1); - - xml_attribute a5 = node.insert_copy_after(a4, a1); - CHECK(a5 && a5 != a4 && a5 != a3 && a5 != a2 && a5 != a1); - - CHECK(child.insert_copy_after(a4, a4) == xml_attribute()); - - CHECK_NODE(doc, STR("")); - - a3.set_name(STR("a3")); - a3 = STR("v3"); - - a4.set_name(STR("a4")); - a4 = STR("v4"); - - a5.set_name(STR("a5")); - a5 = STR("v5"); - - CHECK_NODE(doc, STR("")); -} - -TEST_XML(dom_node_insert_copy_before_attribute, "") -{ - CHECK(xml_node().insert_copy_before(xml_attribute(), xml_attribute()) == xml_attribute()); - - xml_node node = doc.child(STR("node")); - xml_node child = node.child(STR("child")); - - xml_attribute a1 = node.attribute(STR("a1")); - xml_attribute a2 = child.attribute(STR("a2")); - - CHECK(node.insert_copy_before(a1, xml_attribute()) == xml_attribute()); - CHECK(node.insert_copy_before(xml_attribute(), a1) == xml_attribute()); - CHECK(node.insert_copy_before(a2, a2) == xml_attribute()); - - xml_attribute a3 = node.insert_copy_before(a1, a1); - CHECK(a3 && a3 != a2 && a3 != a1); - - 
xml_attribute a4 = node.insert_copy_before(a2, a1); - CHECK(a4 && a4 != a3 && a4 != a2 && a4 != a1); - - xml_attribute a5 = node.insert_copy_before(a4, a1); - CHECK(a5 && a5 != a4 && a5 != a3 && a5 != a2 && a5 != a1); - - CHECK(child.insert_copy_before(a4, a4) == xml_attribute()); - - CHECK_NODE(doc, STR("")); - - a3.set_name(STR("a3")); - a3 = STR("v3"); - - a4.set_name(STR("a4")); - a4 = STR("v4"); - - a5.set_name(STR("a5")); - a5 = STR("v5"); - - CHECK_NODE(doc, STR("")); -} - -TEST_XML(dom_node_remove_attribute, "") -{ - CHECK(!xml_node().remove_attribute(STR("a"))); - CHECK(!xml_node().remove_attribute(xml_attribute())); - - xml_node node = doc.child(STR("node")); - xml_node child = node.child(STR("child")); - - CHECK(!node.remove_attribute(STR("a"))); - CHECK(!node.remove_attribute(xml_attribute())); - CHECK(!node.remove_attribute(child.attribute(STR("a4")))); - - CHECK_NODE(doc, STR("")); - - CHECK(node.remove_attribute(STR("a1"))); - CHECK(node.remove_attribute(node.attribute(STR("a3")))); - CHECK(child.remove_attribute(STR("a4"))); - - CHECK_NODE(doc, STR("")); -} - -TEST_XML(dom_node_append_child, "foo") -{ - CHECK(xml_node().append_child() == xml_node()); - CHECK(doc.child(STR("node")).first_child().append_child() == xml_node()); - CHECK(doc.append_child(node_document) == xml_node()); - CHECK(doc.append_child(node_null) == xml_node()); - - xml_node n1 = doc.child(STR("node")).append_child(); - CHECK(n1); - CHECK(n1.set_name(STR("n1"))); - - xml_node n2 = doc.child(STR("node")).append_child(); - CHECK(n2 && n1 != n2); - CHECK(n2.set_name(STR("n2"))); - - xml_node n3 = doc.child(STR("node")).child(STR("child")).append_child(node_pcdata); - CHECK(n3 && n1 != n3 && n2 != n3); - CHECK(n3.set_value(STR("n3"))); - - xml_node n4 = doc.append_child(node_comment); - CHECK(n4 && n1 != n4 && n2 != n4 && n3 != n4); - CHECK(n4.set_value(STR("n4"))); - - CHECK_NODE(doc, STR("foon3")); -} - -TEST_XML(dom_node_insert_child_after, "foo") -{ - 
CHECK(xml_node().insert_child_after(node_element, xml_node()) == xml_node()); - CHECK(doc.child(STR("node")).first_child().insert_child_after(node_element, xml_node()) == xml_node()); - CHECK(doc.insert_child_after(node_document, xml_node()) == xml_node()); - CHECK(doc.insert_child_after(node_null, xml_node()) == xml_node()); - - xml_node node = doc.child(STR("node")); - xml_node child = node.child(STR("child")); - - CHECK(node.insert_child_after(node_element, node) == xml_node()); - CHECK(child.insert_child_after(node_element, node) == xml_node()); - - xml_node n1 = node.insert_child_after(node_element, child); - CHECK(n1 && n1 != node && n1 != child); - CHECK(n1.set_name(STR("n1"))); - - xml_node n2 = node.insert_child_after(node_element, child); - CHECK(n2 && n2 != node && n2 != child && n2 != n1); - CHECK(n2.set_name(STR("n2"))); - - xml_node n3 = node.insert_child_after(node_pcdata, n2); - CHECK(n3 && n3 != node && n3 != child && n3 != n1 && n3 != n2); - CHECK(n3.set_value(STR("n3"))); - - xml_node n4 = node.insert_child_after(node_pi, node.first_child()); - CHECK(n4 && n4 != node && n4 != child && n4 != n1 && n4 != n2 && n4 != n3); - CHECK(n4.set_name(STR("n4"))); - - CHECK(child.insert_child_after(node_element, n3) == xml_node()); - - CHECK_NODE(doc, STR("foon3")); -} - -TEST_XML(dom_node_insert_child_before, "foo") -{ - CHECK(xml_node().insert_child_before(node_element, xml_node()) == xml_node()); - CHECK(doc.child(STR("node")).first_child().insert_child_before(node_element, xml_node()) == xml_node()); - CHECK(doc.insert_child_before(node_document, xml_node()) == xml_node()); - CHECK(doc.insert_child_before(node_null, xml_node()) == xml_node()); - - xml_node node = doc.child(STR("node")); - xml_node child = node.child(STR("child")); - - CHECK(node.insert_child_before(node_element, node) == xml_node()); - CHECK(child.insert_child_before(node_element, node) == xml_node()); - - xml_node n1 = node.insert_child_before(node_element, child); - CHECK(n1 && n1 != 
node && n1 != child); - CHECK(n1.set_name(STR("n1"))); - - xml_node n2 = node.insert_child_before(node_element, child); - CHECK(n2 && n2 != node && n2 != child && n2 != n1); - CHECK(n2.set_name(STR("n2"))); - - xml_node n3 = node.insert_child_before(node_pcdata, n2); - CHECK(n3 && n3 != node && n3 != child && n3 != n1 && n3 != n2); - CHECK(n3.set_value(STR("n3"))); - - xml_node n4 = node.insert_child_before(node_pi, node.first_child()); - CHECK(n4 && n4 != node && n4 != child && n4 != n1 && n4 != n2 && n4 != n3); - CHECK(n4.set_name(STR("n4"))); - - CHECK(child.insert_child_before(node_element, n3) == xml_node()); - - CHECK_NODE(doc, STR("foon3")); -} - -TEST_XML(dom_node_remove_child, "") -{ - CHECK(!xml_node().remove_child(STR("a"))); - CHECK(!xml_node().remove_child(xml_node())); - - xml_node node = doc.child(STR("node")); - xml_node child = node.child(STR("child")); - - CHECK(!node.remove_child(STR("a"))); - CHECK(!node.remove_child(xml_node())); - CHECK(!node.remove_child(child.child(STR("n4")))); - - CHECK_NODE(doc, STR("")); - - CHECK(node.remove_child(STR("n1"))); - CHECK(node.remove_child(node.child(STR("n3")))); - CHECK(child.remove_child(STR("n4"))); - - CHECK_NODE(doc, STR("")); -} - -TEST_XML(dom_node_remove_child_complex, "") -{ - doc.child(STR("node")).remove_child(STR("n1")); - - CHECK_NODE(doc, STR("")); - - CHECK(doc.remove_child(STR("node"))); - - CHECK_NODE(doc, STR("")); -} - -TEST_XML(dom_node_remove_child_complex_allocated, "") -{ - doc.append_copy(doc.child(STR("node"))); - - CHECK(doc.remove_child(STR("node"))); - CHECK(doc.remove_child(STR("node"))); - - CHECK_NODE(doc, STR("")); -} - -TEST_XML(dom_node_append_copy, "foo") -{ - CHECK(xml_node().append_copy(xml_node()) == xml_node()); - CHECK(doc.child(STR("node")).first_child().append_copy(doc.child(STR("node"))) == xml_node()); - CHECK(doc.append_copy(doc) == xml_node()); - CHECK(doc.append_copy(xml_node()) == xml_node()); - - xml_node n1 = 
doc.child(STR("node")).append_copy(doc.child(STR("node")).first_child()); - CHECK(n1); - CHECK_STRING(n1.value(), STR("foo")); - CHECK_NODE(doc, STR("foofoo")); - - xml_node n2 = doc.child(STR("node")).append_copy(doc.child(STR("node")).child(STR("child"))); - CHECK(n2 && n2 != n1); - CHECK_STRING(n2.name(), STR("child")); - CHECK_NODE(doc, STR("foofoo")); - - xml_node n3 = doc.child(STR("node")).child(STR("child")).append_copy(doc.child(STR("node")).first_child()); - CHECK(n3 && n3 != n1 && n3 != n2); - CHECK_STRING(n3.value(), STR("foo")); - CHECK_NODE(doc, STR("foofoofoo")); -} - -TEST_XML(dom_node_insert_copy_after, "foo") -{ - CHECK(xml_node().insert_copy_after(xml_node(), xml_node()) == xml_node()); - CHECK(doc.child(STR("node")).first_child().insert_copy_after(doc.child(STR("node")), doc.child(STR("node"))) == xml_node()); - CHECK(doc.insert_copy_after(doc, doc) == xml_node()); - CHECK(doc.insert_copy_after(xml_node(), doc.child(STR("node"))) == xml_node()); - CHECK(doc.insert_copy_after(doc.child(STR("node")), xml_node()) == xml_node()); - - xml_node n1 = doc.child(STR("node")).insert_copy_after(doc.child(STR("node")).child(STR("child")), doc.child(STR("node")).first_child()); - CHECK(n1); - CHECK_STRING(n1.name(), STR("child")); - CHECK_NODE(doc, STR("foo")); - - xml_node n2 = doc.child(STR("node")).insert_copy_after(doc.child(STR("node")).first_child(), doc.child(STR("node")).last_child()); - CHECK(n2 && n2 != n1); - CHECK_STRING(n2.value(), STR("foo")); - CHECK_NODE(doc, STR("foofoo")); - - xml_node n3 = doc.child(STR("node")).insert_copy_after(doc.child(STR("node")).first_child(), doc.child(STR("node")).first_child()); - CHECK(n3 && n3 != n1 && n3 != n2); - CHECK_STRING(n3.value(), STR("foo")); - CHECK_NODE(doc, STR("foofoofoo")); -} - -TEST_XML(dom_node_insert_copy_before, "foo") -{ - CHECK(xml_node().insert_copy_before(xml_node(), xml_node()) == xml_node()); - CHECK(doc.child(STR("node")).first_child().insert_copy_before(doc.child(STR("node")), 
doc.child(STR("node"))) == xml_node()); - CHECK(doc.insert_copy_before(doc, doc) == xml_node()); - CHECK(doc.insert_copy_before(xml_node(), doc.child(STR("node"))) == xml_node()); - CHECK(doc.insert_copy_before(doc.child(STR("node")), xml_node()) == xml_node()); - - xml_node n1 = doc.child(STR("node")).insert_copy_before(doc.child(STR("node")).child(STR("child")), doc.child(STR("node")).first_child()); - CHECK(n1); - CHECK_STRING(n1.name(), STR("child")); - CHECK_NODE(doc, STR("foo")); - - xml_node n2 = doc.child(STR("node")).insert_copy_before(doc.child(STR("node")).first_child(), doc.child(STR("node")).last_child()); - CHECK(n2 && n2 != n1); - CHECK_STRING(n2.name(), STR("child")); - CHECK_NODE(doc, STR("foo")); - - xml_node n3 = doc.child(STR("node")).insert_copy_before(doc.child(STR("node")).first_child().next_sibling(), doc.child(STR("node")).first_child()); - CHECK(n3 && n3 != n1 && n3 != n2); - CHECK_STRING(n3.value(), STR("foo")); - CHECK_NODE(doc, STR("foofoo")); -} - -TEST_XML(dom_node_copy_recursive, "foo") -{ - doc.child(STR("node")).append_copy(doc.child(STR("node"))); - CHECK_NODE(doc, STR("foofoo")); -} - -TEST_XML(dom_node_copy_crossdoc, "") -{ - xml_document newdoc; - newdoc.append_copy(doc.child(STR("node"))); - CHECK_NODE(doc, STR("")); - CHECK_NODE(newdoc, STR("")); -} - -TEST_XML_FLAGS(dom_node_copy_types, "pcdata", parse_default | parse_pi | parse_comments | parse_declaration) -{ - doc.append_copy(doc.child(STR("root"))); - CHECK_NODE(doc, STR("pcdatapcdata")); - - doc.insert_copy_before(doc.first_child(), doc.first_child()); - CHECK_NODE(doc, STR("pcdatapcdata")); -} - -TEST_XML(dom_attr_assign_large_number, "") -{ - xml_node node = doc.child(STR("node")); - - node.attribute(STR("attr1")) = FLT_MAX; - node.attribute(STR("attr2")) = DBL_MAX; - - CHECK(test_node(node, STR(""), STR(""), pugi::format_raw) || - test_node(node, STR(""), STR(""), pugi::format_raw)); -} - -TEST(dom_node_declaration_name) -{ - xml_document doc; - 
doc.append_child(node_declaration); - - // name 'xml' is auto-assigned - CHECK(doc.first_child().type() == node_declaration); - CHECK_STRING(doc.first_child().name(), STR("xml")); - - doc.insert_child_after(node_declaration, doc.first_child()); - doc.insert_child_before(node_declaration, doc.first_child()); - - CHECK_NODE(doc, STR("")); -} - -TEST(dom_node_declaration_top_level) -{ - xml_document doc; - doc.append_child().set_name(STR("node")); - - xml_node node = doc.first_child(); - node.append_child(node_pcdata).set_value(STR("text")); - - CHECK(node.insert_child_before(node_declaration, node.first_child()) == xml_node()); - CHECK(node.insert_child_after(node_declaration, node.first_child()) == xml_node()); - CHECK(node.append_child(node_declaration) == xml_node()); - - CHECK_NODE(doc, STR("text")); - - CHECK(doc.insert_child_before(node_declaration, node)); - CHECK(doc.insert_child_after(node_declaration, node)); - CHECK(doc.append_child(node_declaration)); - - CHECK_NODE(doc, STR("text")); -} - -TEST(dom_node_declaration_copy) -{ - xml_document doc; - doc.append_child(node_declaration); - - doc.append_child().set_name(STR("node")); - - doc.last_child().append_copy(doc.first_child()); - - CHECK_NODE(doc, STR("")); -} - -TEST(dom_string_out_of_memory) -{ - unsigned int length = 65536; - - char_t* string = new char_t[length + 1]; - for (unsigned int i = 0; i < length; ++i) string[i] = 'a'; - string[length] = 0; - - xml_document doc; - xml_node node = doc.append_child(); - xml_attribute attr = node.append_attribute(STR("a")); - xml_node text = node.append_child(node_pcdata); - - // no value => long value - test_runner::_memory_fail_threshold = 32; - - CHECK(!node.set_name(string)); - CHECK(!text.set_value(string)); - CHECK(!attr.set_name(string)); - CHECK(!attr.set_value(string)); - - // set some names/values - test_runner::_memory_fail_threshold = 0; - - node.set_name(STR("n")); - attr.set_value(STR("v")); - text.set_value(STR("t")); - - // some value => long 
value - test_runner::_memory_fail_threshold = 32; - - CHECK(!node.set_name(string)); - CHECK(!text.set_value(string)); - CHECK(!attr.set_name(string)); - CHECK(!attr.set_value(string)); - - // check that original state was preserved - test_runner::_memory_fail_threshold = 0; - - CHECK_NODE(doc, STR("t")); -} - -TEST(dom_node_out_of_memory) -{ - test_runner::_memory_fail_threshold = 65536; - - // exhaust memory limit - xml_document doc; - - xml_node n = doc.append_child(); - CHECK(n.set_name(STR("n"))); - - xml_attribute a = n.append_attribute(STR("a")); - CHECK(a); - - while (n.append_child(node_comment) || n.append_attribute(STR("b"))) - { - // nop - } - - // verify all node modification operations - CHECK(!n.append_child()); - CHECK(!n.insert_child_after(node_element, n.first_child())); - CHECK(!n.insert_child_before(node_element, n.first_child())); - CHECK(!n.append_attribute(STR(""))); - CHECK(!n.insert_attribute_after(STR(""), a)); - CHECK(!n.insert_attribute_before(STR(""), a)); - - // verify node copy operations - CHECK(!n.append_copy(n.first_child())); - CHECK(!n.insert_copy_after(n.first_child(), n.first_child())); - CHECK(!n.insert_copy_before(n.first_child(), n.first_child())); - CHECK(!n.append_copy(a)); - CHECK(!n.insert_copy_after(a, a)); - CHECK(!n.insert_copy_before(a, a)); -} +#include "common.hpp" + +#include + +TEST_XML(dom_attr_assign, "") +{ + xml_node node = doc.child(STR("node")); + + node.append_attribute(STR("attr1")) = STR("v1"); + xml_attribute() = STR("v1"); + + node.append_attribute(STR("attr2")) = -2147483647; + node.append_attribute(STR("attr3")) = -2147483647 - 1; + xml_attribute() = -2147483647 - 1; + + node.append_attribute(STR("attr4")) = 4294967295u; + node.append_attribute(STR("attr5")) = 4294967294u; + xml_attribute() = 2147483647; + + node.append_attribute(STR("attr6")) = 0.5; + xml_attribute() = 0.5; + + node.append_attribute(STR("attr7")) = true; + xml_attribute() = true; + + CHECK_NODE(node, STR("")); +} + 
+TEST_XML(dom_attr_set_value, "") +{ + xml_node node = doc.child(STR("node")); + + CHECK(node.append_attribute(STR("attr1")).set_value(STR("v1"))); + CHECK(!xml_attribute().set_value(STR("v1"))); + + CHECK(node.append_attribute(STR("attr2")).set_value(-2147483647)); + CHECK(node.append_attribute(STR("attr3")).set_value(-2147483647 - 1)); + CHECK(!xml_attribute().set_value(-2147483647)); + + CHECK(node.append_attribute(STR("attr4")).set_value(4294967295u)); + CHECK(node.append_attribute(STR("attr5")).set_value(4294967294u)); + CHECK(!xml_attribute().set_value(4294967295u)); + + CHECK(node.append_attribute(STR("attr6")).set_value(0.5)); + CHECK(!xml_attribute().set_value(0.5)); + + CHECK(node.append_attribute(STR("attr7")).set_value(true)); + CHECK(!xml_attribute().set_value(true)); + + CHECK_NODE(node, STR("")); +} + +TEST_XML(dom_node_set_name, "text") +{ + CHECK(doc.child(STR("node")).set_name(STR("n"))); + CHECK(!doc.child(STR("node")).first_child().set_name(STR("n"))); + CHECK(!xml_node().set_name(STR("n"))); + + CHECK_NODE(doc, STR("text")); +} + +TEST_XML(dom_node_set_value, "text") +{ + CHECK(doc.child(STR("node")).first_child().set_value(STR("no text"))); + CHECK(!doc.child(STR("node")).set_value(STR("no text"))); + CHECK(!xml_node().set_value(STR("no text"))); + + CHECK_NODE(doc, STR("no text")); +} + +TEST_XML(dom_node_set_value_allocated, "text") +{ + CHECK(doc.child(STR("node")).first_child().set_value(STR("no text"))); + CHECK(!doc.child(STR("node")).set_value(STR("no text"))); + CHECK(!xml_node().set_value(STR("no text"))); + CHECK(doc.child(STR("node")).first_child().set_value(STR("no text at all"))); + + CHECK_NODE(doc, STR("no text at all")); +} + +TEST_XML(dom_node_append_attribute, "") +{ + CHECK(xml_node().append_attribute(STR("a")) == xml_attribute()); + CHECK(doc.append_attribute(STR("a")) == xml_attribute()); + + xml_attribute a1 = doc.child(STR("node")).append_attribute(STR("a1")); + CHECK(a1); + a1 = STR("v1"); + + xml_attribute a2 = 
doc.child(STR("node")).append_attribute(STR("a2")); + CHECK(a2 && a1 != a2); + a2 = STR("v2"); + + xml_attribute a3 = doc.child(STR("node")).child(STR("child")).append_attribute(STR("a3")); + CHECK(a3 && a1 != a3 && a2 != a3); + a3 = STR("v3"); + + CHECK_NODE(doc, STR("")); +} + +TEST_XML(dom_node_insert_attribute_after, "") +{ + CHECK(xml_node().insert_attribute_after(STR("a"), xml_attribute()) == xml_attribute()); + + xml_node node = doc.child(STR("node")); + xml_node child = node.child(STR("child")); + + xml_attribute a1 = node.attribute(STR("a1")); + xml_attribute a2 = child.attribute(STR("a2")); + + CHECK(node.insert_attribute_after(STR("a"), xml_attribute()) == xml_attribute()); + CHECK(node.insert_attribute_after(STR("a"), a2) == xml_attribute()); + + xml_attribute a3 = node.insert_attribute_after(STR("a3"), a1); + CHECK(a3 && a3 != a2 && a3 != a1); + a3 = STR("v3"); + + xml_attribute a4 = node.insert_attribute_after(STR("a4"), a1); + CHECK(a4 && a4 != a3 && a4 != a2 && a4 != a1); + a4 = STR("v4"); + + xml_attribute a5 = node.insert_attribute_after(STR("a5"), a3); + CHECK(a5 && a5 != a4 && a5 != a3 && a5 != a2 && a5 != a1); + a5 = STR("v5"); + + CHECK(child.insert_attribute_after(STR("a"), a4) == xml_attribute()); + + CHECK_NODE(doc, STR("")); +} + +TEST_XML(dom_node_insert_attribute_before, "") +{ + CHECK(xml_node().insert_attribute_before(STR("a"), xml_attribute()) == xml_attribute()); + + xml_node node = doc.child(STR("node")); + xml_node child = node.child(STR("child")); + + xml_attribute a1 = node.attribute(STR("a1")); + xml_attribute a2 = child.attribute(STR("a2")); + + CHECK(node.insert_attribute_before(STR("a"), xml_attribute()) == xml_attribute()); + CHECK(node.insert_attribute_before(STR("a"), a2) == xml_attribute()); + + xml_attribute a3 = node.insert_attribute_before(STR("a3"), a1); + CHECK(a3 && a3 != a2 && a3 != a1); + a3 = STR("v3"); + + xml_attribute a4 = node.insert_attribute_before(STR("a4"), a1); + CHECK(a4 && a4 != a3 && a4 != a2 && a4 != 
a1); + a4 = STR("v4"); + + xml_attribute a5 = node.insert_attribute_before(STR("a5"), a3); + CHECK(a5 && a5 != a4 && a5 != a3 && a5 != a2 && a5 != a1); + a5 = STR("v5"); + + CHECK(child.insert_attribute_before(STR("a"), a4) == xml_attribute()); + + CHECK_NODE(doc, STR("")); +} + +TEST_XML(dom_node_append_copy_attribute, "") +{ + CHECK(xml_node().append_copy(xml_attribute()) == xml_attribute()); + CHECK(xml_node().append_copy(doc.child(STR("node")).attribute(STR("a1"))) == xml_attribute()); + CHECK(doc.append_copy(doc.child(STR("node")).attribute(STR("a1"))) == xml_attribute()); + + xml_node node = doc.child(STR("node")); + xml_node child = node.child(STR("child")); + + xml_attribute a1 = node.attribute(STR("a1")); + xml_attribute a2 = child.attribute(STR("a2")); + + xml_attribute a3 = node.append_copy(a1); + CHECK(a3 && a3 != a2 && a3 != a1); + + xml_attribute a4 = node.append_copy(a2); + CHECK(a4 && a4 != a3 && a4 != a2 && a4 != a1); + + xml_attribute a5 = node.last_child().append_copy(a1); + CHECK(a5 && a5 != a4 && a5 != a3 && a5 != a2 && a5 != a1); + + CHECK_NODE(doc, STR("")); + + a3.set_name(STR("a3")); + a3 = STR("v3"); + + a4.set_name(STR("a4")); + a4 = STR("v4"); + + a5.set_name(STR("a5")); + a5 = STR("v5"); + + CHECK_NODE(doc, STR("")); +} + +TEST_XML(dom_node_insert_copy_after_attribute, "") +{ + CHECK(xml_node().insert_copy_after(xml_attribute(), xml_attribute()) == xml_attribute()); + + xml_node node = doc.child(STR("node")); + xml_node child = node.child(STR("child")); + + xml_attribute a1 = node.attribute(STR("a1")); + xml_attribute a2 = child.attribute(STR("a2")); + + CHECK(node.insert_copy_after(a1, xml_attribute()) == xml_attribute()); + CHECK(node.insert_copy_after(xml_attribute(), a1) == xml_attribute()); + CHECK(node.insert_copy_after(a2, a2) == xml_attribute()); + + xml_attribute a3 = node.insert_copy_after(a1, a1); + CHECK(a3 && a3 != a2 && a3 != a1); + + xml_attribute a4 = node.insert_copy_after(a2, a1); + CHECK(a4 && a4 != a3 && a4 != a2 && 
a4 != a1); + + xml_attribute a5 = node.insert_copy_after(a4, a1); + CHECK(a5 && a5 != a4 && a5 != a3 && a5 != a2 && a5 != a1); + + CHECK(child.insert_copy_after(a4, a4) == xml_attribute()); + + CHECK_NODE(doc, STR("")); + + a3.set_name(STR("a3")); + a3 = STR("v3"); + + a4.set_name(STR("a4")); + a4 = STR("v4"); + + a5.set_name(STR("a5")); + a5 = STR("v5"); + + CHECK_NODE(doc, STR("")); +} + +TEST_XML(dom_node_insert_copy_before_attribute, "") +{ + CHECK(xml_node().insert_copy_before(xml_attribute(), xml_attribute()) == xml_attribute()); + + xml_node node = doc.child(STR("node")); + xml_node child = node.child(STR("child")); + + xml_attribute a1 = node.attribute(STR("a1")); + xml_attribute a2 = child.attribute(STR("a2")); + + CHECK(node.insert_copy_before(a1, xml_attribute()) == xml_attribute()); + CHECK(node.insert_copy_before(xml_attribute(), a1) == xml_attribute()); + CHECK(node.insert_copy_before(a2, a2) == xml_attribute()); + + xml_attribute a3 = node.insert_copy_before(a1, a1); + CHECK(a3 && a3 != a2 && a3 != a1); + + xml_attribute a4 = node.insert_copy_before(a2, a1); + CHECK(a4 && a4 != a3 && a4 != a2 && a4 != a1); + + xml_attribute a5 = node.insert_copy_before(a4, a1); + CHECK(a5 && a5 != a4 && a5 != a3 && a5 != a2 && a5 != a1); + + CHECK(child.insert_copy_before(a4, a4) == xml_attribute()); + + CHECK_NODE(doc, STR("")); + + a3.set_name(STR("a3")); + a3 = STR("v3"); + + a4.set_name(STR("a4")); + a4 = STR("v4"); + + a5.set_name(STR("a5")); + a5 = STR("v5"); + + CHECK_NODE(doc, STR("")); +} + +TEST_XML(dom_node_remove_attribute, "") +{ + CHECK(!xml_node().remove_attribute(STR("a"))); + CHECK(!xml_node().remove_attribute(xml_attribute())); + + xml_node node = doc.child(STR("node")); + xml_node child = node.child(STR("child")); + + CHECK(!node.remove_attribute(STR("a"))); + CHECK(!node.remove_attribute(xml_attribute())); + CHECK(!node.remove_attribute(child.attribute(STR("a4")))); + + CHECK_NODE(doc, STR("")); + + CHECK(node.remove_attribute(STR("a1"))); + 
CHECK(node.remove_attribute(node.attribute(STR("a3")))); + CHECK(child.remove_attribute(STR("a4"))); + + CHECK_NODE(doc, STR("")); +} + +TEST_XML(dom_node_append_child, "foo") +{ + CHECK(xml_node().append_child() == xml_node()); + CHECK(doc.child(STR("node")).first_child().append_child() == xml_node()); + CHECK(doc.append_child(node_document) == xml_node()); + CHECK(doc.append_child(node_null) == xml_node()); + + xml_node n1 = doc.child(STR("node")).append_child(); + CHECK(n1); + CHECK(n1.set_name(STR("n1"))); + + xml_node n2 = doc.child(STR("node")).append_child(); + CHECK(n2 && n1 != n2); + CHECK(n2.set_name(STR("n2"))); + + xml_node n3 = doc.child(STR("node")).child(STR("child")).append_child(node_pcdata); + CHECK(n3 && n1 != n3 && n2 != n3); + CHECK(n3.set_value(STR("n3"))); + + xml_node n4 = doc.append_child(node_comment); + CHECK(n4 && n1 != n4 && n2 != n4 && n3 != n4); + CHECK(n4.set_value(STR("n4"))); + + CHECK_NODE(doc, STR("foon3")); +} + +TEST_XML(dom_node_insert_child_after, "foo") +{ + CHECK(xml_node().insert_child_after(node_element, xml_node()) == xml_node()); + CHECK(doc.child(STR("node")).first_child().insert_child_after(node_element, xml_node()) == xml_node()); + CHECK(doc.insert_child_after(node_document, xml_node()) == xml_node()); + CHECK(doc.insert_child_after(node_null, xml_node()) == xml_node()); + + xml_node node = doc.child(STR("node")); + xml_node child = node.child(STR("child")); + + CHECK(node.insert_child_after(node_element, node) == xml_node()); + CHECK(child.insert_child_after(node_element, node) == xml_node()); + + xml_node n1 = node.insert_child_after(node_element, child); + CHECK(n1 && n1 != node && n1 != child); + CHECK(n1.set_name(STR("n1"))); + + xml_node n2 = node.insert_child_after(node_element, child); + CHECK(n2 && n2 != node && n2 != child && n2 != n1); + CHECK(n2.set_name(STR("n2"))); + + xml_node n3 = node.insert_child_after(node_pcdata, n2); + CHECK(n3 && n3 != node && n3 != child && n3 != n1 && n3 != n2); + 
CHECK(n3.set_value(STR("n3"))); + + xml_node n4 = node.insert_child_after(node_pi, node.first_child()); + CHECK(n4 && n4 != node && n4 != child && n4 != n1 && n4 != n2 && n4 != n3); + CHECK(n4.set_name(STR("n4"))); + + CHECK(child.insert_child_after(node_element, n3) == xml_node()); + + CHECK_NODE(doc, STR("foon3")); +} + +TEST_XML(dom_node_insert_child_before, "foo") +{ + CHECK(xml_node().insert_child_before(node_element, xml_node()) == xml_node()); + CHECK(doc.child(STR("node")).first_child().insert_child_before(node_element, xml_node()) == xml_node()); + CHECK(doc.insert_child_before(node_document, xml_node()) == xml_node()); + CHECK(doc.insert_child_before(node_null, xml_node()) == xml_node()); + + xml_node node = doc.child(STR("node")); + xml_node child = node.child(STR("child")); + + CHECK(node.insert_child_before(node_element, node) == xml_node()); + CHECK(child.insert_child_before(node_element, node) == xml_node()); + + xml_node n1 = node.insert_child_before(node_element, child); + CHECK(n1 && n1 != node && n1 != child); + CHECK(n1.set_name(STR("n1"))); + + xml_node n2 = node.insert_child_before(node_element, child); + CHECK(n2 && n2 != node && n2 != child && n2 != n1); + CHECK(n2.set_name(STR("n2"))); + + xml_node n3 = node.insert_child_before(node_pcdata, n2); + CHECK(n3 && n3 != node && n3 != child && n3 != n1 && n3 != n2); + CHECK(n3.set_value(STR("n3"))); + + xml_node n4 = node.insert_child_before(node_pi, node.first_child()); + CHECK(n4 && n4 != node && n4 != child && n4 != n1 && n4 != n2 && n4 != n3); + CHECK(n4.set_name(STR("n4"))); + + CHECK(child.insert_child_before(node_element, n3) == xml_node()); + + CHECK_NODE(doc, STR("foon3")); +} + +TEST_XML(dom_node_remove_child, "") +{ + CHECK(!xml_node().remove_child(STR("a"))); + CHECK(!xml_node().remove_child(xml_node())); + + xml_node node = doc.child(STR("node")); + xml_node child = node.child(STR("child")); + + CHECK(!node.remove_child(STR("a"))); + CHECK(!node.remove_child(xml_node())); + 
CHECK(!node.remove_child(child.child(STR("n4")))); + + CHECK_NODE(doc, STR("")); + + CHECK(node.remove_child(STR("n1"))); + CHECK(node.remove_child(node.child(STR("n3")))); + CHECK(child.remove_child(STR("n4"))); + + CHECK_NODE(doc, STR("")); +} + +TEST_XML(dom_node_remove_child_complex, "") +{ + doc.child(STR("node")).remove_child(STR("n1")); + + CHECK_NODE(doc, STR("")); + + CHECK(doc.remove_child(STR("node"))); + + CHECK_NODE(doc, STR("")); +} + +TEST_XML(dom_node_remove_child_complex_allocated, "") +{ + doc.append_copy(doc.child(STR("node"))); + + CHECK(doc.remove_child(STR("node"))); + CHECK(doc.remove_child(STR("node"))); + + CHECK_NODE(doc, STR("")); +} + +TEST_XML(dom_node_append_copy, "foo") +{ + CHECK(xml_node().append_copy(xml_node()) == xml_node()); + CHECK(doc.child(STR("node")).first_child().append_copy(doc.child(STR("node"))) == xml_node()); + CHECK(doc.append_copy(doc) == xml_node()); + CHECK(doc.append_copy(xml_node()) == xml_node()); + + xml_node n1 = doc.child(STR("node")).append_copy(doc.child(STR("node")).first_child()); + CHECK(n1); + CHECK_STRING(n1.value(), STR("foo")); + CHECK_NODE(doc, STR("foofoo")); + + xml_node n2 = doc.child(STR("node")).append_copy(doc.child(STR("node")).child(STR("child"))); + CHECK(n2 && n2 != n1); + CHECK_STRING(n2.name(), STR("child")); + CHECK_NODE(doc, STR("foofoo")); + + xml_node n3 = doc.child(STR("node")).child(STR("child")).append_copy(doc.child(STR("node")).first_child()); + CHECK(n3 && n3 != n1 && n3 != n2); + CHECK_STRING(n3.value(), STR("foo")); + CHECK_NODE(doc, STR("foofoofoo")); +} + +TEST_XML(dom_node_insert_copy_after, "foo") +{ + CHECK(xml_node().insert_copy_after(xml_node(), xml_node()) == xml_node()); + CHECK(doc.child(STR("node")).first_child().insert_copy_after(doc.child(STR("node")), doc.child(STR("node"))) == xml_node()); + CHECK(doc.insert_copy_after(doc, doc) == xml_node()); + CHECK(doc.insert_copy_after(xml_node(), doc.child(STR("node"))) == xml_node()); + 
CHECK(doc.insert_copy_after(doc.child(STR("node")), xml_node()) == xml_node()); + + xml_node n1 = doc.child(STR("node")).insert_copy_after(doc.child(STR("node")).child(STR("child")), doc.child(STR("node")).first_child()); + CHECK(n1); + CHECK_STRING(n1.name(), STR("child")); + CHECK_NODE(doc, STR("foo")); + + xml_node n2 = doc.child(STR("node")).insert_copy_after(doc.child(STR("node")).first_child(), doc.child(STR("node")).last_child()); + CHECK(n2 && n2 != n1); + CHECK_STRING(n2.value(), STR("foo")); + CHECK_NODE(doc, STR("foofoo")); + + xml_node n3 = doc.child(STR("node")).insert_copy_after(doc.child(STR("node")).first_child(), doc.child(STR("node")).first_child()); + CHECK(n3 && n3 != n1 && n3 != n2); + CHECK_STRING(n3.value(), STR("foo")); + CHECK_NODE(doc, STR("foofoofoo")); +} + +TEST_XML(dom_node_insert_copy_before, "foo") +{ + CHECK(xml_node().insert_copy_before(xml_node(), xml_node()) == xml_node()); + CHECK(doc.child(STR("node")).first_child().insert_copy_before(doc.child(STR("node")), doc.child(STR("node"))) == xml_node()); + CHECK(doc.insert_copy_before(doc, doc) == xml_node()); + CHECK(doc.insert_copy_before(xml_node(), doc.child(STR("node"))) == xml_node()); + CHECK(doc.insert_copy_before(doc.child(STR("node")), xml_node()) == xml_node()); + + xml_node n1 = doc.child(STR("node")).insert_copy_before(doc.child(STR("node")).child(STR("child")), doc.child(STR("node")).first_child()); + CHECK(n1); + CHECK_STRING(n1.name(), STR("child")); + CHECK_NODE(doc, STR("foo")); + + xml_node n2 = doc.child(STR("node")).insert_copy_before(doc.child(STR("node")).first_child(), doc.child(STR("node")).last_child()); + CHECK(n2 && n2 != n1); + CHECK_STRING(n2.name(), STR("child")); + CHECK_NODE(doc, STR("foo")); + + xml_node n3 = doc.child(STR("node")).insert_copy_before(doc.child(STR("node")).first_child().next_sibling(), doc.child(STR("node")).first_child()); + CHECK(n3 && n3 != n1 && n3 != n2); + CHECK_STRING(n3.value(), STR("foo")); + CHECK_NODE(doc, STR("foofoo")); 
+} + +TEST_XML(dom_node_copy_recursive, "foo") +{ + doc.child(STR("node")).append_copy(doc.child(STR("node"))); + CHECK_NODE(doc, STR("foofoo")); +} + +TEST_XML(dom_node_copy_crossdoc, "") +{ + xml_document newdoc; + newdoc.append_copy(doc.child(STR("node"))); + CHECK_NODE(doc, STR("")); + CHECK_NODE(newdoc, STR("")); +} + +TEST_XML_FLAGS(dom_node_copy_types, "pcdata", parse_default | parse_pi | parse_comments | parse_declaration) +{ + doc.append_copy(doc.child(STR("root"))); + CHECK_NODE(doc, STR("pcdatapcdata")); + + doc.insert_copy_before(doc.first_child(), doc.first_child()); + CHECK_NODE(doc, STR("pcdatapcdata")); +} + +TEST_XML(dom_attr_assign_large_number, "") +{ + xml_node node = doc.child(STR("node")); + + node.attribute(STR("attr1")) = FLT_MAX; + node.attribute(STR("attr2")) = DBL_MAX; + + CHECK(test_node(node, STR(""), STR(""), pugi::format_raw) || + test_node(node, STR(""), STR(""), pugi::format_raw)); +} + +TEST(dom_node_declaration_name) +{ + xml_document doc; + doc.append_child(node_declaration); + + // name 'xml' is auto-assigned + CHECK(doc.first_child().type() == node_declaration); + CHECK_STRING(doc.first_child().name(), STR("xml")); + + doc.insert_child_after(node_declaration, doc.first_child()); + doc.insert_child_before(node_declaration, doc.first_child()); + + CHECK_NODE(doc, STR("")); +} + +TEST(dom_node_declaration_top_level) +{ + xml_document doc; + doc.append_child().set_name(STR("node")); + + xml_node node = doc.first_child(); + node.append_child(node_pcdata).set_value(STR("text")); + + CHECK(node.insert_child_before(node_declaration, node.first_child()) == xml_node()); + CHECK(node.insert_child_after(node_declaration, node.first_child()) == xml_node()); + CHECK(node.append_child(node_declaration) == xml_node()); + + CHECK_NODE(doc, STR("text")); + + CHECK(doc.insert_child_before(node_declaration, node)); + CHECK(doc.insert_child_after(node_declaration, node)); + CHECK(doc.append_child(node_declaration)); + + CHECK_NODE(doc, 
STR("text")); +} + +TEST(dom_node_declaration_copy) +{ + xml_document doc; + doc.append_child(node_declaration); + + doc.append_child().set_name(STR("node")); + + doc.last_child().append_copy(doc.first_child()); + + CHECK_NODE(doc, STR("")); +} + +TEST(dom_string_out_of_memory) +{ + unsigned int length = 65536; + + char_t* string = new char_t[length + 1]; + for (unsigned int i = 0; i < length; ++i) string[i] = 'a'; + string[length] = 0; + + xml_document doc; + xml_node node = doc.append_child(); + xml_attribute attr = node.append_attribute(STR("a")); + xml_node text = node.append_child(node_pcdata); + + // no value => long value + test_runner::_memory_fail_threshold = 32; + + CHECK(!node.set_name(string)); + CHECK(!text.set_value(string)); + CHECK(!attr.set_name(string)); + CHECK(!attr.set_value(string)); + + // set some names/values + test_runner::_memory_fail_threshold = 0; + + node.set_name(STR("n")); + attr.set_value(STR("v")); + text.set_value(STR("t")); + + // some value => long value + test_runner::_memory_fail_threshold = 32; + + CHECK(!node.set_name(string)); + CHECK(!text.set_value(string)); + CHECK(!attr.set_name(string)); + CHECK(!attr.set_value(string)); + + // check that original state was preserved + test_runner::_memory_fail_threshold = 0; + + CHECK_NODE(doc, STR("t")); +} + +TEST(dom_node_out_of_memory) +{ + test_runner::_memory_fail_threshold = 65536; + + // exhaust memory limit + xml_document doc; + + xml_node n = doc.append_child(); + CHECK(n.set_name(STR("n"))); + + xml_attribute a = n.append_attribute(STR("a")); + CHECK(a); + + while (n.append_child(node_comment) || n.append_attribute(STR("b"))) + { + // nop + } + + // verify all node modification operations + CHECK(!n.append_child()); + CHECK(!n.insert_child_after(node_element, n.first_child())); + CHECK(!n.insert_child_before(node_element, n.first_child())); + CHECK(!n.append_attribute(STR(""))); + CHECK(!n.insert_attribute_after(STR(""), a)); + CHECK(!n.insert_attribute_before(STR(""), a)); 
+ + // verify node copy operations + CHECK(!n.append_copy(n.first_child())); + CHECK(!n.insert_copy_after(n.first_child(), n.first_child())); + CHECK(!n.insert_copy_before(n.first_child(), n.first_child())); + CHECK(!n.append_copy(a)); + CHECK(!n.insert_copy_after(a, a)); + CHECK(!n.insert_copy_before(a, a)); +} diff --git a/tests/test_dom_traverse.cpp b/tests/test_dom_traverse.cpp index 896bf6f..8075dc3 100644 --- a/tests/test_dom_traverse.cpp +++ b/tests/test_dom_traverse.cpp @@ -1,756 +1,756 @@ -#define _CRT_SECURE_NO_WARNINGS -#define _SCL_SECURE_NO_WARNINGS - -#include "common.hpp" - -#include - -#include -#include - -#include -#include -#include -#include - -#include "helpers.hpp" - -#ifdef PUGIXML_NO_STL -template static I move_iter(I base, int n) -{ - if (n > 0) while (n--) ++base; - else while (n++) --base; - return base; -} -#else -template static I move_iter(I base, int n) -{ - std::advance(base, n); - return base; -} -#endif - -template static void generic_empty_test(const T& obj) -{ - T null; - - CHECK(null.empty()); - CHECK(!obj.empty()); -} - -TEST_XML(dom_attr_bool_ops, "") -{ - generic_bool_ops_test(doc.child(STR("node")).attribute(STR("attr"))); -} - -TEST_XML(dom_attr_eq_ops, "") -{ - generic_eq_ops_test(doc.child(STR("node")).attribute(STR("attr1")), doc.child(STR("node")).attribute(STR("attr2"))); -} - -TEST_XML(dom_attr_rel_ops, "") -{ - generic_rel_ops_test(doc.child(STR("node")).attribute(STR("attr1")), doc.child(STR("node")).attribute(STR("attr2"))); -} - -TEST_XML(dom_attr_empty, "") -{ - generic_empty_test(doc.child(STR("node")).attribute(STR("attr"))); -} - -TEST_XML(dom_attr_next_previous_attribute, "") -{ - xml_attribute attr1 = doc.child(STR("node")).attribute(STR("attr1")); - xml_attribute attr2 = doc.child(STR("node")).attribute(STR("attr2")); - - CHECK(attr1.next_attribute() == attr2); - CHECK(attr2.next_attribute() == xml_attribute()); - - CHECK(attr1.previous_attribute() == xml_attribute()); - CHECK(attr2.previous_attribute() == 
attr1); - - CHECK(xml_attribute().next_attribute() == xml_attribute()); - CHECK(xml_attribute().previous_attribute() == xml_attribute()); -} - -TEST_XML(dom_attr_name_value, "") -{ - xml_attribute attr = doc.child(STR("node")).attribute(STR("attr")); - - CHECK_NAME_VALUE(attr, STR("attr"), STR("1")); - CHECK_NAME_VALUE(xml_attribute(), STR(""), STR("")); -} - -TEST_XML(dom_attr_as_int, "") -{ - xml_node node = doc.child(STR("node")); - - CHECK(xml_attribute().as_int() == 0); - CHECK(node.attribute(STR("attr1")).as_int() == 1); - CHECK(node.attribute(STR("attr2")).as_int() == -1); - CHECK(node.attribute(STR("attr3")).as_int() == -2147483647 - 1); - CHECK(node.attribute(STR("attr4")).as_int() == 2147483647); -} - -TEST_XML(dom_attr_as_uint, "") -{ - xml_node node = doc.child(STR("node")); - - CHECK(xml_attribute().as_uint() == 0); - CHECK(node.attribute(STR("attr1")).as_uint() == 0); - CHECK(node.attribute(STR("attr2")).as_uint() == 1); - CHECK(node.attribute(STR("attr3")).as_uint() == 2147483647); - CHECK(node.attribute(STR("attr4")).as_uint() == 4294967295u); -} - -TEST_XML(dom_attr_as_float, "") -{ - xml_node node = doc.child(STR("node")); - - CHECK(xml_attribute().as_float() == 0); - CHECK_DOUBLE(node.attribute(STR("attr1")).as_float(), 0); - CHECK_DOUBLE(node.attribute(STR("attr2")).as_float(), 1); - CHECK_DOUBLE(node.attribute(STR("attr3")).as_float(), 0.12); - CHECK_DOUBLE(node.attribute(STR("attr4")).as_float(), -5.1); - CHECK_DOUBLE(node.attribute(STR("attr5")).as_float(), 3e-4); - CHECK_DOUBLE(node.attribute(STR("attr6")).as_float(), 3.14159265358979323846); -} - -TEST_XML(dom_attr_as_double, "") -{ - xml_node node = doc.child(STR("node")); - - CHECK(xml_attribute().as_double() == 0); - CHECK_DOUBLE(node.attribute(STR("attr1")).as_double(), 0); - CHECK_DOUBLE(node.attribute(STR("attr2")).as_double(), 1); - CHECK_DOUBLE(node.attribute(STR("attr3")).as_double(), 0.12); - CHECK_DOUBLE(node.attribute(STR("attr4")).as_double(), -5.1); - 
CHECK_DOUBLE(node.attribute(STR("attr5")).as_double(), 3e-4); - CHECK_DOUBLE(node.attribute(STR("attr6")).as_double(), 3.14159265358979323846); -} - -TEST_XML(dom_attr_as_bool, "") -{ - xml_node node = doc.child(STR("node")); - - CHECK(!xml_attribute().as_bool()); - CHECK(!node.attribute(STR("attr1")).as_bool()); - CHECK(node.attribute(STR("attr2")).as_bool()); - CHECK(node.attribute(STR("attr3")).as_bool()); - CHECK(node.attribute(STR("attr4")).as_bool()); - CHECK(node.attribute(STR("attr5")).as_bool()); - CHECK(node.attribute(STR("attr6")).as_bool()); - CHECK(!node.attribute(STR("attr7")).as_bool()); -} - -TEST_XML(dom_attr_iterator, "") -{ - xml_node node1 = doc.child(STR("node")).child(STR("node1")); - xml_node node2 = doc.child(STR("node")).child(STR("node2")); - xml_node node3 = doc.child(STR("node")).child(STR("node3")); - - CHECK(xml_node().attributes_begin() == xml_attribute_iterator()); - CHECK(xml_node().attributes_end() == xml_attribute_iterator()); - - CHECK(node1.attributes_begin() == xml_attribute_iterator(node1.attribute(STR("attr1")), node1)); - CHECK(move_iter(node1.attributes_begin(), 1) == node1.attributes_end()); - CHECK(move_iter(node1.attributes_end(), -1) == node1.attributes_begin()); - CHECK(*node1.attributes_begin() == node1.attribute(STR("attr1"))); - CHECK_STRING(node1.attributes_begin()->name(), STR("attr1")); - - CHECK(move_iter(node2.attributes_begin(), 2) == node2.attributes_end()); - CHECK(move_iter(node2.attributes_end(), -2) == node2.attributes_begin()); - - CHECK(node3.attributes_begin() != xml_attribute_iterator()); - CHECK(node3.attributes_begin() == node3.attributes_end()); - - xml_attribute_iterator it = xml_attribute_iterator(node2.attribute(STR("attr2")), node2); - xml_attribute_iterator itt = it; - - CHECK(itt++ == it); - CHECK(itt == node2.attributes_end()); - - CHECK(itt-- == node2.attributes_end()); - CHECK(itt == it); - - CHECK(++itt == node2.attributes_end()); - CHECK(itt == node2.attributes_end()); - - CHECK(--itt == 
it); - CHECK(itt == it); - - CHECK(++itt != it); -} - -TEST_XML(dom_attr_iterator_end, "") -{ - xml_node node1 = doc.child(STR("node")).child(STR("node1")); - xml_node node2 = doc.child(STR("node")).child(STR("node2")); - xml_node node3 = doc.child(STR("node")).child(STR("node3")); - - CHECK(node1.attributes_end() != node2.attributes_end() && node1.attributes_end() != node3.attributes_end() && node2.attributes_end() != node3.attributes_end()); - CHECK(node1.attributes_end() != xml_attribute_iterator() && node2.attributes_end() != xml_attribute_iterator() && node3.attributes_end() != xml_attribute_iterator()); -} - -TEST_XML(dom_attr_iterator_invalidate, "") -{ - xml_node node2 = doc.child(STR("node")).child(STR("node2")); - - xml_attribute_iterator it1 = node2.attributes_begin(); - xml_attribute_iterator it2 = move_iter(it1, 1); - xml_attribute_iterator it3 = move_iter(it2, 1); - - CHECK(it3 == node2.attributes_end()); - - // removing attr2, it2 is invalid now, it3 is still past-the-end - node2.remove_attribute(*it2); - - CHECK(node2.attributes_end() == it3); - CHECK(move_iter(it1, 1) == it3); - CHECK(move_iter(it3, -1) == it1); - CHECK_STRING(it1->name(), STR("attr1")); - - // adding attr2 back, it3 is still past-the-end! 
- xml_attribute_iterator it2new = xml_attribute_iterator(node2.append_attribute(STR("attr2-new")), node2); - - CHECK(node2.attributes_end() == it3); - CHECK(move_iter(it1, 1) == it2new); - CHECK(move_iter(it2new, 1) == it3); - CHECK(move_iter(it3, -1) == it2new); - CHECK_STRING(it2new->name(), STR("attr2-new")); - - // removing both attributes, it3 is now equal to the begin - node2.remove_attribute(*it1); - node2.remove_attribute(*it2new); - CHECK(!node2.first_attribute()); - - CHECK(node2.attributes_begin() == it3); - CHECK(node2.attributes_end() == it3); -} - -TEST_XML(dom_node_bool_ops, "") -{ - generic_bool_ops_test(doc.child(STR("node"))); -} - -TEST_XML(dom_node_eq_ops, "") -{ - generic_eq_ops_test(doc.child(STR("node")).child(STR("node1")), doc.child(STR("node")).child(STR("node2"))); -} - -TEST_XML(dom_node_rel_ops, "") -{ - generic_rel_ops_test(doc.child(STR("node")).child(STR("node1")), doc.child(STR("node")).child(STR("node2"))); -} - -TEST_XML(dom_node_empty, "") -{ - generic_empty_test(doc.child(STR("node"))); -} - -TEST_XML(dom_node_iterator, "") -{ - xml_node node1 = doc.child(STR("node")).child(STR("node1")); - xml_node node2 = doc.child(STR("node")).child(STR("node2")); - xml_node node3 = doc.child(STR("node")).child(STR("node3")); - - CHECK(xml_node().begin() == xml_node_iterator()); - CHECK(xml_node().end() == xml_node_iterator()); - - CHECK(node1.begin() == xml_node_iterator(node1.child(STR("child1")))); - CHECK(move_iter(node1.begin(), 1) == node1.end()); - CHECK(move_iter(node1.end(), -1) == node1.begin()); - CHECK(*node1.begin() == node1.child(STR("child1"))); - CHECK_STRING(node1.begin()->name(), STR("child1")); - - CHECK(move_iter(node2.begin(), 2) == node2.end()); - CHECK(move_iter(node2.end(), -2) == node2.begin()); - - CHECK(node3.begin() != xml_node_iterator()); - CHECK(node3.begin() == node3.end()); - - xml_node_iterator it = node2.child(STR("child2")); - xml_node_iterator itt = it; - - CHECK(itt++ == it); - CHECK(itt == node2.end()); 
- - CHECK(itt-- == node2.end()); - CHECK(itt == it); - - CHECK(++itt == node2.end()); - CHECK(itt == node2.end()); - - CHECK(--itt == it); - CHECK(itt == it); - - CHECK(++itt != it); -} - -TEST_XML(dom_node_iterator_end, "") -{ - xml_node node1 = doc.child(STR("node")).child(STR("node1")); - xml_node node2 = doc.child(STR("node")).child(STR("node2")); - xml_node node3 = doc.child(STR("node")).child(STR("node3")); - - CHECK(node1.end() != node2.end() && node1.end() != node3.end() && node2.end() != node3.end()); - CHECK(node1.end() != xml_node_iterator() && node2.end() != xml_node_iterator() && node3.end() != xml_node_iterator()); -} - -TEST_XML(dom_node_iterator_invalidate, "") -{ - xml_node node2 = doc.child(STR("node")).child(STR("node2")); - - xml_node_iterator it1 = node2.begin(); - xml_node_iterator it2 = move_iter(it1, 1); - xml_node_iterator it3 = move_iter(it2, 1); - - CHECK(it3 == node2.end()); - - // removing child2, it2 is invalid now, it3 is still past-the-end - node2.remove_child(*it2); - - CHECK(node2.end() == it3); - CHECK(move_iter(it1, 1) == it3); - CHECK(move_iter(it3, -1) == it1); - CHECK_STRING(it1->name(), STR("child1")); - - // adding attr2 back, it3 is still past-the-end! 
- xml_node_iterator it2new = node2.append_child(); - it2new->set_name(STR("child2-new")); - - CHECK(node2.end() == it3); - CHECK(move_iter(it1, 1) == it2new); - CHECK(move_iter(it2new, 1) == it3); - CHECK(move_iter(it3, -1) == it2new); - CHECK_STRING(it2new->name(), STR("child2-new")); - - // removing both nodes, it3 is now equal to the begin - node2.remove_child(*it1); - node2.remove_child(*it2new); - CHECK(!node2.first_child()); - - CHECK(node2.begin() == it3); - CHECK(node2.end() == it3); -} - -TEST_XML(dom_node_parent, "") -{ - CHECK(xml_node().parent() == xml_node()); - CHECK(doc.child(STR("node")).child(STR("child")).parent() == doc.child(STR("node"))); - CHECK(doc.child(STR("node")).parent() == doc); -} - -TEST_XML(dom_node_root, "") -{ - CHECK(xml_node().root() == xml_node()); - CHECK(doc.child(STR("node")).child(STR("child")).root() == doc); - CHECK(doc.child(STR("node")).root() == doc); -} - -TEST_XML_FLAGS(dom_node_type, "pcdata", parse_default | parse_pi | parse_comments | parse_declaration) -{ - CHECK(xml_node().type() == node_null); - CHECK(doc.type() == node_document); - - xml_node_iterator it = doc.begin(); - - CHECK((it++)->type() == node_declaration); - CHECK((it++)->type() == node_pi); - CHECK((it++)->type() == node_comment); - CHECK((it++)->type() == node_element); - - xml_node_iterator cit = doc.child(STR("node")).begin(); - - CHECK((cit++)->type() == node_pcdata); - CHECK((cit++)->type() == node_cdata); -} - -TEST_XML_FLAGS(dom_node_name_value, "pcdata", parse_default | parse_pi | parse_comments | parse_declaration) -{ - CHECK_NAME_VALUE(xml_node(), STR(""), STR("")); - CHECK_NAME_VALUE(doc, STR(""), STR("")); - - xml_node_iterator it = doc.begin(); - - CHECK_NAME_VALUE(*it++, STR("xml"), STR("")); - CHECK_NAME_VALUE(*it++, STR("pi"), STR("")); - CHECK_NAME_VALUE(*it++, STR(""), STR("comment")); - CHECK_NAME_VALUE(*it++, STR("node"), STR("")); - - xml_node_iterator cit = doc.child(STR("node")).begin(); - - CHECK_NAME_VALUE(*cit++, STR(""), 
STR("pcdata")); - CHECK_NAME_VALUE(*cit++, STR(""), STR("cdata")); -} - -TEST_XML(dom_node_child, "") -{ - CHECK(xml_node().child(STR("n")) == xml_node()); - - CHECK(doc.child(STR("n")) == xml_node()); - CHECK_NAME_VALUE(doc.child(STR("node")), STR("node"), STR("")); - CHECK(doc.child(STR("node")).child(STR("child2")) == doc.child(STR("node")).last_child()); -} - -TEST_XML(dom_node_attribute, "") -{ - CHECK(xml_node().attribute(STR("a")) == xml_attribute()); - - xml_node node = doc.child(STR("node")); - - CHECK(node.attribute(STR("n")) == xml_attribute()); - CHECK_NAME_VALUE(node.attribute(STR("attr1")), STR("attr1"), STR("0")); - CHECK(node.attribute(STR("attr2")) == node.last_attribute()); -} - -TEST_XML(dom_node_next_previous_sibling, "") -{ - CHECK(xml_node().next_sibling() == xml_node()); - CHECK(xml_node().next_sibling(STR("n")) == xml_node()); - - CHECK(xml_node().previous_sibling() == xml_node()); - CHECK(xml_node().previous_sibling(STR("n")) == xml_node()); - - xml_node child1 = doc.child(STR("node")).child(STR("child1")); - xml_node child2 = doc.child(STR("node")).child(STR("child2")); - xml_node child3 = doc.child(STR("node")).child(STR("child3")); - - CHECK(child1.next_sibling() == child2); - CHECK(child3.next_sibling() == xml_node()); - - CHECK(child1.previous_sibling() == xml_node()); - CHECK(child3.previous_sibling() == child2); - - CHECK(child1.next_sibling(STR("child3")) == child3); - CHECK(child1.next_sibling(STR("child")) == xml_node()); - - CHECK(child3.previous_sibling(STR("child1")) == child1); - CHECK(child3.previous_sibling(STR("child")) == xml_node()); -} - -TEST_XML(dom_node_child_value, "value1value2value4") -{ - CHECK_STRING(xml_node().child_value(), STR("")); - CHECK_STRING(xml_node().child_value(STR("n")), STR("")); - - xml_node node = doc.child(STR("node")); - - CHECK_STRING(node.child_value(), STR("value4")); - CHECK_STRING(node.child(STR("child1")).child_value(), STR("value1")); - 
CHECK_STRING(node.child(STR("child2")).child_value(), STR("value2")); - CHECK_STRING(node.child(STR("child3")).child_value(), STR("value3")); - CHECK_STRING(node.child_value(STR("child3")), STR("value3")); -} - -TEST_XML(dom_node_first_last_attribute, "") -{ - xml_node node = doc.child(STR("node")); - - CHECK(node.first_attribute() == node.attribute(STR("attr1"))); - CHECK(node.last_attribute() == node.attribute(STR("attr2"))); - - CHECK(xml_node().first_attribute() == xml_attribute()); - CHECK(xml_node().last_attribute() == xml_attribute()); - - CHECK(doc.first_attribute() == xml_attribute()); - CHECK(doc.last_attribute() == xml_attribute()); -} - -TEST_XML(dom_node_first_last_child, "") -{ - xml_node node = doc.child(STR("node")); - - CHECK(node.first_child() == node.child(STR("child1"))); - CHECK(node.last_child() == node.child(STR("child2"))); - - CHECK(xml_node().first_child() == xml_node()); - CHECK(xml_node().last_child() == xml_node()); - - CHECK(doc.first_child() == node); - CHECK(doc.last_child() == node); -} - -TEST_XML(dom_node_find_child_by_attribute, "") -{ - CHECK(xml_node().find_child_by_attribute(STR("name"), STR("attr"), STR("value")) == xml_node()); - CHECK(xml_node().find_child_by_attribute(STR("attr"), STR("value")) == xml_node()); - - xml_node node = doc.child(STR("node")); - - CHECK(node.find_child_by_attribute(STR("child2"), STR("attr"), STR("value3")) == node.last_child()); - CHECK(node.find_child_by_attribute(STR("child2"), STR("attr3"), STR("value3")) == xml_node()); - CHECK(node.find_child_by_attribute(STR("attr"), STR("value2")) == node.child(STR("child2"))); - CHECK(node.find_child_by_attribute(STR("attr3"), STR("value")) == xml_node()); -} - -struct find_predicate_const -{ - bool result; - - find_predicate_const(bool result): result(result) - { - } - - template bool operator()(const T&) const - { - return result; - } -}; - -struct find_predicate_prefix -{ - const pugi::char_t* prefix; - - find_predicate_prefix(const pugi::char_t* 
prefix): prefix(prefix) - { - } - - template bool operator()(const T& obj) const - { - #ifdef PUGIXML_WCHAR_MODE - // can't use wcsncmp here because of a bug in DMC - return std::basic_string(obj.name()).compare(0, wcslen(prefix), prefix) == 0; - #else - return strncmp(obj.name(), prefix, strlen(prefix)) == 0; - #endif - } -}; - -TEST_XML(dom_node_find_attribute, "") -{ - CHECK(xml_node().find_attribute(find_predicate_const(true)) == xml_attribute()); - - xml_node node = doc.child(STR("node")); - - CHECK(doc.find_attribute(find_predicate_const(true)) == xml_attribute()); - CHECK(node.find_attribute(find_predicate_const(true)) == node.first_attribute()); - CHECK(node.find_attribute(find_predicate_const(false)) == xml_attribute()); - CHECK(node.find_attribute(find_predicate_prefix(STR("attr2"))) == node.last_attribute()); - CHECK(node.find_attribute(find_predicate_prefix(STR("attr"))) == node.first_attribute()); -} - -TEST_XML(dom_node_find_child, "") -{ - CHECK(xml_node().find_child(find_predicate_const(true)) == xml_node()); - - xml_node node = doc.child(STR("node")); - - CHECK(node.child(STR("node")).child(STR("child1")).find_child(find_predicate_const(true)) == xml_node()); - CHECK(node.find_child(find_predicate_const(true)) == node.first_child()); - CHECK(node.find_child(find_predicate_const(false)) == xml_node()); - CHECK(node.find_child(find_predicate_prefix(STR("child2"))) == node.last_child()); - CHECK(node.find_child(find_predicate_prefix(STR("child"))) == node.first_child()); -} - -TEST_XML(dom_node_find_node, "") -{ - CHECK(xml_node().find_node(find_predicate_const(true)) == xml_node()); - - xml_node node = doc.child(STR("node")); - - CHECK(node.child(STR("node")).child(STR("child1")).find_node(find_predicate_const(true)) == xml_node()); - CHECK(node.find_node(find_predicate_const(true)) == node.first_child()); - CHECK(node.find_node(find_predicate_const(false)) == xml_node()); - CHECK(node.find_node(find_predicate_prefix(STR("child2"))) == 
node.last_child()); - CHECK(node.find_node(find_predicate_prefix(STR("child"))) == node.first_child()); - CHECK(doc.find_node(find_predicate_prefix(STR("child"))) == node.first_child()); - CHECK(doc.find_node(find_predicate_prefix(STR("child2"))) == node.last_child()); - CHECK(doc.find_node(find_predicate_prefix(STR("child3"))) == xml_node()); -} - -#ifndef PUGIXML_NO_STL -TEST_XML(dom_node_path, "text") -{ - CHECK(xml_node().path() == STR("")); - - CHECK(doc.path() == STR("")); - CHECK(doc.child(STR("node")).path() == STR("/node")); - CHECK(doc.child(STR("node")).child(STR("child1")).path() == STR("/node/child1")); - CHECK(doc.child(STR("node")).child(STR("child1")).child(STR("child2")).path() == STR("/node/child1/child2")); - CHECK(doc.child(STR("node")).child(STR("child1")).first_child().path() == STR("/node/child1/")); - - CHECK(doc.child(STR("node")).child(STR("child1")).path('\\') == STR("\\node\\child1")); -} -#endif - -TEST_XML(dom_node_first_element_by_path, "text") -{ - CHECK(xml_node().first_element_by_path(STR("/")) == xml_node()); - - CHECK(doc.first_element_by_path(STR("")) == doc); - CHECK(doc.first_element_by_path(STR("/")) == doc); - - CHECK(doc.first_element_by_path(STR("/node/")) == doc.child(STR("node"))); - CHECK(doc.first_element_by_path(STR("node/")) == doc.child(STR("node"))); - CHECK(doc.first_element_by_path(STR("node")) == doc.child(STR("node"))); - CHECK(doc.first_element_by_path(STR("/node")) == doc.child(STR("node"))); - -#ifndef PUGIXML_NO_STL - CHECK(doc.first_element_by_path(STR("/node/child1/child2")).path() == STR("/node/child1/child2")); -#endif - - CHECK(doc.first_element_by_path(STR("/node/child2")) == xml_node()); - - CHECK(doc.first_element_by_path(STR("\\node\\child1"), '\\') == doc.child(STR("node")).child(STR("child1"))); - - CHECK(doc.child(STR("node")).first_element_by_path(STR("..")) == doc); - CHECK(doc.child(STR("node")).first_element_by_path(STR(".")) == doc.child(STR("node"))); - - 
CHECK(doc.child(STR("node")).first_element_by_path(STR("../node/./child1/../.")) == doc.child(STR("node"))); - - CHECK(doc.child(STR("node")).first_element_by_path(STR("child1")) == doc.child(STR("node")).child(STR("child1"))); - CHECK(doc.child(STR("node")).first_element_by_path(STR("child1/")) == doc.child(STR("node")).child(STR("child1"))); - CHECK(doc.child(STR("node")).first_element_by_path(STR("child")) == xml_node()); - CHECK(doc.child(STR("node")).first_element_by_path(STR("child11")) == xml_node()); -} - -struct test_walker: xml_tree_walker -{ - std::basic_string log; - unsigned int call_count; - unsigned int stop_count; - - test_walker(unsigned int stop_count = 0): call_count(0), stop_count(stop_count) - { - } - - std::basic_string depthstr() const - { - char buf[32]; - sprintf(buf, "%d", depth()); - - #ifdef PUGIXML_WCHAR_MODE - wchar_t wbuf[32]; - std::copy(buf, buf + strlen(buf) + 1, &wbuf[0]); - - return std::basic_string(wbuf); - #else - return std::basic_string(buf); - #endif - } - - virtual bool begin(xml_node& node) - { - log += STR("|"); - log += depthstr(); - log += STR(" <"); - log += node.name(); - log += STR("="); - log += node.value(); - - return ++call_count != stop_count && xml_tree_walker::begin(node); - } - - virtual bool for_each(xml_node& node) - { - log += STR("|"); - log += depthstr(); - log += STR(" !"); - log += node.name(); - log += STR("="); - log += node.value(); - - return ++call_count != stop_count && xml_tree_walker::end(node); - } - - virtual bool end(xml_node& node) - { - log += STR("|"); - log += depthstr(); - log += STR(" >"); - log += node.name(); - log += STR("="); - log += node.value(); - - return ++call_count != stop_count; - } -}; - -TEST_XML(dom_node_traverse, "text") -{ - test_walker walker; - - CHECK(doc.traverse(walker)); - - CHECK(walker.call_count == 5); - CHECK(walker.log == STR("|-1 <=|0 !node=|1 !child=|2 !=text|-1 >=")); -} - -TEST_XML(dom_node_traverse_siblings, "text") -{ - test_walker walker; - - 
CHECK(doc.traverse(walker)); - - CHECK(walker.call_count == 7); - CHECK(walker.log == STR("|-1 <=|0 !node=|1 !child=|1 !child=|2 !=text|1 !child=|-1 >=")); -} - -TEST(dom_node_traverse_empty) -{ - test_walker walker; - - CHECK(xml_node().traverse(walker)); - - CHECK(walker.call_count == 2); - CHECK(walker.log == STR("|-1 <=|-1 >=")); -} - -TEST_XML(dom_node_traverse_child, "text") -{ - test_walker walker; - - CHECK(doc.child(STR("node")).traverse(walker)); - - CHECK(walker.call_count == 4); - CHECK(walker.log == STR("|-1 node=")); -} - -TEST_XML(dom_node_traverse_stop_begin, "text") -{ - test_walker walker(1); - - CHECK(!doc.traverse(walker)); - - CHECK(walker.call_count == 1); - CHECK(walker.log == STR("|-1 <=")); -} - -TEST_XML(dom_node_traverse_stop_for_each, "text") -{ - test_walker walker(3); - - CHECK(!doc.traverse(walker)); - - CHECK(walker.call_count == 3); - CHECK(walker.log == STR("|-1 <=|0 !node=|1 !child=")); -} - -TEST_XML(dom_node_traverse_stop_end, "text") -{ - test_walker walker(5); - - CHECK(!doc.traverse(walker)); - - CHECK(walker.call_count == 5); - CHECK(walker.log == STR("|-1 <=|0 !node=|1 !child=|2 !=text|-1 >=")); -} - -TEST_XML_FLAGS(dom_offset_debug, "pcdata", parse_default | parse_pi | parse_comments | parse_declaration) -{ - CHECK(xml_node().offset_debug() == -1); - CHECK(doc.offset_debug() == 0); - - xml_node_iterator it = doc.begin(); - - CHECK((it++)->offset_debug() == 2); - CHECK((it++)->offset_debug() == 9); - CHECK((it++)->offset_debug() == 17); - CHECK((it++)->offset_debug() == 28); - - xml_node_iterator cit = doc.child(STR("node")).begin(); - - CHECK((cit++)->offset_debug() == 33); - CHECK((cit++)->offset_debug() == 48); -} +#define _CRT_SECURE_NO_WARNINGS +#define _SCL_SECURE_NO_WARNINGS + +#include "common.hpp" + +#include + +#include +#include + +#include +#include +#include +#include + +#include "helpers.hpp" + +#ifdef PUGIXML_NO_STL +template static I move_iter(I base, int n) +{ + if (n > 0) while (n--) ++base; + else while 
(n++) --base; + return base; +} +#else +template static I move_iter(I base, int n) +{ + std::advance(base, n); + return base; +} +#endif + +template static void generic_empty_test(const T& obj) +{ + T null; + + CHECK(null.empty()); + CHECK(!obj.empty()); +} + +TEST_XML(dom_attr_bool_ops, "") +{ + generic_bool_ops_test(doc.child(STR("node")).attribute(STR("attr"))); +} + +TEST_XML(dom_attr_eq_ops, "") +{ + generic_eq_ops_test(doc.child(STR("node")).attribute(STR("attr1")), doc.child(STR("node")).attribute(STR("attr2"))); +} + +TEST_XML(dom_attr_rel_ops, "") +{ + generic_rel_ops_test(doc.child(STR("node")).attribute(STR("attr1")), doc.child(STR("node")).attribute(STR("attr2"))); +} + +TEST_XML(dom_attr_empty, "") +{ + generic_empty_test(doc.child(STR("node")).attribute(STR("attr"))); +} + +TEST_XML(dom_attr_next_previous_attribute, "") +{ + xml_attribute attr1 = doc.child(STR("node")).attribute(STR("attr1")); + xml_attribute attr2 = doc.child(STR("node")).attribute(STR("attr2")); + + CHECK(attr1.next_attribute() == attr2); + CHECK(attr2.next_attribute() == xml_attribute()); + + CHECK(attr1.previous_attribute() == xml_attribute()); + CHECK(attr2.previous_attribute() == attr1); + + CHECK(xml_attribute().next_attribute() == xml_attribute()); + CHECK(xml_attribute().previous_attribute() == xml_attribute()); +} + +TEST_XML(dom_attr_name_value, "") +{ + xml_attribute attr = doc.child(STR("node")).attribute(STR("attr")); + + CHECK_NAME_VALUE(attr, STR("attr"), STR("1")); + CHECK_NAME_VALUE(xml_attribute(), STR(""), STR("")); +} + +TEST_XML(dom_attr_as_int, "") +{ + xml_node node = doc.child(STR("node")); + + CHECK(xml_attribute().as_int() == 0); + CHECK(node.attribute(STR("attr1")).as_int() == 1); + CHECK(node.attribute(STR("attr2")).as_int() == -1); + CHECK(node.attribute(STR("attr3")).as_int() == -2147483647 - 1); + CHECK(node.attribute(STR("attr4")).as_int() == 2147483647); +} + +TEST_XML(dom_attr_as_uint, "") +{ + xml_node node = doc.child(STR("node")); + + 
CHECK(xml_attribute().as_uint() == 0); + CHECK(node.attribute(STR("attr1")).as_uint() == 0); + CHECK(node.attribute(STR("attr2")).as_uint() == 1); + CHECK(node.attribute(STR("attr3")).as_uint() == 2147483647); + CHECK(node.attribute(STR("attr4")).as_uint() == 4294967295u); +} + +TEST_XML(dom_attr_as_float, "") +{ + xml_node node = doc.child(STR("node")); + + CHECK(xml_attribute().as_float() == 0); + CHECK_DOUBLE(node.attribute(STR("attr1")).as_float(), 0); + CHECK_DOUBLE(node.attribute(STR("attr2")).as_float(), 1); + CHECK_DOUBLE(node.attribute(STR("attr3")).as_float(), 0.12); + CHECK_DOUBLE(node.attribute(STR("attr4")).as_float(), -5.1); + CHECK_DOUBLE(node.attribute(STR("attr5")).as_float(), 3e-4); + CHECK_DOUBLE(node.attribute(STR("attr6")).as_float(), 3.14159265358979323846); +} + +TEST_XML(dom_attr_as_double, "") +{ + xml_node node = doc.child(STR("node")); + + CHECK(xml_attribute().as_double() == 0); + CHECK_DOUBLE(node.attribute(STR("attr1")).as_double(), 0); + CHECK_DOUBLE(node.attribute(STR("attr2")).as_double(), 1); + CHECK_DOUBLE(node.attribute(STR("attr3")).as_double(), 0.12); + CHECK_DOUBLE(node.attribute(STR("attr4")).as_double(), -5.1); + CHECK_DOUBLE(node.attribute(STR("attr5")).as_double(), 3e-4); + CHECK_DOUBLE(node.attribute(STR("attr6")).as_double(), 3.14159265358979323846); +} + +TEST_XML(dom_attr_as_bool, "") +{ + xml_node node = doc.child(STR("node")); + + CHECK(!xml_attribute().as_bool()); + CHECK(!node.attribute(STR("attr1")).as_bool()); + CHECK(node.attribute(STR("attr2")).as_bool()); + CHECK(node.attribute(STR("attr3")).as_bool()); + CHECK(node.attribute(STR("attr4")).as_bool()); + CHECK(node.attribute(STR("attr5")).as_bool()); + CHECK(node.attribute(STR("attr6")).as_bool()); + CHECK(!node.attribute(STR("attr7")).as_bool()); +} + +TEST_XML(dom_attr_iterator, "") +{ + xml_node node1 = doc.child(STR("node")).child(STR("node1")); + xml_node node2 = doc.child(STR("node")).child(STR("node2")); + xml_node node3 = 
doc.child(STR("node")).child(STR("node3")); + + CHECK(xml_node().attributes_begin() == xml_attribute_iterator()); + CHECK(xml_node().attributes_end() == xml_attribute_iterator()); + + CHECK(node1.attributes_begin() == xml_attribute_iterator(node1.attribute(STR("attr1")), node1)); + CHECK(move_iter(node1.attributes_begin(), 1) == node1.attributes_end()); + CHECK(move_iter(node1.attributes_end(), -1) == node1.attributes_begin()); + CHECK(*node1.attributes_begin() == node1.attribute(STR("attr1"))); + CHECK_STRING(node1.attributes_begin()->name(), STR("attr1")); + + CHECK(move_iter(node2.attributes_begin(), 2) == node2.attributes_end()); + CHECK(move_iter(node2.attributes_end(), -2) == node2.attributes_begin()); + + CHECK(node3.attributes_begin() != xml_attribute_iterator()); + CHECK(node3.attributes_begin() == node3.attributes_end()); + + xml_attribute_iterator it = xml_attribute_iterator(node2.attribute(STR("attr2")), node2); + xml_attribute_iterator itt = it; + + CHECK(itt++ == it); + CHECK(itt == node2.attributes_end()); + + CHECK(itt-- == node2.attributes_end()); + CHECK(itt == it); + + CHECK(++itt == node2.attributes_end()); + CHECK(itt == node2.attributes_end()); + + CHECK(--itt == it); + CHECK(itt == it); + + CHECK(++itt != it); +} + +TEST_XML(dom_attr_iterator_end, "") +{ + xml_node node1 = doc.child(STR("node")).child(STR("node1")); + xml_node node2 = doc.child(STR("node")).child(STR("node2")); + xml_node node3 = doc.child(STR("node")).child(STR("node3")); + + CHECK(node1.attributes_end() != node2.attributes_end() && node1.attributes_end() != node3.attributes_end() && node2.attributes_end() != node3.attributes_end()); + CHECK(node1.attributes_end() != xml_attribute_iterator() && node2.attributes_end() != xml_attribute_iterator() && node3.attributes_end() != xml_attribute_iterator()); +} + +TEST_XML(dom_attr_iterator_invalidate, "") +{ + xml_node node2 = doc.child(STR("node")).child(STR("node2")); + + xml_attribute_iterator it1 = node2.attributes_begin(); + 
xml_attribute_iterator it2 = move_iter(it1, 1); + xml_attribute_iterator it3 = move_iter(it2, 1); + + CHECK(it3 == node2.attributes_end()); + + // removing attr2, it2 is invalid now, it3 is still past-the-end + node2.remove_attribute(*it2); + + CHECK(node2.attributes_end() == it3); + CHECK(move_iter(it1, 1) == it3); + CHECK(move_iter(it3, -1) == it1); + CHECK_STRING(it1->name(), STR("attr1")); + + // adding attr2 back, it3 is still past-the-end! + xml_attribute_iterator it2new = xml_attribute_iterator(node2.append_attribute(STR("attr2-new")), node2); + + CHECK(node2.attributes_end() == it3); + CHECK(move_iter(it1, 1) == it2new); + CHECK(move_iter(it2new, 1) == it3); + CHECK(move_iter(it3, -1) == it2new); + CHECK_STRING(it2new->name(), STR("attr2-new")); + + // removing both attributes, it3 is now equal to the begin + node2.remove_attribute(*it1); + node2.remove_attribute(*it2new); + CHECK(!node2.first_attribute()); + + CHECK(node2.attributes_begin() == it3); + CHECK(node2.attributes_end() == it3); +} + +TEST_XML(dom_node_bool_ops, "") +{ + generic_bool_ops_test(doc.child(STR("node"))); +} + +TEST_XML(dom_node_eq_ops, "") +{ + generic_eq_ops_test(doc.child(STR("node")).child(STR("node1")), doc.child(STR("node")).child(STR("node2"))); +} + +TEST_XML(dom_node_rel_ops, "") +{ + generic_rel_ops_test(doc.child(STR("node")).child(STR("node1")), doc.child(STR("node")).child(STR("node2"))); +} + +TEST_XML(dom_node_empty, "") +{ + generic_empty_test(doc.child(STR("node"))); +} + +TEST_XML(dom_node_iterator, "") +{ + xml_node node1 = doc.child(STR("node")).child(STR("node1")); + xml_node node2 = doc.child(STR("node")).child(STR("node2")); + xml_node node3 = doc.child(STR("node")).child(STR("node3")); + + CHECK(xml_node().begin() == xml_node_iterator()); + CHECK(xml_node().end() == xml_node_iterator()); + + CHECK(node1.begin() == xml_node_iterator(node1.child(STR("child1")))); + CHECK(move_iter(node1.begin(), 1) == node1.end()); + CHECK(move_iter(node1.end(), -1) == 
node1.begin()); + CHECK(*node1.begin() == node1.child(STR("child1"))); + CHECK_STRING(node1.begin()->name(), STR("child1")); + + CHECK(move_iter(node2.begin(), 2) == node2.end()); + CHECK(move_iter(node2.end(), -2) == node2.begin()); + + CHECK(node3.begin() != xml_node_iterator()); + CHECK(node3.begin() == node3.end()); + + xml_node_iterator it = node2.child(STR("child2")); + xml_node_iterator itt = it; + + CHECK(itt++ == it); + CHECK(itt == node2.end()); + + CHECK(itt-- == node2.end()); + CHECK(itt == it); + + CHECK(++itt == node2.end()); + CHECK(itt == node2.end()); + + CHECK(--itt == it); + CHECK(itt == it); + + CHECK(++itt != it); +} + +TEST_XML(dom_node_iterator_end, "") +{ + xml_node node1 = doc.child(STR("node")).child(STR("node1")); + xml_node node2 = doc.child(STR("node")).child(STR("node2")); + xml_node node3 = doc.child(STR("node")).child(STR("node3")); + + CHECK(node1.end() != node2.end() && node1.end() != node3.end() && node2.end() != node3.end()); + CHECK(node1.end() != xml_node_iterator() && node2.end() != xml_node_iterator() && node3.end() != xml_node_iterator()); +} + +TEST_XML(dom_node_iterator_invalidate, "") +{ + xml_node node2 = doc.child(STR("node")).child(STR("node2")); + + xml_node_iterator it1 = node2.begin(); + xml_node_iterator it2 = move_iter(it1, 1); + xml_node_iterator it3 = move_iter(it2, 1); + + CHECK(it3 == node2.end()); + + // removing child2, it2 is invalid now, it3 is still past-the-end + node2.remove_child(*it2); + + CHECK(node2.end() == it3); + CHECK(move_iter(it1, 1) == it3); + CHECK(move_iter(it3, -1) == it1); + CHECK_STRING(it1->name(), STR("child1")); + + // adding attr2 back, it3 is still past-the-end! 
+ xml_node_iterator it2new = node2.append_child(); + it2new->set_name(STR("child2-new")); + + CHECK(node2.end() == it3); + CHECK(move_iter(it1, 1) == it2new); + CHECK(move_iter(it2new, 1) == it3); + CHECK(move_iter(it3, -1) == it2new); + CHECK_STRING(it2new->name(), STR("child2-new")); + + // removing both nodes, it3 is now equal to the begin + node2.remove_child(*it1); + node2.remove_child(*it2new); + CHECK(!node2.first_child()); + + CHECK(node2.begin() == it3); + CHECK(node2.end() == it3); +} + +TEST_XML(dom_node_parent, "") +{ + CHECK(xml_node().parent() == xml_node()); + CHECK(doc.child(STR("node")).child(STR("child")).parent() == doc.child(STR("node"))); + CHECK(doc.child(STR("node")).parent() == doc); +} + +TEST_XML(dom_node_root, "") +{ + CHECK(xml_node().root() == xml_node()); + CHECK(doc.child(STR("node")).child(STR("child")).root() == doc); + CHECK(doc.child(STR("node")).root() == doc); +} + +TEST_XML_FLAGS(dom_node_type, "pcdata", parse_default | parse_pi | parse_comments | parse_declaration) +{ + CHECK(xml_node().type() == node_null); + CHECK(doc.type() == node_document); + + xml_node_iterator it = doc.begin(); + + CHECK((it++)->type() == node_declaration); + CHECK((it++)->type() == node_pi); + CHECK((it++)->type() == node_comment); + CHECK((it++)->type() == node_element); + + xml_node_iterator cit = doc.child(STR("node")).begin(); + + CHECK((cit++)->type() == node_pcdata); + CHECK((cit++)->type() == node_cdata); +} + +TEST_XML_FLAGS(dom_node_name_value, "pcdata", parse_default | parse_pi | parse_comments | parse_declaration) +{ + CHECK_NAME_VALUE(xml_node(), STR(""), STR("")); + CHECK_NAME_VALUE(doc, STR(""), STR("")); + + xml_node_iterator it = doc.begin(); + + CHECK_NAME_VALUE(*it++, STR("xml"), STR("")); + CHECK_NAME_VALUE(*it++, STR("pi"), STR("")); + CHECK_NAME_VALUE(*it++, STR(""), STR("comment")); + CHECK_NAME_VALUE(*it++, STR("node"), STR("")); + + xml_node_iterator cit = doc.child(STR("node")).begin(); + + CHECK_NAME_VALUE(*cit++, STR(""), 
STR("pcdata")); + CHECK_NAME_VALUE(*cit++, STR(""), STR("cdata")); +} + +TEST_XML(dom_node_child, "") +{ + CHECK(xml_node().child(STR("n")) == xml_node()); + + CHECK(doc.child(STR("n")) == xml_node()); + CHECK_NAME_VALUE(doc.child(STR("node")), STR("node"), STR("")); + CHECK(doc.child(STR("node")).child(STR("child2")) == doc.child(STR("node")).last_child()); +} + +TEST_XML(dom_node_attribute, "") +{ + CHECK(xml_node().attribute(STR("a")) == xml_attribute()); + + xml_node node = doc.child(STR("node")); + + CHECK(node.attribute(STR("n")) == xml_attribute()); + CHECK_NAME_VALUE(node.attribute(STR("attr1")), STR("attr1"), STR("0")); + CHECK(node.attribute(STR("attr2")) == node.last_attribute()); +} + +TEST_XML(dom_node_next_previous_sibling, "") +{ + CHECK(xml_node().next_sibling() == xml_node()); + CHECK(xml_node().next_sibling(STR("n")) == xml_node()); + + CHECK(xml_node().previous_sibling() == xml_node()); + CHECK(xml_node().previous_sibling(STR("n")) == xml_node()); + + xml_node child1 = doc.child(STR("node")).child(STR("child1")); + xml_node child2 = doc.child(STR("node")).child(STR("child2")); + xml_node child3 = doc.child(STR("node")).child(STR("child3")); + + CHECK(child1.next_sibling() == child2); + CHECK(child3.next_sibling() == xml_node()); + + CHECK(child1.previous_sibling() == xml_node()); + CHECK(child3.previous_sibling() == child2); + + CHECK(child1.next_sibling(STR("child3")) == child3); + CHECK(child1.next_sibling(STR("child")) == xml_node()); + + CHECK(child3.previous_sibling(STR("child1")) == child1); + CHECK(child3.previous_sibling(STR("child")) == xml_node()); +} + +TEST_XML(dom_node_child_value, "value1value2value4") +{ + CHECK_STRING(xml_node().child_value(), STR("")); + CHECK_STRING(xml_node().child_value(STR("n")), STR("")); + + xml_node node = doc.child(STR("node")); + + CHECK_STRING(node.child_value(), STR("value4")); + CHECK_STRING(node.child(STR("child1")).child_value(), STR("value1")); + 
CHECK_STRING(node.child(STR("child2")).child_value(), STR("value2")); + CHECK_STRING(node.child(STR("child3")).child_value(), STR("value3")); + CHECK_STRING(node.child_value(STR("child3")), STR("value3")); +} + +TEST_XML(dom_node_first_last_attribute, "") +{ + xml_node node = doc.child(STR("node")); + + CHECK(node.first_attribute() == node.attribute(STR("attr1"))); + CHECK(node.last_attribute() == node.attribute(STR("attr2"))); + + CHECK(xml_node().first_attribute() == xml_attribute()); + CHECK(xml_node().last_attribute() == xml_attribute()); + + CHECK(doc.first_attribute() == xml_attribute()); + CHECK(doc.last_attribute() == xml_attribute()); +} + +TEST_XML(dom_node_first_last_child, "") +{ + xml_node node = doc.child(STR("node")); + + CHECK(node.first_child() == node.child(STR("child1"))); + CHECK(node.last_child() == node.child(STR("child2"))); + + CHECK(xml_node().first_child() == xml_node()); + CHECK(xml_node().last_child() == xml_node()); + + CHECK(doc.first_child() == node); + CHECK(doc.last_child() == node); +} + +TEST_XML(dom_node_find_child_by_attribute, "") +{ + CHECK(xml_node().find_child_by_attribute(STR("name"), STR("attr"), STR("value")) == xml_node()); + CHECK(xml_node().find_child_by_attribute(STR("attr"), STR("value")) == xml_node()); + + xml_node node = doc.child(STR("node")); + + CHECK(node.find_child_by_attribute(STR("child2"), STR("attr"), STR("value3")) == node.last_child()); + CHECK(node.find_child_by_attribute(STR("child2"), STR("attr3"), STR("value3")) == xml_node()); + CHECK(node.find_child_by_attribute(STR("attr"), STR("value2")) == node.child(STR("child2"))); + CHECK(node.find_child_by_attribute(STR("attr3"), STR("value")) == xml_node()); +} + +struct find_predicate_const +{ + bool result; + + find_predicate_const(bool result): result(result) + { + } + + template bool operator()(const T&) const + { + return result; + } +}; + +struct find_predicate_prefix +{ + const pugi::char_t* prefix; + + find_predicate_prefix(const pugi::char_t* 
prefix): prefix(prefix) + { + } + + template bool operator()(const T& obj) const + { + #ifdef PUGIXML_WCHAR_MODE + // can't use wcsncmp here because of a bug in DMC + return std::basic_string(obj.name()).compare(0, wcslen(prefix), prefix) == 0; + #else + return strncmp(obj.name(), prefix, strlen(prefix)) == 0; + #endif + } +}; + +TEST_XML(dom_node_find_attribute, "") +{ + CHECK(xml_node().find_attribute(find_predicate_const(true)) == xml_attribute()); + + xml_node node = doc.child(STR("node")); + + CHECK(doc.find_attribute(find_predicate_const(true)) == xml_attribute()); + CHECK(node.find_attribute(find_predicate_const(true)) == node.first_attribute()); + CHECK(node.find_attribute(find_predicate_const(false)) == xml_attribute()); + CHECK(node.find_attribute(find_predicate_prefix(STR("attr2"))) == node.last_attribute()); + CHECK(node.find_attribute(find_predicate_prefix(STR("attr"))) == node.first_attribute()); +} + +TEST_XML(dom_node_find_child, "") +{ + CHECK(xml_node().find_child(find_predicate_const(true)) == xml_node()); + + xml_node node = doc.child(STR("node")); + + CHECK(node.child(STR("node")).child(STR("child1")).find_child(find_predicate_const(true)) == xml_node()); + CHECK(node.find_child(find_predicate_const(true)) == node.first_child()); + CHECK(node.find_child(find_predicate_const(false)) == xml_node()); + CHECK(node.find_child(find_predicate_prefix(STR("child2"))) == node.last_child()); + CHECK(node.find_child(find_predicate_prefix(STR("child"))) == node.first_child()); +} + +TEST_XML(dom_node_find_node, "") +{ + CHECK(xml_node().find_node(find_predicate_const(true)) == xml_node()); + + xml_node node = doc.child(STR("node")); + + CHECK(node.child(STR("node")).child(STR("child1")).find_node(find_predicate_const(true)) == xml_node()); + CHECK(node.find_node(find_predicate_const(true)) == node.first_child()); + CHECK(node.find_node(find_predicate_const(false)) == xml_node()); + CHECK(node.find_node(find_predicate_prefix(STR("child2"))) == 
node.last_child()); + CHECK(node.find_node(find_predicate_prefix(STR("child"))) == node.first_child()); + CHECK(doc.find_node(find_predicate_prefix(STR("child"))) == node.first_child()); + CHECK(doc.find_node(find_predicate_prefix(STR("child2"))) == node.last_child()); + CHECK(doc.find_node(find_predicate_prefix(STR("child3"))) == xml_node()); +} + +#ifndef PUGIXML_NO_STL +TEST_XML(dom_node_path, "text") +{ + CHECK(xml_node().path() == STR("")); + + CHECK(doc.path() == STR("")); + CHECK(doc.child(STR("node")).path() == STR("/node")); + CHECK(doc.child(STR("node")).child(STR("child1")).path() == STR("/node/child1")); + CHECK(doc.child(STR("node")).child(STR("child1")).child(STR("child2")).path() == STR("/node/child1/child2")); + CHECK(doc.child(STR("node")).child(STR("child1")).first_child().path() == STR("/node/child1/")); + + CHECK(doc.child(STR("node")).child(STR("child1")).path('\\') == STR("\\node\\child1")); +} +#endif + +TEST_XML(dom_node_first_element_by_path, "text") +{ + CHECK(xml_node().first_element_by_path(STR("/")) == xml_node()); + + CHECK(doc.first_element_by_path(STR("")) == doc); + CHECK(doc.first_element_by_path(STR("/")) == doc); + + CHECK(doc.first_element_by_path(STR("/node/")) == doc.child(STR("node"))); + CHECK(doc.first_element_by_path(STR("node/")) == doc.child(STR("node"))); + CHECK(doc.first_element_by_path(STR("node")) == doc.child(STR("node"))); + CHECK(doc.first_element_by_path(STR("/node")) == doc.child(STR("node"))); + +#ifndef PUGIXML_NO_STL + CHECK(doc.first_element_by_path(STR("/node/child1/child2")).path() == STR("/node/child1/child2")); +#endif + + CHECK(doc.first_element_by_path(STR("/node/child2")) == xml_node()); + + CHECK(doc.first_element_by_path(STR("\\node\\child1"), '\\') == doc.child(STR("node")).child(STR("child1"))); + + CHECK(doc.child(STR("node")).first_element_by_path(STR("..")) == doc); + CHECK(doc.child(STR("node")).first_element_by_path(STR(".")) == doc.child(STR("node"))); + + 
CHECK(doc.child(STR("node")).first_element_by_path(STR("../node/./child1/../.")) == doc.child(STR("node"))); + + CHECK(doc.child(STR("node")).first_element_by_path(STR("child1")) == doc.child(STR("node")).child(STR("child1"))); + CHECK(doc.child(STR("node")).first_element_by_path(STR("child1/")) == doc.child(STR("node")).child(STR("child1"))); + CHECK(doc.child(STR("node")).first_element_by_path(STR("child")) == xml_node()); + CHECK(doc.child(STR("node")).first_element_by_path(STR("child11")) == xml_node()); +} + +struct test_walker: xml_tree_walker +{ + std::basic_string log; + unsigned int call_count; + unsigned int stop_count; + + test_walker(unsigned int stop_count = 0): call_count(0), stop_count(stop_count) + { + } + + std::basic_string depthstr() const + { + char buf[32]; + sprintf(buf, "%d", depth()); + + #ifdef PUGIXML_WCHAR_MODE + wchar_t wbuf[32]; + std::copy(buf, buf + strlen(buf) + 1, &wbuf[0]); + + return std::basic_string(wbuf); + #else + return std::basic_string(buf); + #endif + } + + virtual bool begin(xml_node& node) + { + log += STR("|"); + log += depthstr(); + log += STR(" <"); + log += node.name(); + log += STR("="); + log += node.value(); + + return ++call_count != stop_count && xml_tree_walker::begin(node); + } + + virtual bool for_each(xml_node& node) + { + log += STR("|"); + log += depthstr(); + log += STR(" !"); + log += node.name(); + log += STR("="); + log += node.value(); + + return ++call_count != stop_count && xml_tree_walker::end(node); + } + + virtual bool end(xml_node& node) + { + log += STR("|"); + log += depthstr(); + log += STR(" >"); + log += node.name(); + log += STR("="); + log += node.value(); + + return ++call_count != stop_count; + } +}; + +TEST_XML(dom_node_traverse, "text") +{ + test_walker walker; + + CHECK(doc.traverse(walker)); + + CHECK(walker.call_count == 5); + CHECK(walker.log == STR("|-1 <=|0 !node=|1 !child=|2 !=text|-1 >=")); +} + +TEST_XML(dom_node_traverse_siblings, "text") +{ + test_walker walker; + + 
CHECK(doc.traverse(walker)); + + CHECK(walker.call_count == 7); + CHECK(walker.log == STR("|-1 <=|0 !node=|1 !child=|1 !child=|2 !=text|1 !child=|-1 >=")); +} + +TEST(dom_node_traverse_empty) +{ + test_walker walker; + + CHECK(xml_node().traverse(walker)); + + CHECK(walker.call_count == 2); + CHECK(walker.log == STR("|-1 <=|-1 >=")); +} + +TEST_XML(dom_node_traverse_child, "text") +{ + test_walker walker; + + CHECK(doc.child(STR("node")).traverse(walker)); + + CHECK(walker.call_count == 4); + CHECK(walker.log == STR("|-1 node=")); +} + +TEST_XML(dom_node_traverse_stop_begin, "text") +{ + test_walker walker(1); + + CHECK(!doc.traverse(walker)); + + CHECK(walker.call_count == 1); + CHECK(walker.log == STR("|-1 <=")); +} + +TEST_XML(dom_node_traverse_stop_for_each, "text") +{ + test_walker walker(3); + + CHECK(!doc.traverse(walker)); + + CHECK(walker.call_count == 3); + CHECK(walker.log == STR("|-1 <=|0 !node=|1 !child=")); +} + +TEST_XML(dom_node_traverse_stop_end, "text") +{ + test_walker walker(5); + + CHECK(!doc.traverse(walker)); + + CHECK(walker.call_count == 5); + CHECK(walker.log == STR("|-1 <=|0 !node=|1 !child=|2 !=text|-1 >=")); +} + +TEST_XML_FLAGS(dom_offset_debug, "pcdata", parse_default | parse_pi | parse_comments | parse_declaration) +{ + CHECK(xml_node().offset_debug() == -1); + CHECK(doc.offset_debug() == 0); + + xml_node_iterator it = doc.begin(); + + CHECK((it++)->offset_debug() == 2); + CHECK((it++)->offset_debug() == 9); + CHECK((it++)->offset_debug() == 17); + CHECK((it++)->offset_debug() == 28); + + xml_node_iterator cit = doc.child(STR("node")).begin(); + + CHECK((cit++)->offset_debug() == 33); + CHECK((cit++)->offset_debug() == 48); +} diff --git a/tests/test_header_guard.cpp b/tests/test_header_guard.cpp index 3706cc9..2f65928 100644 --- a/tests/test_header_guard.cpp +++ b/tests/test_header_guard.cpp @@ -1,3 +1,3 @@ -// Tests header guards -#include "../src/pugixml.hpp" -#include "../src/pugixml.hpp" +// Tests header guards +#include 
"../src/pugixml.hpp" +#include "../src/pugixml.hpp" diff --git a/tests/test_header_iosfwd_1.cpp b/tests/test_header_iosfwd_1.cpp index 73e8527..0ed528a 100644 --- a/tests/test_header_iosfwd_1.cpp +++ b/tests/test_header_iosfwd_1.cpp @@ -1,3 +1,3 @@ -// Tests compatibility with iosfwd -#include "../src/pugixml.hpp" -#include +// Tests compatibility with iosfwd +#include "../src/pugixml.hpp" +#include diff --git a/tests/test_header_iosfwd_2.cpp b/tests/test_header_iosfwd_2.cpp index e472b9c..865d0d8 100644 --- a/tests/test_header_iosfwd_2.cpp +++ b/tests/test_header_iosfwd_2.cpp @@ -1,3 +1,3 @@ -// Tests compatibility with iosfwd -#include -#include "../src/pugixml.hpp" +// Tests compatibility with iosfwd +#include +#include "../src/pugixml.hpp" diff --git a/tests/test_header_iostream_1.cpp b/tests/test_header_iostream_1.cpp index 2b359f9..a836d4f 100644 --- a/tests/test_header_iostream_1.cpp +++ b/tests/test_header_iostream_1.cpp @@ -1,3 +1,3 @@ -// Tests compatibility with iostream -#include "../src/pugixml.hpp" -#include +// Tests compatibility with iostream +#include "../src/pugixml.hpp" +#include diff --git a/tests/test_header_iostream_2.cpp b/tests/test_header_iostream_2.cpp index 0b1b6b8..c0be50b 100644 --- a/tests/test_header_iostream_2.cpp +++ b/tests/test_header_iostream_2.cpp @@ -1,3 +1,3 @@ -// Tests compatibility with iostream -#include -#include "../src/pugixml.hpp" +// Tests compatibility with iostream +#include +#include "../src/pugixml.hpp" diff --git a/tests/test_header_string_1.cpp b/tests/test_header_string_1.cpp index 9e9d33f..07d1263 100644 --- a/tests/test_header_string_1.cpp +++ b/tests/test_header_string_1.cpp @@ -1,3 +1,3 @@ -// Tests compatibility with string -#include "../src/pugixml.hpp" -#include +// Tests compatibility with string +#include "../src/pugixml.hpp" +#include diff --git a/tests/test_header_string_2.cpp b/tests/test_header_string_2.cpp index 01d72ac..2813fc9 100644 --- a/tests/test_header_string_2.cpp +++ 
b/tests/test_header_string_2.cpp @@ -1,3 +1,3 @@ -// Tests compatibility with string -#include -#include "../src/pugixml.hpp" +// Tests compatibility with string +#include +#include "../src/pugixml.hpp" diff --git a/tests/test_memory.cpp b/tests/test_memory.cpp index 80e36c2..8b4b6bc 100644 --- a/tests/test_memory.cpp +++ b/tests/test_memory.cpp @@ -1,129 +1,129 @@ -#include "common.hpp" - -#include - -namespace -{ - int allocate_count = 0; - int deallocate_count = 0; - - void* allocate(size_t size) - { - ++allocate_count; - return new char[size]; - } - - void deallocate(void* ptr) - { - ++deallocate_count; - delete[] reinterpret_cast(ptr); - } -} - -TEST(custom_memory_management) -{ - allocate_count = deallocate_count = 0; - - // remember old functions - allocation_function old_allocate = get_memory_allocation_function(); - deallocation_function old_deallocate = get_memory_deallocation_function(); - - // replace functions - set_memory_management_functions(allocate, deallocate); - - { - // parse document - xml_document doc; - - CHECK(allocate_count == 0 && deallocate_count == 0); - - CHECK(doc.load(STR(""))); - - CHECK(allocate_count == 2 && deallocate_count == 0); - - // modify document (no new page) - CHECK(doc.first_child().set_name(STR("foobars"))); - CHECK(allocate_count == 2 && deallocate_count == 0); - - // modify document (new page) - std::basic_string s(65536, 'x'); - - CHECK(doc.first_child().set_name(s.c_str())); - CHECK(allocate_count == 3 && deallocate_count == 0); - - // modify document (new page, old one should die) - s += s; - - CHECK(doc.first_child().set_name(s.c_str())); - CHECK(allocate_count == 4 && deallocate_count == 1); - } - - CHECK(allocate_count == 4 && deallocate_count == 4); - - // restore old functions - set_memory_management_functions(old_allocate, old_deallocate); -} - -TEST(large_allocations) -{ - allocate_count = deallocate_count = 0; - - // remember old functions - allocation_function old_allocate = 
get_memory_allocation_function(); - deallocation_function old_deallocate = get_memory_deallocation_function(); - - // replace functions - set_memory_management_functions(allocate, deallocate); - - { - xml_document doc; - - CHECK(allocate_count == 0 && deallocate_count == 0); - - // initial fill - for (size_t i = 0; i < 128; ++i) - { - std::basic_string s(i * 128, 'x'); - - CHECK(doc.append_child(node_pcdata).set_value(s.c_str())); - } - - CHECK(allocate_count > 0 && deallocate_count == 0); - - // grow-prune loop - while (doc.first_child()) - { - pugi::xml_node node; - - // grow - for (node = doc.first_child(); node; node = node.next_sibling()) - { - std::basic_string s = node.value(); - - CHECK(node.set_value((s + s).c_str())); - } - - // prune - for (node = doc.first_child(); node; ) - { - pugi::xml_node next = node.next_sibling().next_sibling(); - - node.parent().remove_child(node); - - node = next; - } - } - - CHECK(allocate_count == deallocate_count + 1); // only one live page left (it waits for new allocations) - - char buffer; - CHECK(doc.load_buffer_inplace(&buffer, 0, parse_default, get_native_encoding())); - - CHECK(allocate_count == deallocate_count); // no live pages left - } - - CHECK(allocate_count == deallocate_count); // everything is freed - - // restore old functions - set_memory_management_functions(old_allocate, old_deallocate); -} +#include "common.hpp" + +#include + +namespace +{ + int allocate_count = 0; + int deallocate_count = 0; + + void* allocate(size_t size) + { + ++allocate_count; + return new char[size]; + } + + void deallocate(void* ptr) + { + ++deallocate_count; + delete[] reinterpret_cast(ptr); + } +} + +TEST(custom_memory_management) +{ + allocate_count = deallocate_count = 0; + + // remember old functions + allocation_function old_allocate = get_memory_allocation_function(); + deallocation_function old_deallocate = get_memory_deallocation_function(); + + // replace functions + set_memory_management_functions(allocate, deallocate); 
+ + { + // parse document + xml_document doc; + + CHECK(allocate_count == 0 && deallocate_count == 0); + + CHECK(doc.load(STR(""))); + + CHECK(allocate_count == 2 && deallocate_count == 0); + + // modify document (no new page) + CHECK(doc.first_child().set_name(STR("foobars"))); + CHECK(allocate_count == 2 && deallocate_count == 0); + + // modify document (new page) + std::basic_string s(65536, 'x'); + + CHECK(doc.first_child().set_name(s.c_str())); + CHECK(allocate_count == 3 && deallocate_count == 0); + + // modify document (new page, old one should die) + s += s; + + CHECK(doc.first_child().set_name(s.c_str())); + CHECK(allocate_count == 4 && deallocate_count == 1); + } + + CHECK(allocate_count == 4 && deallocate_count == 4); + + // restore old functions + set_memory_management_functions(old_allocate, old_deallocate); +} + +TEST(large_allocations) +{ + allocate_count = deallocate_count = 0; + + // remember old functions + allocation_function old_allocate = get_memory_allocation_function(); + deallocation_function old_deallocate = get_memory_deallocation_function(); + + // replace functions + set_memory_management_functions(allocate, deallocate); + + { + xml_document doc; + + CHECK(allocate_count == 0 && deallocate_count == 0); + + // initial fill + for (size_t i = 0; i < 128; ++i) + { + std::basic_string s(i * 128, 'x'); + + CHECK(doc.append_child(node_pcdata).set_value(s.c_str())); + } + + CHECK(allocate_count > 0 && deallocate_count == 0); + + // grow-prune loop + while (doc.first_child()) + { + pugi::xml_node node; + + // grow + for (node = doc.first_child(); node; node = node.next_sibling()) + { + std::basic_string s = node.value(); + + CHECK(node.set_value((s + s).c_str())); + } + + // prune + for (node = doc.first_child(); node; ) + { + pugi::xml_node next = node.next_sibling().next_sibling(); + + node.parent().remove_child(node); + + node = next; + } + } + + CHECK(allocate_count == deallocate_count + 1); // only one live page left (it waits for new 
allocations) + + char buffer; + CHECK(doc.load_buffer_inplace(&buffer, 0, parse_default, get_native_encoding())); + + CHECK(allocate_count == deallocate_count); // no live pages left + } + + CHECK(allocate_count == deallocate_count); // everything is freed + + // restore old functions + set_memory_management_functions(old_allocate, old_deallocate); +} diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp index 7f52d09..a997692 100644 --- a/tests/test_parse.cpp +++ b/tests/test_parse.cpp @@ -1,683 +1,683 @@ -#include "common.hpp" - -TEST(parse_pi_skip) -{ - xml_document doc; - - unsigned int flag_sets[] = {parse_minimal, parse_minimal | parse_declaration}; - - for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i) - { - unsigned int flags = flag_sets[i]; - - CHECK(doc.load(STR(""), flags)); - CHECK(!doc.first_child()); - - CHECK(doc.load(STR(" value?>"), flags)); - CHECK(!doc.first_child()); - } -} - -TEST(parse_pi_parse) -{ - xml_document doc; - CHECK(doc.load(STR(""), parse_minimal | parse_pi)); - - xml_node pi1 = doc.first_child(); - xml_node pi2 = doc.last_child(); - - CHECK(pi1 != pi2); - CHECK(pi1.type() == node_pi); - CHECK_STRING(pi1.name(), STR("pi1")); - CHECK_STRING(pi1.value(), STR("")); - CHECK(pi2.type() == node_pi); - CHECK_STRING(pi2.name(), STR("pi2")); - CHECK_STRING(pi2.value(), STR("value")); -} - -TEST(parse_pi_error) -{ - xml_document doc; - - unsigned int flag_sets[] = {parse_minimal, parse_minimal | parse_pi}; - - for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i) - { - unsigned int flags = flag_sets[i]; - - CHECK(doc.load(STR(""), flags).status == status_bad_pi); - CHECK(doc.load(STR(""), flags).status == status_bad_pi); - CHECK(doc.load(STR(""), flags).status == status_bad_pi); - CHECK(doc.load(STR(""), flags).status == status_bad_pi); - CHECK(doc.load(STR(" "), flags).status == status_bad_pi); - CHECK(doc.load(STR(""), parse_minimal | parse_pi).status == status_bad_pi); - 
CHECK(doc.load(STR(""), parse_minimal | parse_pi).status == status_bad_pi); - CHECK(doc.load(STR(""), parse_minimal | parse_pi).status == status_bad_pi); -} - -TEST(parse_comments_skip) -{ - xml_document doc; - CHECK(doc.load(STR(""), parse_minimal)); - CHECK(!doc.first_child()); -} - -TEST(parse_comments_parse) -{ - xml_document doc; - CHECK(doc.load(STR(""), parse_minimal | parse_comments)); - - xml_node c1 = doc.first_child(); - xml_node c2 = doc.last_child(); - - CHECK(c1 != c2); - CHECK(c1.type() == node_comment); - CHECK_STRING(c1.name(), STR("")); - CHECK_STRING(c1.value(), STR("")); - CHECK(c2.type() == node_comment); - CHECK_STRING(c2.name(), STR("")); - CHECK_STRING(c2.value(), STR("value")); -} - -TEST(parse_comments_parse_no_eol) -{ - xml_document doc; - CHECK(doc.load(STR(""), parse_minimal | parse_comments)); - - xml_node c = doc.first_child(); - CHECK(c.type() == node_comment); - CHECK_STRING(c.value(), STR("\r\rval1\rval2\r\nval3\nval4\r\r")); -} - -TEST(parse_comments_parse_eol) -{ - xml_document doc; - CHECK(doc.load(STR(""), parse_minimal | parse_comments | parse_eol)); - - xml_node c = doc.first_child(); - CHECK(c.type() == node_comment); - CHECK_STRING(c.value(), STR("\n\nval1\nval2\nval3\nval4\n\n")); -} - -TEST(parse_comments_error) -{ - xml_document doc; - - unsigned int flag_sets[] = {parse_minimal, parse_minimal | parse_comments, parse_minimal | parse_comments | parse_eol}; - - for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i) - { - unsigned int flags = flag_sets[i]; - - CHECK(doc.load(STR(""), flags).status == status_bad_comment); - CHECK(doc.load(STR(""), flags).status == status_bad_comment); - CHECK(doc.load(STR(""), flags).status == status_bad_comment); - } -} - -TEST(parse_cdata_skip) -{ - xml_document doc; - CHECK(doc.load(STR(""), parse_minimal)); - CHECK(!doc.first_child()); -} - -TEST(parse_cdata_parse) -{ - xml_document doc; - CHECK(doc.load(STR(""), parse_minimal | parse_cdata)); - - xml_node c1 = 
doc.first_child(); - xml_node c2 = doc.last_child(); - - CHECK(c1 != c2); - CHECK(c1.type() == node_cdata); - CHECK_STRING(c1.name(), STR("")); - CHECK_STRING(c1.value(), STR("")); - CHECK(c2.type() == node_cdata); - CHECK_STRING(c2.name(), STR("")); - CHECK_STRING(c2.value(), STR("value")); -} - -TEST(parse_cdata_parse_no_eol) -{ - xml_document doc; - CHECK(doc.load(STR(""), parse_minimal | parse_cdata)); - - xml_node c = doc.first_child(); - CHECK(c.type() == node_cdata); - CHECK_STRING(c.value(), STR("\r\rval1\rval2\r\nval3\nval4\r\r")); -} - -TEST(parse_cdata_parse_eol) -{ - xml_document doc; - CHECK(doc.load(STR(""), parse_minimal | parse_cdata | parse_eol)); - - xml_node c = doc.first_child(); - CHECK(c.type() == node_cdata); - CHECK_STRING(c.value(), STR("\n\nval1\nval2\nval3\nval4\n\n")); -} - -TEST(parse_cdata_error) -{ - xml_document doc; - - unsigned int flag_sets[] = {parse_minimal, parse_minimal | parse_cdata, parse_minimal | parse_cdata | parse_eol}; - - for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i) - { - unsigned int flags = flag_sets[i]; - - CHECK(doc.load(STR(""), flags).status == status_bad_cdata); - CHECK(doc.load(STR(""), flags).status == status_bad_cdata); - } -} - -TEST(parse_ws_pcdata_skip) -{ - xml_document doc; - CHECK(doc.load(STR(" "), parse_minimal)); - CHECK(!doc.first_child()); - - CHECK(doc.load(STR(" "), parse_minimal)); - - xml_node root = doc.child(STR("root")); - - CHECK(root.first_child() == root.last_child()); - CHECK(!root.first_child().first_child()); -} - -TEST(parse_ws_pcdata_parse) -{ - xml_document doc; - CHECK(doc.load(STR(" "), parse_minimal | parse_ws_pcdata)); - - xml_node root = doc.child(STR("root")); - - xml_node c1 = root.first_child(); - xml_node c2 = c1.next_sibling(); - xml_node c3 = c2.next_sibling(); - - CHECK(c3 == root.last_child()); - - CHECK(c1.type() == node_pcdata); - CHECK_STRING(c1.value(), STR(" ")); - CHECK(c3.type() == node_pcdata); - CHECK_STRING(c3.value(), STR(" ")); 
- - CHECK(c2.first_child() == c2.last_child()); - CHECK(c2.first_child().type() == node_pcdata); - CHECK_STRING(c2.first_child().value(), STR(" ")); -} - -TEST(parse_pcdata_no_eol) -{ - xml_document doc; - CHECK(doc.load(STR("\r\rval1\rval2\r\nval3\nval4\r\r"), parse_minimal)); - - CHECK_STRING(doc.child_value(STR("root")), STR("\r\rval1\rval2\r\nval3\nval4\r\r")); -} - -TEST(parse_pcdata_eol) -{ - xml_document doc; - CHECK(doc.load(STR("\r\rval1\rval2\r\nval3\nval4\r\r"), parse_minimal | parse_eol)); - - CHECK_STRING(doc.child_value(STR("root")), STR("\n\nval1\nval2\nval3\nval4\n\n")); -} - -TEST(parse_pcdata_skip_ext) -{ - xml_document doc; - CHECK(doc.load(STR("prepost"), parse_minimal)); - CHECK(doc.first_child() == doc.last_child()); - CHECK(doc.first_child().type() == node_element); -} - -TEST(parse_pcdata_error) -{ - xml_document doc; - CHECK(doc.load(STR("pcdata"), parse_minimal).status == status_end_element_mismatch); -} - -TEST(parse_escapes_skip) -{ - xml_document doc; - CHECK(doc.load(STR("<>&'""), parse_minimal)); - CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("<>&'"")); -} - -TEST(parse_escapes_parse) -{ - xml_document doc; - CHECK(doc.load(STR("<>&'""), parse_minimal | parse_escapes)); - CHECK_STRING(doc.child_value(STR("node")), STR("<>&'\"")); - CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("<>&'\"")); -} - -TEST(parse_escapes_code) -{ - xml_document doc; - CHECK(doc.load(STR(" "), parse_minimal | parse_escapes)); - CHECK_STRING(doc.child_value(STR("node")), STR("\01 ")); -} - -TEST(parse_escapes_code_exhaustive_dec) -{ - xml_document doc; - CHECK(doc.load(STR("&#/; &#:;&#a;&#A; "), parse_minimal | parse_escapes)); - CHECK_STRING(doc.child_value(STR("node")), STR("&#/;\x1\x2\x3\x4\x5\x6\x7\x8\x9&#:;&#a;&#A; ")); -} - -TEST(parse_escapes_code_exhaustive_hex) -{ - xml_document doc; - CHECK(doc.load(STR("&#x/; &#x:;&#x@; &#xG;&#x`; &#xg;"), parse_minimal | parse_escapes)); - 
CHECK_STRING(doc.child_value(STR("node")), STR("&#x/;\x1\x2\x3\x4\x5\x6\x7\x8\x9&#x:;&#x@;\xa\xb\xc\xd\xe\xf&#xG;&#x`;\xa\xb\xc\xd\xe\xf&#xg;")); -} - -TEST(parse_escapes_code_restore) -{ - xml_document doc; - CHECK(doc.load(STR("  - - "), parse_minimal | parse_escapes)); - CHECK_STRING(doc.child_value(STR("node")), STR("  - - ")); -} - -TEST(parse_escapes_char_restore) -{ - xml_document doc; - - CHECK(doc.load(STR("&q &qu &quo " "), parse_minimal | parse_escapes)); - CHECK_STRING(doc.child_value(STR("node")), STR("&q &qu &quo " ")); - - CHECK(doc.load(STR("&a &ap &apo &apos "), parse_minimal | parse_escapes)); - CHECK_STRING(doc.child_value(STR("node")), STR("&a &ap &apo &apos ")); - - CHECK(doc.load(STR("&a &am & "), parse_minimal | parse_escapes)); - CHECK_STRING(doc.child_value(STR("node")), STR("&a &am & ")); - - CHECK(doc.load(STR("&l < "), parse_minimal | parse_escapes)); - CHECK_STRING(doc.child_value(STR("node")), STR("&l < ")); - - CHECK(doc.load(STR("&g > "), parse_minimal | parse_escapes)); - CHECK_STRING(doc.child_value(STR("node")), STR("&g > ")); -} - -TEST(parse_escapes_unicode) -{ - xml_document doc; - CHECK(doc.load(STR("γγ𤭢"), parse_minimal | parse_escapes)); - -#ifdef PUGIXML_WCHAR_MODE - const pugi::char_t* v = doc.child_value(STR("node")); - - unsigned int v2 = v[2]; - size_t wcharsize = sizeof(wchar_t); - - CHECK(v[0] == 0x3b3 && v[1] == 0x3b3 && (wcharsize == 2 ? 
v[2] == 0xd852 && v[3] == 0xdf62 : v2 == 0x24b62)); -#else - CHECK_STRING(doc.child_value(STR("node")), "\xce\xb3\xce\xb3\xf0\xa4\xad\xa2"); -#endif -} - -TEST(parse_escapes_error) -{ - xml_document doc; - CHECK(doc.load(STR("g;&#ab;""), parse_minimal | parse_escapes)); - CHECK_STRING(doc.child_value(STR("node")), STR("g;&#ab;"")); - - CHECK(!doc.load(STR("&#;&#x;&;&#x-;&#-;"), parse_minimal | parse_escapes)); - CHECK_STRING(doc.child_value(STR("node")), STR("&#;&#x;&;&#x-;&#-;")); -} - -TEST(parse_attribute_spaces) -{ - xml_document doc; - CHECK(doc.load(STR(""), parse_minimal)); - CHECK_STRING(doc.child(STR("node")).attribute(STR("id1")).value(), STR("v1")); - CHECK_STRING(doc.child(STR("node")).attribute(STR("id2")).value(), STR("v2")); - CHECK_STRING(doc.child(STR("node")).attribute(STR("id3")).value(), STR("v3")); - CHECK_STRING(doc.child(STR("node")).attribute(STR("id4")).value(), STR("v4")); - CHECK_STRING(doc.child(STR("node")).attribute(STR("id5")).value(), STR("v5")); -} - -TEST(parse_attribute_quot) -{ - xml_document doc; - CHECK(doc.load(STR(""), parse_minimal)); - CHECK_STRING(doc.child(STR("node")).attribute(STR("id1")).value(), STR("v1")); - CHECK_STRING(doc.child(STR("node")).attribute(STR("id2")).value(), STR("v2")); -} - -TEST(parse_attribute_no_eol_no_wconv) -{ - xml_document doc; - CHECK(doc.load(STR(""), parse_minimal)); - CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR(" \t\r\rval1 \rval2\r\nval3\nval4\r\r")); -} - -TEST(parse_attribute_eol_no_wconv) -{ - xml_document doc; - CHECK(doc.load(STR(""), parse_minimal | parse_eol)); - CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR(" \t\n\nval1 \nval2\nval3\nval4\n\n")); -} - -TEST(parse_attribute_no_eol_wconv) -{ - xml_document doc; - CHECK(doc.load(STR(""), parse_minimal | parse_wconv_attribute)); - CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR(" val1 val2 val3 val4 ")); -} - -TEST(parse_attribute_eol_wconv) -{ - xml_document doc; 
- CHECK(doc.load(STR(""), parse_minimal | parse_eol | parse_wconv_attribute)); - CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR(" val1 val2 val3 val4 ")); -} - -TEST(parse_attribute_wnorm) -{ - xml_document doc; - - for (int eol = 0; eol < 2; ++eol) - for (int wconv = 0; wconv < 2; ++wconv) - { - unsigned int flags = parse_minimal | parse_wnorm_attribute | (eol ? parse_eol : 0) | (wconv ? parse_wconv_attribute : 0); - CHECK(doc.load(STR(""), flags)); - CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("val1 val2 val3 val4")); - } -} - -TEST(parse_attribute_variations) -{ - xml_document doc; - - for (int wnorm = 0; wnorm < 2; ++wnorm) - for (int eol = 0; eol < 2; ++eol) - for (int wconv = 0; wconv < 2; ++wconv) - for (int escapes = 0; escapes < 2; ++escapes) - { - unsigned int flags = parse_minimal; - - flags |= (wnorm ? parse_wnorm_attribute : 0); - flags |= (eol ? parse_eol : 0); - flags |= (wconv ? parse_wconv_attribute : 0); - flags |= (escapes ? 
parse_escapes : 0); - - CHECK(doc.load(STR(""), flags)); - CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("1")); - } -} - - -TEST(parse_attribute_error) -{ - xml_document doc; - CHECK(doc.load(STR(""), parse_minimal).status == status_bad_attribute); - CHECK(doc.load(STR(""), parse_minimal).status == status_bad_attribute); - CHECK(doc.load(STR(""), parse_minimal).status == status_bad_attribute); - CHECK(doc.load(STR(""), parse_minimal).status == status_bad_attribute); - CHECK(doc.load(STR(""), parse_minimal).status == status_bad_attribute); - CHECK(doc.load(STR(""), parse_minimal).status == status_bad_start_element); - CHECK(doc.load(STR(""), parse_minimal).status == status_bad_start_element); - CHECK(doc.load(STR(""), parse_minimal).status == status_bad_attribute); - CHECK(doc.load(STR(""), parse_minimal).status == status_bad_attribute); - CHECK(doc.load(STR(""), parse_minimal).status == status_bad_start_element); -} - -TEST(parse_tag_single) -{ - xml_document doc; - CHECK(doc.load(STR(""), parse_minimal)); - CHECK_NODE(doc, STR("")); -} - -TEST(parse_tag_hierarchy) -{ - xml_document doc; - CHECK(doc.load(STR(""), parse_minimal)); - CHECK_NODE(doc, STR("")); -} - -TEST(parse_tag_error) -{ - xml_document doc; - CHECK(doc.load(STR("<"), parse_minimal).status == status_unrecognized_tag); - CHECK(doc.load(STR(""), parse_minimal).status == status_bad_start_element); - CHECK(doc.load(STR(""), parse_minimal).status == status_bad_start_element); - CHECK(doc.load(STR(""), parse_minimal).status == status_end_element_mismatch); - CHECK(doc.load(STR(""), parse_minimal).status == status_end_element_mismatch); - CHECK(doc.load(STR(""), parse_minimal).status == status_end_element_mismatch); - CHECK(doc.load(STR(""), parse_minimal).status == status_end_element_mismatch); - CHECK(doc.load(STR("<"), parse_minimal).status == status_unrecognized_tag); - CHECK(doc.load(STR(""), parse_minimal).status == status_end_element_mismatch); - CHECK(doc.load(STR(""), 
parse_minimal).status == status_end_element_mismatch); - CHECK(doc.load(STR(""), parse_minimal).status == status_end_element_mismatch); - CHECK(doc.load(STR(""), parse_minimal).status == status_end_element_mismatch); - CHECK(doc.load(STR(""), parse_minimal).status == status_bad_end_element); - CHECK(doc.load(STR(""), parse_minimal).status == status_bad_start_element); - CHECK(doc.load(STR(""), parse_minimal).status == status_bad_start_element); -} - -TEST(parse_declaration_cases) -{ - xml_document doc; - CHECK(doc.load(STR(""), parse_minimal | parse_pi)); - CHECK(!doc.first_child()); -} - -TEST(parse_declaration_attr_cases) -{ - xml_document doc; - CHECK(doc.load(STR(""), parse_minimal | parse_pi)); - CHECK(!doc.first_child()); -} - -TEST(parse_declaration_skip) -{ - xml_document doc; - - unsigned int flag_sets[] = {parse_minimal, parse_minimal | parse_pi}; - - for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i) - { - unsigned int flags = flag_sets[i]; - - CHECK(doc.load(STR(""), flags)); - CHECK(!doc.first_child()); - - CHECK(doc.load(STR(" ?>"), flags)); - CHECK(!doc.first_child()); - } -} - -TEST(parse_declaration_parse) -{ - xml_document doc; - CHECK(doc.load(STR(""), parse_minimal | parse_declaration)); - - xml_node d1 = doc.first_child(); - xml_node d2 = doc.last_child(); - - CHECK(d1 != d2); - CHECK(d1.type() == node_declaration); - CHECK_STRING(d1.name(), STR("xml")); - CHECK(d2.type() == node_declaration); - CHECK_STRING(d2.name(), STR("xml")); - CHECK_STRING(d2.attribute(STR("version")).value(), STR("1.0")); -} - -TEST(parse_declaration_error) -{ - xml_document doc; - - unsigned int flag_sets[] = {parse_minimal, parse_minimal | parse_declaration}; - - for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i) - { - unsigned int flags = flag_sets[i]; - - CHECK(doc.load(STR(""), flags).status == status_bad_pi); - CHECK(doc.load(STR(""), flags).status == status_bad_pi); - } - - CHECK(doc.load(STR(""), parse_minimal | 
parse_declaration).status == status_bad_attribute); - CHECK(doc.load(STR(""), parse_minimal | parse_declaration).status == status_bad_pi); -} - -TEST(parse_empty) -{ - xml_document doc; - CHECK(doc.load(STR("")) && !doc.first_child()); -} - -TEST(parse_out_of_memory) -{ - test_runner::_memory_fail_threshold = 256; - - xml_document doc; - CHECK(doc.load(STR("")).status == status_out_of_memory); - CHECK(!doc.first_child()); -} - -TEST(parse_out_of_memory_halfway) -{ - unsigned int count = 10000; - char_t* text = new char_t[count * 4]; - - for (unsigned int i = 0; i < count; ++i) - { - text[4*i + 0] = '<'; - text[4*i + 1] = 'n'; - text[4*i + 2] = '/'; - text[4*i + 3] = '>'; - } - - test_runner::_memory_fail_threshold = 65536; - - xml_document doc; - CHECK(doc.load_buffer_inplace(text, count * 4).status == status_out_of_memory); - CHECK_NODE(doc.first_child(), STR("")); - - delete[] text; -} - -static bool test_offset(const char_t* contents, unsigned int options, pugi::xml_parse_status status, ptrdiff_t offset) -{ - xml_document doc; - xml_parse_result res = doc.load(contents, options); - - return res.status == status && res.offset == offset; -} - -#define CHECK_OFFSET(contents, options, status, offset) CHECK(test_offset(STR(contents), options, status, offset)) - -TEST(parse_error_offset) -{ - CHECK_OFFSET("", parse_default, status_ok, 0); - - test_runner::_memory_fail_threshold = 1; - CHECK_OFFSET("", parse_default, status_out_of_memory, 0); - test_runner::_memory_fail_threshold = 0; - - CHECK_OFFSET("<3d/>", parse_default, status_unrecognized_tag, 1); - CHECK_OFFSET(" <3d/>", parse_default, status_unrecognized_tag, 2); - CHECK_OFFSET(" <", parse_default, status_unrecognized_tag, 2); - - CHECK_OFFSET("", parse_default, status_bad_start_element, 5); - - CHECK_OFFSET("", parse_default, status_bad_attribute, 8); - CHECK_OFFSET("<>&'""), parse_minimal)); + CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("<>&'"")); +} + +TEST(parse_escapes_parse) +{ + 
xml_document doc; + CHECK(doc.load(STR("<>&'""), parse_minimal | parse_escapes)); + CHECK_STRING(doc.child_value(STR("node")), STR("<>&'\"")); + CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("<>&'\"")); +} + +TEST(parse_escapes_code) +{ + xml_document doc; + CHECK(doc.load(STR(" "), parse_minimal | parse_escapes)); + CHECK_STRING(doc.child_value(STR("node")), STR("\01 ")); +} + +TEST(parse_escapes_code_exhaustive_dec) +{ + xml_document doc; + CHECK(doc.load(STR("&#/; &#:;&#a;&#A; "), parse_minimal | parse_escapes)); + CHECK_STRING(doc.child_value(STR("node")), STR("&#/;\x1\x2\x3\x4\x5\x6\x7\x8\x9&#:;&#a;&#A; ")); +} + +TEST(parse_escapes_code_exhaustive_hex) +{ + xml_document doc; + CHECK(doc.load(STR("&#x/; &#x:;&#x@; &#xG;&#x`; &#xg;"), parse_minimal | parse_escapes)); + CHECK_STRING(doc.child_value(STR("node")), STR("&#x/;\x1\x2\x3\x4\x5\x6\x7\x8\x9&#x:;&#x@;\xa\xb\xc\xd\xe\xf&#xG;&#x`;\xa\xb\xc\xd\xe\xf&#xg;")); +} + +TEST(parse_escapes_code_restore) +{ + xml_document doc; + CHECK(doc.load(STR("  - - "), parse_minimal | parse_escapes)); + CHECK_STRING(doc.child_value(STR("node")), STR("  - - ")); +} + +TEST(parse_escapes_char_restore) +{ + xml_document doc; + + CHECK(doc.load(STR("&q &qu &quo " "), parse_minimal | parse_escapes)); + CHECK_STRING(doc.child_value(STR("node")), STR("&q &qu &quo " ")); + + CHECK(doc.load(STR("&a &ap &apo &apos "), parse_minimal | parse_escapes)); + CHECK_STRING(doc.child_value(STR("node")), STR("&a &ap &apo &apos ")); + + CHECK(doc.load(STR("&a &am & "), parse_minimal | parse_escapes)); + CHECK_STRING(doc.child_value(STR("node")), STR("&a &am & ")); + + CHECK(doc.load(STR("&l < "), parse_minimal | parse_escapes)); + CHECK_STRING(doc.child_value(STR("node")), STR("&l < ")); + + CHECK(doc.load(STR("&g > "), parse_minimal | parse_escapes)); + CHECK_STRING(doc.child_value(STR("node")), STR("&g > ")); +} + +TEST(parse_escapes_unicode) +{ + xml_document doc; + CHECK(doc.load(STR("γγ𤭢"), parse_minimal | 
parse_escapes)); + +#ifdef PUGIXML_WCHAR_MODE + const pugi::char_t* v = doc.child_value(STR("node")); + + unsigned int v2 = v[2]; + size_t wcharsize = sizeof(wchar_t); + + CHECK(v[0] == 0x3b3 && v[1] == 0x3b3 && (wcharsize == 2 ? v[2] == 0xd852 && v[3] == 0xdf62 : v2 == 0x24b62)); +#else + CHECK_STRING(doc.child_value(STR("node")), "\xce\xb3\xce\xb3\xf0\xa4\xad\xa2"); +#endif +} + +TEST(parse_escapes_error) +{ + xml_document doc; + CHECK(doc.load(STR("g;&#ab;""), parse_minimal | parse_escapes)); + CHECK_STRING(doc.child_value(STR("node")), STR("g;&#ab;"")); + + CHECK(!doc.load(STR("&#;&#x;&;&#x-;&#-;"), parse_minimal | parse_escapes)); + CHECK_STRING(doc.child_value(STR("node")), STR("&#;&#x;&;&#x-;&#-;")); +} + +TEST(parse_attribute_spaces) +{ + xml_document doc; + CHECK(doc.load(STR(""), parse_minimal)); + CHECK_STRING(doc.child(STR("node")).attribute(STR("id1")).value(), STR("v1")); + CHECK_STRING(doc.child(STR("node")).attribute(STR("id2")).value(), STR("v2")); + CHECK_STRING(doc.child(STR("node")).attribute(STR("id3")).value(), STR("v3")); + CHECK_STRING(doc.child(STR("node")).attribute(STR("id4")).value(), STR("v4")); + CHECK_STRING(doc.child(STR("node")).attribute(STR("id5")).value(), STR("v5")); +} + +TEST(parse_attribute_quot) +{ + xml_document doc; + CHECK(doc.load(STR(""), parse_minimal)); + CHECK_STRING(doc.child(STR("node")).attribute(STR("id1")).value(), STR("v1")); + CHECK_STRING(doc.child(STR("node")).attribute(STR("id2")).value(), STR("v2")); +} + +TEST(parse_attribute_no_eol_no_wconv) +{ + xml_document doc; + CHECK(doc.load(STR(""), parse_minimal)); + CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR(" \t\r\rval1 \rval2\r\nval3\nval4\r\r")); +} + +TEST(parse_attribute_eol_no_wconv) +{ + xml_document doc; + CHECK(doc.load(STR(""), parse_minimal | parse_eol)); + CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR(" \t\n\nval1 \nval2\nval3\nval4\n\n")); +} + +TEST(parse_attribute_no_eol_wconv) +{ + xml_document 
doc; + CHECK(doc.load(STR(""), parse_minimal | parse_wconv_attribute)); + CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR(" val1 val2 val3 val4 ")); +} + +TEST(parse_attribute_eol_wconv) +{ + xml_document doc; + CHECK(doc.load(STR(""), parse_minimal | parse_eol | parse_wconv_attribute)); + CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR(" val1 val2 val3 val4 ")); +} + +TEST(parse_attribute_wnorm) +{ + xml_document doc; + + for (int eol = 0; eol < 2; ++eol) + for (int wconv = 0; wconv < 2; ++wconv) + { + unsigned int flags = parse_minimal | parse_wnorm_attribute | (eol ? parse_eol : 0) | (wconv ? parse_wconv_attribute : 0); + CHECK(doc.load(STR(""), flags)); + CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("val1 val2 val3 val4")); + } +} + +TEST(parse_attribute_variations) +{ + xml_document doc; + + for (int wnorm = 0; wnorm < 2; ++wnorm) + for (int eol = 0; eol < 2; ++eol) + for (int wconv = 0; wconv < 2; ++wconv) + for (int escapes = 0; escapes < 2; ++escapes) + { + unsigned int flags = parse_minimal; + + flags |= (wnorm ? parse_wnorm_attribute : 0); + flags |= (eol ? parse_eol : 0); + flags |= (wconv ? parse_wconv_attribute : 0); + flags |= (escapes ? 
parse_escapes : 0); + + CHECK(doc.load(STR(""), flags)); + CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("1")); + } +} + + +TEST(parse_attribute_error) +{ + xml_document doc; + CHECK(doc.load(STR(""), parse_minimal).status == status_bad_attribute); + CHECK(doc.load(STR(""), parse_minimal).status == status_bad_attribute); + CHECK(doc.load(STR(""), parse_minimal).status == status_bad_attribute); + CHECK(doc.load(STR(""), parse_minimal).status == status_bad_attribute); + CHECK(doc.load(STR(""), parse_minimal).status == status_bad_attribute); + CHECK(doc.load(STR(""), parse_minimal).status == status_bad_start_element); + CHECK(doc.load(STR(""), parse_minimal).status == status_bad_start_element); + CHECK(doc.load(STR(""), parse_minimal).status == status_bad_attribute); + CHECK(doc.load(STR(""), parse_minimal).status == status_bad_attribute); + CHECK(doc.load(STR(""), parse_minimal).status == status_bad_start_element); +} + +TEST(parse_tag_single) +{ + xml_document doc; + CHECK(doc.load(STR(""), parse_minimal)); + CHECK_NODE(doc, STR("")); +} + +TEST(parse_tag_hierarchy) +{ + xml_document doc; + CHECK(doc.load(STR(""), parse_minimal)); + CHECK_NODE(doc, STR("")); +} + +TEST(parse_tag_error) +{ + xml_document doc; + CHECK(doc.load(STR("<"), parse_minimal).status == status_unrecognized_tag); + CHECK(doc.load(STR(""), parse_minimal).status == status_bad_start_element); + CHECK(doc.load(STR(""), parse_minimal).status == status_bad_start_element); + CHECK(doc.load(STR(""), parse_minimal).status == status_end_element_mismatch); + CHECK(doc.load(STR(""), parse_minimal).status == status_end_element_mismatch); + CHECK(doc.load(STR(""), parse_minimal).status == status_end_element_mismatch); + CHECK(doc.load(STR(""), parse_minimal).status == status_end_element_mismatch); + CHECK(doc.load(STR("<"), parse_minimal).status == status_unrecognized_tag); + CHECK(doc.load(STR(""), parse_minimal).status == status_end_element_mismatch); + CHECK(doc.load(STR(""), 
parse_minimal).status == status_end_element_mismatch); + CHECK(doc.load(STR(""), parse_minimal).status == status_end_element_mismatch); + CHECK(doc.load(STR(""), parse_minimal).status == status_end_element_mismatch); + CHECK(doc.load(STR(""), parse_minimal).status == status_bad_end_element); + CHECK(doc.load(STR(""), parse_minimal).status == status_bad_start_element); + CHECK(doc.load(STR(""), parse_minimal).status == status_bad_start_element); +} + +TEST(parse_declaration_cases) +{ + xml_document doc; + CHECK(doc.load(STR(""), parse_minimal | parse_pi)); + CHECK(!doc.first_child()); +} + +TEST(parse_declaration_attr_cases) +{ + xml_document doc; + CHECK(doc.load(STR(""), parse_minimal | parse_pi)); + CHECK(!doc.first_child()); +} + +TEST(parse_declaration_skip) +{ + xml_document doc; + + unsigned int flag_sets[] = {parse_minimal, parse_minimal | parse_pi}; + + for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i) + { + unsigned int flags = flag_sets[i]; + + CHECK(doc.load(STR(""), flags)); + CHECK(!doc.first_child()); + + CHECK(doc.load(STR(" ?>"), flags)); + CHECK(!doc.first_child()); + } +} + +TEST(parse_declaration_parse) +{ + xml_document doc; + CHECK(doc.load(STR(""), parse_minimal | parse_declaration)); + + xml_node d1 = doc.first_child(); + xml_node d2 = doc.last_child(); + + CHECK(d1 != d2); + CHECK(d1.type() == node_declaration); + CHECK_STRING(d1.name(), STR("xml")); + CHECK(d2.type() == node_declaration); + CHECK_STRING(d2.name(), STR("xml")); + CHECK_STRING(d2.attribute(STR("version")).value(), STR("1.0")); +} + +TEST(parse_declaration_error) +{ + xml_document doc; + + unsigned int flag_sets[] = {parse_minimal, parse_minimal | parse_declaration}; + + for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i) + { + unsigned int flags = flag_sets[i]; + + CHECK(doc.load(STR(""), flags).status == status_bad_pi); + CHECK(doc.load(STR(""), flags).status == status_bad_pi); + } + + CHECK(doc.load(STR(""), parse_minimal | 
parse_declaration).status == status_bad_attribute); + CHECK(doc.load(STR(""), parse_minimal | parse_declaration).status == status_bad_pi); +} + +TEST(parse_empty) +{ + xml_document doc; + CHECK(doc.load(STR("")) && !doc.first_child()); +} + +TEST(parse_out_of_memory) +{ + test_runner::_memory_fail_threshold = 256; + + xml_document doc; + CHECK(doc.load(STR("")).status == status_out_of_memory); + CHECK(!doc.first_child()); +} + +TEST(parse_out_of_memory_halfway) +{ + unsigned int count = 10000; + char_t* text = new char_t[count * 4]; + + for (unsigned int i = 0; i < count; ++i) + { + text[4*i + 0] = '<'; + text[4*i + 1] = 'n'; + text[4*i + 2] = '/'; + text[4*i + 3] = '>'; + } + + test_runner::_memory_fail_threshold = 65536; + + xml_document doc; + CHECK(doc.load_buffer_inplace(text, count * 4).status == status_out_of_memory); + CHECK_NODE(doc.first_child(), STR("")); + + delete[] text; +} + +static bool test_offset(const char_t* contents, unsigned int options, pugi::xml_parse_status status, ptrdiff_t offset) +{ + xml_document doc; + xml_parse_result res = doc.load(contents, options); + + return res.status == status && res.offset == offset; +} + +#define CHECK_OFFSET(contents, options, status, offset) CHECK(test_offset(STR(contents), options, status, offset)) + +TEST(parse_error_offset) +{ + CHECK_OFFSET("", parse_default, status_ok, 0); + + test_runner::_memory_fail_threshold = 1; + CHECK_OFFSET("", parse_default, status_out_of_memory, 0); + test_runner::_memory_fail_threshold = 0; + + CHECK_OFFSET("<3d/>", parse_default, status_unrecognized_tag, 1); + CHECK_OFFSET(" <3d/>", parse_default, status_unrecognized_tag, 2); + CHECK_OFFSET(" <", parse_default, status_unrecognized_tag, 2); + + CHECK_OFFSET("", parse_default, status_bad_start_element, 5); + + CHECK_OFFSET("", parse_default, status_bad_attribute, 8); + CHECK_OFFSET(""); - TEST_DOCTYPE_WF(""); - TEST_DOCTYPE_WF(""); - TEST_DOCTYPE_WF(""); - TEST_DOCTYPE_WF("]>"); -} - -TEST(parse_doctype_error) -{ - 
TEST_DOCTYPE_NWF(""); - TEST_DOCTYPE_NWF("]"); - TEST_DOCTYPE_NWF("] "); -} - -// Examples from W3C recommendations -TEST(parse_doctype_w3c_wf) -{ - TEST_DOCTYPE_WF(""); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]]> ]]>]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); -} - -TEST(parse_doctype_w3c_nwf) -{ - TEST_DOCTYPE_NWF(""); - TEST_DOCTYPE_NWF(" ]"); - TEST_DOCTYPE_NWF(""); - TEST_DOCTYPE_NWF(" ]"); - TEST_DOCTYPE_NWF(""); - TEST_DOCTYPE_NWF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" %e; ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(""); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" \"> ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" %pe; %intpe; ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF("]]> ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_NWF(" \"> \"> "); - TEST_DOCTYPE_WF(" ]>"); -} - -TEST(parse_doctype_xmlconf_eduni_2) -{ - TEST_DOCTYPE_WF("\"> %pe; ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); -} - -TEST(parse_doctype_xmlconf_eduni_3) -{ - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF("\"> ]>"); -} - -TEST(parse_doctype_xmlconf_eduni_4) -{ - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); -} - -TEST(parse_doctype_xmlconf_eduni_5) -{ - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); -} - -TEST(parse_doctype_xmlconf_ibm_1) -{ - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" 
]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" %pe1; ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" \"> %pe3; ]>"); - TEST_DOCTYPE_WF(" '> %pe1; ]>"); -} - -TEST(parse_doctype_xmlconf_ibm_2) -{ - TEST_DOCTYPE_WF(" \"> ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_NWF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_NWF(" ]>"); - TEST_DOCTYPE_NWF(" ]>"); - TEST_DOCTYPE_NWF(" ]>"); - TEST_DOCTYPE_NWF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" &generalE; \"> %parameterE; ] animal>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_NWF(" \"> ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]]> ]>"); - TEST_DOCTYPE_WF(" ]]> ]>"); - TEST_DOCTYPE_NWF(" ]]> ]>"); - TEST_DOCTYPE_WF(" ]]> ]>"); - TEST_DOCTYPE_WF(" ]]> ]>"); - TEST_DOCTYPE_WF(" [INCLUDE ]]> ]>"); - TEST_DOCTYPE_NWF(" ]>"); - TEST_DOCTYPE_NWF(" ]>"); - TEST_DOCTYPE_WF(" %paaa; \"> ]>"); - TEST_DOCTYPE_WF(" \"> %paaa; ]>"); - TEST_DOCTYPE_NWF(" \" %paaa; ]>"); - TEST_DOCTYPE_WF(" ]>"); -} - -TEST(parse_doctype_xmlconf_ibm_3) -{ - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(""); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(""); - TEST_DOCTYPE_WF(""); - TEST_DOCTYPE_WF(""); - TEST_DOCTYPE_WF(""); - TEST_DOCTYPE_WF(""); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_NWF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_NWF(" ]>"); - TEST_DOCTYPE_WF(" IN PI ?> ]>"); - TEST_DOCTYPE_WF(" \"> \"> \"> %make_leopard_element; %make_small; \"> %make_big; %make_attlist; ]>"); - TEST_DOCTYPE_WF("\"> ]]> %rootElement; \"> %make_tiger_element; ]]> ]>"); - TEST_DOCTYPE_WF(" General entity reference in element content\"> ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF("'. These must be balanced ]>"); - TEST_DOCTYPE_WF("'. 
These must be balanced ]]> ]]> ]>"); - TEST_DOCTYPE_WF("'. These must be balanced ]]> nesting ]]> nesting again ]]> end ]]> ]>"); - TEST_DOCTYPE_WF(" \"> %pe1; ]> ]>"); -} - -TEST(parse_doctype_xmlconf_oasis_1) -{ - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF("\"> ]]> %rootel; ]]> ]>"); - TEST_DOCTYPE_WF(" ]]>]]> ]>"); - TEST_DOCTYPE_WF(" ]]>]]> ]>"); - TEST_DOCTYPE_NWF(" ]>"); - TEST_DOCTYPE_WF(" ]]> ]>"); - TEST_DOCTYPE_NWF(" ]>"); - TEST_DOCTYPE_WF(" '. These must be balanced, but it is no section keyword is required: ] ]> ]] > ]]> ]]> ]>"); - TEST_DOCTYPE_WF(" ?>/\''\"> ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" "); // not actually a doctype :) - TEST_DOCTYPE_WF(" &a%b&#c?>"); // not actually a doctype :) - TEST_DOCTYPE_WF("]>"); -} - -TEST(parse_doctype_xmlconf_xmltest_1) -{ - TEST_DOCTYPE_NWF(" ]> ]>"); - TEST_DOCTYPE_NWF(" "); - TEST_DOCTYPE_NWF(" "); - TEST_DOCTYPE_WF(" %e; -->"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF(" ]>"); - TEST_DOCTYPE_WF("\"> ]>"); - TEST_DOCTYPE_WF(" \"> ]>"); -} +#include "common.hpp" + +#include + +static bool test_doctype_wf(const std::basic_string& decl) +{ + xml_document doc; + + // standalone + if (!doc.load(decl.c_str()) || (bool)doc.first_child()) return false; + + // pcdata pre/postfix + if (!doc.load((STR("a") + decl).c_str()) || (bool)doc.first_child()) return false; + if (!doc.load((decl + STR("b")).c_str()) || (bool)doc.first_child()) return false; + if (!doc.load((STR("a") + decl + STR("b")).c_str()) || (bool)doc.first_child()) return false; + + // node pre/postfix + if (!doc.load((STR("") + decl).c_str()) || !test_node(doc, STR(""), STR(""), format_raw)) return false; + if (!doc.load((decl + STR("")).c_str()) || !test_node(doc, STR(""), STR(""), format_raw)) return false; + if (!doc.load((STR("") + decl + STR("")).c_str()) || !test_node(doc, STR(""), STR(""), format_raw)) return false; + + // wrap in node to check that doctype is parsed fully (does not leave any "pcdata") + if 
(!doc.load((STR("") + decl + STR("")).c_str()) || !test_node(doc, STR(""), STR(""), format_raw)) return false; + + return true; +} + +static bool test_doctype_nwf(const std::basic_string& decl) +{ + xml_document doc; + + // standalone + if (doc.load(decl.c_str()).status != status_bad_doctype) return false; + + // pcdata postfix + if (doc.load((decl + STR("b")).c_str()).status != status_bad_doctype) return false; + + // node postfix + if (doc.load((decl + STR("")).c_str()).status != status_bad_doctype) return false; + + return true; +} + +#define TEST_DOCTYPE_WF(contents) CHECK(test_doctype_wf(STR(contents))) +#define TEST_DOCTYPE_NWF(contents) CHECK(test_doctype_nwf(STR(contents))) + +TEST(parse_doctype_skip) +{ + TEST_DOCTYPE_WF(""); + TEST_DOCTYPE_WF(""); + TEST_DOCTYPE_WF(""); + TEST_DOCTYPE_WF(""); + TEST_DOCTYPE_WF(""); + TEST_DOCTYPE_WF("]>"); +} + +TEST(parse_doctype_error) +{ + TEST_DOCTYPE_NWF(""); + TEST_DOCTYPE_NWF("]"); + TEST_DOCTYPE_NWF("] "); +} + +// Examples from W3C recommendations +TEST(parse_doctype_w3c_wf) +{ + TEST_DOCTYPE_WF(""); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]]> ]]>]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); +} + +TEST(parse_doctype_w3c_nwf) +{ + TEST_DOCTYPE_NWF(""); + TEST_DOCTYPE_NWF(" ]"); + TEST_DOCTYPE_NWF(""); + TEST_DOCTYPE_NWF(" ]"); + TEST_DOCTYPE_NWF(""); + TEST_DOCTYPE_NWF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" %e; ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(""); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" \"> ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" %pe; %intpe; ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + 
TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF("]]> ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_NWF(" \"> \"> "); + TEST_DOCTYPE_WF(" ]>"); +} + +TEST(parse_doctype_xmlconf_eduni_2) +{ + TEST_DOCTYPE_WF("\"> %pe; ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); +} + +TEST(parse_doctype_xmlconf_eduni_3) +{ + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF("\"> ]>"); +} + +TEST(parse_doctype_xmlconf_eduni_4) +{ + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); +} + +TEST(parse_doctype_xmlconf_eduni_5) +{ + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); +} + +TEST(parse_doctype_xmlconf_ibm_1) +{ + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" %pe1; ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" \"> %pe3; ]>"); + TEST_DOCTYPE_WF(" '> %pe1; ]>"); +} + +TEST(parse_doctype_xmlconf_ibm_2) +{ + TEST_DOCTYPE_WF(" \"> ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_NWF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_NWF(" ]>"); + TEST_DOCTYPE_NWF(" ]>"); + TEST_DOCTYPE_NWF(" ]>"); + TEST_DOCTYPE_NWF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" &generalE; \"> %parameterE; ] animal>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_NWF(" \"> ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]]> ]>"); + TEST_DOCTYPE_WF(" ]]> ]>"); + TEST_DOCTYPE_NWF(" ]]> ]>"); + TEST_DOCTYPE_WF(" ]]> ]>"); + TEST_DOCTYPE_WF(" ]]> ]>"); + TEST_DOCTYPE_WF(" [INCLUDE ]]> ]>"); + TEST_DOCTYPE_NWF(" ]>"); + TEST_DOCTYPE_NWF(" ]>"); + TEST_DOCTYPE_WF(" %paaa; \"> ]>"); + 
TEST_DOCTYPE_WF(" \"> %paaa; ]>"); + TEST_DOCTYPE_NWF(" \" %paaa; ]>"); + TEST_DOCTYPE_WF(" ]>"); +} + +TEST(parse_doctype_xmlconf_ibm_3) +{ + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(""); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(""); + TEST_DOCTYPE_WF(""); + TEST_DOCTYPE_WF(""); + TEST_DOCTYPE_WF(""); + TEST_DOCTYPE_WF(""); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_NWF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_NWF(" ]>"); + TEST_DOCTYPE_WF(" IN PI ?> ]>"); + TEST_DOCTYPE_WF(" \"> \"> \"> %make_leopard_element; %make_small; \"> %make_big; %make_attlist; ]>"); + TEST_DOCTYPE_WF("\"> ]]> %rootElement; \"> %make_tiger_element; ]]> ]>"); + TEST_DOCTYPE_WF(" General entity reference in element content\"> ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF("'. These must be balanced ]>"); + TEST_DOCTYPE_WF("'. These must be balanced ]]> ]]> ]>"); + TEST_DOCTYPE_WF("'. These must be balanced ]]> nesting ]]> nesting again ]]> end ]]> ]>"); + TEST_DOCTYPE_WF(" \"> %pe1; ]> ]>"); +} + +TEST(parse_doctype_xmlconf_oasis_1) +{ + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF("\"> ]]> %rootel; ]]> ]>"); + TEST_DOCTYPE_WF(" ]]>]]> ]>"); + TEST_DOCTYPE_WF(" ]]>]]> ]>"); + TEST_DOCTYPE_NWF(" ]>"); + TEST_DOCTYPE_WF(" ]]> ]>"); + TEST_DOCTYPE_NWF(" ]>"); + TEST_DOCTYPE_WF(" '. 
These must be balanced, but it is no section keyword is required: ] ]> ]] > ]]> ]]> ]>"); + TEST_DOCTYPE_WF(" ?>/\''\"> ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" "); // not actually a doctype :) + TEST_DOCTYPE_WF(" &a%b&#c?>"); // not actually a doctype :) + TEST_DOCTYPE_WF("]>"); +} + +TEST(parse_doctype_xmlconf_xmltest_1) +{ + TEST_DOCTYPE_NWF(" ]> ]>"); + TEST_DOCTYPE_NWF(" "); + TEST_DOCTYPE_NWF(" "); + TEST_DOCTYPE_WF(" %e; -->"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF(" ]>"); + TEST_DOCTYPE_WF("\"> ]>"); + TEST_DOCTYPE_WF(" \"> ]>"); +} diff --git a/tests/test_unicode.cpp b/tests/test_unicode.cpp index ea2494b..0b656a3 100644 --- a/tests/test_unicode.cpp +++ b/tests/test_unicode.cpp @@ -1,137 +1,137 @@ -#ifndef PUGIXML_NO_STL - -#include "common.hpp" - -#include - -// letters taken from http://www.utf8-chartable.de/ - -TEST(as_wide_empty) -{ - CHECK(as_wide("") == L""); -} - -TEST(as_wide_valid_basic) -{ - // valid 1-byte, 2-byte and 3-byte inputs -#ifdef U_LITERALS - CHECK(as_wide("?\xd0\x80\xe2\x80\xbd") == L"?\u0400\u203D"); -#else - CHECK(as_wide("?\xd0\x80\xe2\x80\xbd") == L"?\x0400\x203D"); -#endif -} - -TEST(as_wide_valid_astral) -{ - // valid 4-byte input - std::wstring b4 = as_wide("\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf"); - - size_t wcharsize = sizeof(wchar_t); - - if (wcharsize == 4) - { - CHECK(b4.size() == 3 && b4[0] == wchar_cast(0x97624) && b4[1] == L' ' && b4[2] == wchar_cast(0x1003ff)); - } - else - { - CHECK(b4.size() == 5 && b4[0] == 0xda1d && b4[1] == 0xde24 && b4[2] == L' ' && b4[3] == 0xdbc0 && b4[4] == 0xdfff); - } -} - -TEST(as_wide_invalid) -{ - // invalid 1-byte input - CHECK(as_wide("a\xb0") == L"a"); - CHECK(as_wide("a\xb0_") == L"a_"); - - // invalid 2-byte input - CHECK(as_wide("a\xc0") == L"a"); - CHECK(as_wide("a\xd0") == L"a"); - CHECK(as_wide("a\xc0_") == L"a_"); - CHECK(as_wide("a\xd0_") == L"a_"); - - // invalid 3-byte input - CHECK(as_wide("a\xe2\x80") == L"a"); - CHECK(as_wide("a\xe2") 
== L"a"); - CHECK(as_wide("a\xe2\x80_") == L"a_"); - CHECK(as_wide("a\xe2_") == L"a_"); - - // invalid 4-byte input - CHECK(as_wide("a\xf2\x97\x98") == L"a"); - CHECK(as_wide("a\xf2\x97") == L"a"); - CHECK(as_wide("a\xf2") == L"a"); - CHECK(as_wide("a\xf2\x97\x98_") == L"a_"); - CHECK(as_wide("a\xf2\x97_") == L"a_"); - CHECK(as_wide("a\xf2_") == L"a_"); - - // invalid 5-byte input - std::wstring b5 = as_wide("\xf8\nbcd"); - CHECK(b5 == L"\nbcd"); -} - -TEST(as_utf8_empty) -{ - CHECK(as_utf8(L"") == ""); -} - -TEST(as_utf8_valid_basic) -{ - // valid 1-byte, 2-byte and 3-byte outputs -#ifdef U_LITERALS - CHECK(as_utf8(L"?\u0400\u203D") == "?\xd0\x80\xe2\x80\xbd"); -#else - CHECK(as_utf8(L"?\x0400\x203D") == "?\xd0\x80\xe2\x80\xbd"); -#endif -} - -TEST(as_utf8_valid_astral) -{ - // valid 4-byte output - size_t wcharsize = sizeof(wchar_t); - - if (wcharsize == 4) - { - std::wstring s; - s.resize(3); - s[0] = wchar_cast(0x97624); - s[1] = ' '; - s[2] = wchar_cast(0x1003ff); - - CHECK(as_utf8(s.c_str()) == "\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf"); - } - else - { - #ifdef U_LITERALS - CHECK(as_utf8(L"\uda1d\ude24 \udbc0\udfff") == "\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf"); - #else - CHECK(as_utf8(L"\xda1d\xde24 \xdbc0\xdfff") == "\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf"); - #endif - } -} - -TEST(as_utf8_invalid) -{ - size_t wcharsize = sizeof(wchar_t); - - if (wcharsize == 2) - { - // check non-terminated degenerate handling - #ifdef U_LITERALS - CHECK(as_utf8(L"a\uda1d") == "a"); - CHECK(as_utf8(L"a\uda1d_") == "a_"); - #else - CHECK(as_utf8(L"a\xda1d") == "a"); - CHECK(as_utf8(L"a\xda1d_") == "a_"); - #endif - - // check incorrect leading code - #ifdef U_LITERALS - CHECK(as_utf8(L"a\ude24") == "a"); - CHECK(as_utf8(L"a\ude24_") == "a_"); - #else - CHECK(as_utf8(L"a\xde24") == "a"); - CHECK(as_utf8(L"a\xde24_") == "a_"); - #endif - } -} -#endif +#ifndef PUGIXML_NO_STL + +#include "common.hpp" + +#include + +// letters taken from http://www.utf8-chartable.de/ + +TEST(as_wide_empty) +{ + 
CHECK(as_wide("") == L""); +} + +TEST(as_wide_valid_basic) +{ + // valid 1-byte, 2-byte and 3-byte inputs +#ifdef U_LITERALS + CHECK(as_wide("?\xd0\x80\xe2\x80\xbd") == L"?\u0400\u203D"); +#else + CHECK(as_wide("?\xd0\x80\xe2\x80\xbd") == L"?\x0400\x203D"); +#endif +} + +TEST(as_wide_valid_astral) +{ + // valid 4-byte input + std::wstring b4 = as_wide("\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf"); + + size_t wcharsize = sizeof(wchar_t); + + if (wcharsize == 4) + { + CHECK(b4.size() == 3 && b4[0] == wchar_cast(0x97624) && b4[1] == L' ' && b4[2] == wchar_cast(0x1003ff)); + } + else + { + CHECK(b4.size() == 5 && b4[0] == 0xda1d && b4[1] == 0xde24 && b4[2] == L' ' && b4[3] == 0xdbc0 && b4[4] == 0xdfff); + } +} + +TEST(as_wide_invalid) +{ + // invalid 1-byte input + CHECK(as_wide("a\xb0") == L"a"); + CHECK(as_wide("a\xb0_") == L"a_"); + + // invalid 2-byte input + CHECK(as_wide("a\xc0") == L"a"); + CHECK(as_wide("a\xd0") == L"a"); + CHECK(as_wide("a\xc0_") == L"a_"); + CHECK(as_wide("a\xd0_") == L"a_"); + + // invalid 3-byte input + CHECK(as_wide("a\xe2\x80") == L"a"); + CHECK(as_wide("a\xe2") == L"a"); + CHECK(as_wide("a\xe2\x80_") == L"a_"); + CHECK(as_wide("a\xe2_") == L"a_"); + + // invalid 4-byte input + CHECK(as_wide("a\xf2\x97\x98") == L"a"); + CHECK(as_wide("a\xf2\x97") == L"a"); + CHECK(as_wide("a\xf2") == L"a"); + CHECK(as_wide("a\xf2\x97\x98_") == L"a_"); + CHECK(as_wide("a\xf2\x97_") == L"a_"); + CHECK(as_wide("a\xf2_") == L"a_"); + + // invalid 5-byte input + std::wstring b5 = as_wide("\xf8\nbcd"); + CHECK(b5 == L"\nbcd"); +} + +TEST(as_utf8_empty) +{ + CHECK(as_utf8(L"") == ""); +} + +TEST(as_utf8_valid_basic) +{ + // valid 1-byte, 2-byte and 3-byte outputs +#ifdef U_LITERALS + CHECK(as_utf8(L"?\u0400\u203D") == "?\xd0\x80\xe2\x80\xbd"); +#else + CHECK(as_utf8(L"?\x0400\x203D") == "?\xd0\x80\xe2\x80\xbd"); +#endif +} + +TEST(as_utf8_valid_astral) +{ + // valid 4-byte output + size_t wcharsize = sizeof(wchar_t); + + if (wcharsize == 4) + { + std::wstring s; + 
s.resize(3); + s[0] = wchar_cast(0x97624); + s[1] = ' '; + s[2] = wchar_cast(0x1003ff); + + CHECK(as_utf8(s.c_str()) == "\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf"); + } + else + { + #ifdef U_LITERALS + CHECK(as_utf8(L"\uda1d\ude24 \udbc0\udfff") == "\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf"); + #else + CHECK(as_utf8(L"\xda1d\xde24 \xdbc0\xdfff") == "\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf"); + #endif + } +} + +TEST(as_utf8_invalid) +{ + size_t wcharsize = sizeof(wchar_t); + + if (wcharsize == 2) + { + // check non-terminated degenerate handling + #ifdef U_LITERALS + CHECK(as_utf8(L"a\uda1d") == "a"); + CHECK(as_utf8(L"a\uda1d_") == "a_"); + #else + CHECK(as_utf8(L"a\xda1d") == "a"); + CHECK(as_utf8(L"a\xda1d_") == "a_"); + #endif + + // check incorrect leading code + #ifdef U_LITERALS + CHECK(as_utf8(L"a\ude24") == "a"); + CHECK(as_utf8(L"a\ude24_") == "a_"); + #else + CHECK(as_utf8(L"a\xde24") == "a"); + CHECK(as_utf8(L"a\xde24_") == "a_"); + #endif + } +} +#endif diff --git a/tests/test_write.cpp b/tests/test_write.cpp index cb75c74..b7d8412 100644 --- a/tests/test_write.cpp +++ b/tests/test_write.cpp @@ -1,354 +1,354 @@ -#include "common.hpp" - -#include "writer_string.hpp" - -#include -#include - -TEST_XML(write_simple, "text") -{ - CHECK_NODE_EX(doc, STR("\ntext\n\n"), STR(""), 0); -} - -TEST_XML(write_raw, "text") -{ - CHECK_NODE_EX(doc, STR("text"), STR(""), format_raw); -} - -TEST_XML(write_indent, "text") -{ - CHECK_NODE_EX(doc, STR("\n\t\n\t\ttext\n\t\n\n"), STR("\t"), format_indent); -} - -TEST_XML(write_pcdata, "text") -{ - CHECK_NODE_EX(doc, STR("\n\t\n\t\t\n\t\ttext\n\t\n\n"), STR("\t"), format_indent); -} - -TEST_XML(write_cdata, "") -{ - CHECK_NODE(doc, STR("")); - CHECK_NODE_EX(doc, STR("\n"), STR(""), 0); -} - -TEST_XML_FLAGS(write_comment, "", parse_default | parse_comments) -{ - CHECK_NODE(doc, STR("")); - CHECK_NODE_EX(doc, STR("\n"), STR(""), 0); -} - -TEST_XML_FLAGS(write_pi, "", parse_default | parse_pi) -{ - CHECK_NODE(doc, STR("")); - CHECK_NODE_EX(doc, 
STR("\n"), STR(""), 0); -} - -TEST_XML_FLAGS(write_declaration, "", parse_default | parse_declaration) -{ - CHECK_NODE(doc, STR("")); - CHECK_NODE_EX(doc, STR("\n"), STR(""), 0); -} - -TEST_XML(write_escape, "text") -{ - doc.child(STR("node")).attribute(STR("attr")) = STR("<>'\"&\x04\r\n\t"); - doc.child(STR("node")).first_child().set_value(STR("<>'\"&\x04\r\n\t")); - - CHECK_NODE(doc, STR("<>'\"&\r\n\t")); -} - -TEST_XML(write_escape_unicode, "") -{ -#ifdef PUGIXML_WCHAR_MODE - #ifdef U_LITERALS - CHECK_NODE(doc, STR("")); - #else - CHECK_NODE(doc, STR("")); - #endif -#else - CHECK_NODE(doc, STR("")); -#endif -} - -struct test_writer: xml_writer -{ - std::basic_string contents; - - virtual void write(const void* data, size_t size) - { - CHECK(size % sizeof(pugi::char_t) == 0); - contents += std::basic_string(static_cast(data), static_cast(data) + size / sizeof(pugi::char_t)); - } -}; - -TEST_XML(write_print_writer, "") -{ - test_writer writer; - doc.print(writer, STR(""), format_default, get_native_encoding()); - - CHECK(writer.contents == STR("\n")); -} - -#ifndef PUGIXML_NO_STL -TEST_XML(write_print_stream, "") -{ - std::ostringstream oss; - doc.print(oss, STR(""), format_default, encoding_utf8); - - CHECK(oss.str() == "\n"); -} - -TEST_XML(write_print_stream_encode, "") -{ - std::ostringstream oss; - doc.print(oss, STR(""), format_default, encoding_utf16_be); - - CHECK(oss.str() == std::string("\x00<\x00n\x00 \x00/\x00>\x00\n", 12)); -} - -TEST_XML(write_print_stream_wide, "") -{ - std::basic_ostringstream oss; - doc.print(oss, STR(""), format_default, encoding_utf8); - - CHECK(oss.str() == L"\n"); -} -#endif - -TEST_XML(write_huge_chunk, "") -{ - std::basic_string name(10000, STR('n')); - doc.child(STR("node")).set_name(name.c_str()); - - test_writer writer; - doc.print(writer, STR(""), format_default, get_native_encoding()); - - CHECK(writer.contents == STR("<") + name + STR(" />\n")); -} - -TEST(write_encodings) -{ - static char s_utf8[] = 
"<\x54\xC2\xA2\xE2\x82\xAC\xF0\xA4\xAD\xA2/>"; - - xml_document doc; - CHECK(doc.load_buffer(s_utf8, sizeof(s_utf8), parse_default, encoding_utf8)); - - CHECK(write_narrow(doc, format_default, encoding_utf8) == "<\x54\xC2\xA2\xE2\x82\xAC\xF0\xA4\xAD\xA2 />\n"); - - CHECK(test_write_narrow(doc, format_default, encoding_utf32_le, "<\x00\x00\x00\x54\x00\x00\x00\xA2\x00\x00\x00\xAC\x20\x00\x00\x62\x4B\x02\x00 \x00\x00\x00/\x00\x00\x00>\x00\x00\x00\n\x00\x00\x00", 36)); - CHECK(test_write_narrow(doc, format_default, encoding_utf32_be, "\x00\x00\x00<\x00\x00\x00\x54\x00\x00\x00\xA2\x00\x00\x20\xAC\x00\x02\x4B\x62\x00\x00\x00 \x00\x00\x00/\x00\x00\x00>\x00\x00\x00\n", 36)); - CHECK(write_narrow(doc, format_default, encoding_utf32) == write_narrow(doc, format_default, is_little_endian() ? encoding_utf32_le : encoding_utf32_be)); - - CHECK(test_write_narrow(doc, format_default, encoding_utf16_le, "<\x00\x54\x00\xA2\x00\xAC\x20\x52\xd8\x62\xdf \x00/\x00>\x00\n\x00", 20)); - CHECK(test_write_narrow(doc, format_default, encoding_utf16_be, "\x00<\x00\x54\x00\xA2\x20\xAC\xd8\x52\xdf\x62\x00 \x00/\x00>\x00\n", 20)); - CHECK(write_narrow(doc, format_default, encoding_utf16) == write_narrow(doc, format_default, is_little_endian() ? 
encoding_utf16_le : encoding_utf16_be)); - - size_t wcharsize = sizeof(wchar_t); - std::wstring v = write_wide(doc, format_default, encoding_wchar); - - if (wcharsize == 4) - { - CHECK(v.size() == 9 && v[0] == '<' && v[1] == 0x54 && v[2] == 0xA2 && v[3] == 0x20AC && v[4] == wchar_cast(0x24B62) && v[5] == ' ' && v[6] == '/' && v[7] == '>' && v[8] == '\n'); - } - else - { - CHECK(v.size() == 10 && v[0] == '<' && v[1] == 0x54 && v[2] == 0xA2 && v[3] == 0x20AC && v[4] == 0xd852 && v[5] == 0xdf62 && v[6] == ' ' && v[7] == '/' && v[8] == '>' && v[9] == '\n'); - } -} - -#ifdef PUGIXML_WCHAR_MODE -TEST(write_encoding_huge) -{ - const unsigned int N = 16000; - - // make a large utf16 name consisting of 6-byte char pairs (6 does not divide internal buffer size, so will need split correction) - std::string s_utf16 = std::string("\x00<", 2); - - for (unsigned int i = 0; i < N; ++i) s_utf16 += "\x20\xAC\xd8\x52\xdf\x62"; - - s_utf16 += std::string("\x00/\x00>", 4); - - xml_document doc; - CHECK(doc.load_buffer(&s_utf16[0], s_utf16.length(), parse_default, encoding_utf16_be)); - - std::string s_utf8 = "<"; - - for (unsigned int j = 0; j < N; ++j) s_utf8 += "\xE2\x82\xAC\xF0\xA4\xAD\xA2"; - - s_utf8 += " />\n"; - - CHECK(test_write_narrow(doc, format_default, encoding_utf8, s_utf8.c_str(), s_utf8.length())); -} - -TEST(write_encoding_huge_invalid) -{ - size_t wcharsize = sizeof(wchar_t); - - if (wcharsize == 2) - { - const unsigned int N = 16000; - - // make a large utf16 name consisting of leading surrogate chars - std::basic_string s_utf16; - - for (unsigned int i = 0; i < N; ++i) s_utf16 += static_cast(0xd852); - - xml_document doc; - doc.append_child().set_name(s_utf16.c_str()); - - CHECK(test_write_narrow(doc, format_default, encoding_utf8, "< />\n", 5)); - } -} -#else -TEST(write_encoding_huge) -{ - const unsigned int N = 16000; - - // make a large utf8 name consisting of 3-byte chars (3 does not divide internal buffer size, so will need split correction) - std::string 
s_utf8 = "<"; - - for (unsigned int i = 0; i < N; ++i) s_utf8 += "\xE2\x82\xAC"; - - s_utf8 += "/>"; - - xml_document doc; - CHECK(doc.load_buffer(&s_utf8[0], s_utf8.length(), parse_default, encoding_utf8)); - - std::string s_utf16 = std::string("\x00<", 2); - - for (unsigned int j = 0; j < N; ++j) s_utf16 += "\x20\xAC"; - - s_utf16 += std::string("\x00 \x00/\x00>\x00\n", 8); - - CHECK(test_write_narrow(doc, format_default, encoding_utf16_be, s_utf16.c_str(), s_utf16.length())); -} - -TEST(write_encoding_huge_invalid) -{ - const unsigned int N = 16000; - - // make a large utf8 name consisting of non-leading chars - std::string s_utf8; - - for (unsigned int i = 0; i < N; ++i) s_utf8 += "\x82"; - - xml_document doc; - doc.append_child().set_name(s_utf8.c_str()); - - std::string s_utf16 = std::string("\x00<\x00 \x00/\x00>\x00\n", 10); - - CHECK(test_write_narrow(doc, format_default, encoding_utf16_be, s_utf16.c_str(), s_utf16.length())); -} -#endif - -TEST(write_unicode_escape) -{ - char s_utf8[] = "<\xE2\x82\xAC \xC2\xA2='\"\xF0\xA4\xAD\xA2 \"'>&\x14\xF0\xA4\xAD\xA2<"; - - xml_document doc; - CHECK(doc.load_buffer(s_utf8, sizeof(s_utf8), parse_default, encoding_utf8)); - - CHECK(write_narrow(doc, format_default, encoding_utf8) == "<\xE2\x82\xAC \xC2\xA2=\""\xF0\xA4\xAD\xA2 "\">&\xF0\xA4\xAD\xA2<\n"); -} - -#ifdef PUGIXML_WCHAR_MODE -static bool test_write_unicode_invalid(const wchar_t* name, const char* expected) -{ - xml_document doc; - doc.append_child(node_pcdata).set_value(name); - - return write_narrow(doc, format_raw, encoding_utf8) == expected; -} - -TEST(write_unicode_invalid_utf16) -{ - size_t wcharsize = sizeof(wchar_t); - - if (wcharsize == 2) - { - // check non-terminated degenerate handling - #ifdef U_LITERALS - CHECK(test_write_unicode_invalid(L"a\uda1d", "a")); - CHECK(test_write_unicode_invalid(L"a\uda1d_", "a_")); - #else - CHECK(test_write_unicode_invalid(L"a\xda1d", "a")); - CHECK(test_write_unicode_invalid(L"a\xda1d_", "a_")); - #endif - - // 
check incorrect leading code - #ifdef U_LITERALS - CHECK(test_write_unicode_invalid(L"a\ude24", "a")); - CHECK(test_write_unicode_invalid(L"a\ude24_", "a_")); - #else - CHECK(test_write_unicode_invalid(L"a\xde24", "a")); - CHECK(test_write_unicode_invalid(L"a\xde24_", "a_")); - #endif - } -} -#else -static bool test_write_unicode_invalid(const char* name, const wchar_t* expected) -{ - xml_document doc; - doc.append_child(node_pcdata).set_value(name); - - return write_wide(doc, format_raw, encoding_wchar) == expected; -} - -TEST(write_unicode_invalid_utf8) -{ - // invalid 1-byte input - CHECK(test_write_unicode_invalid("a\xb0", L"a")); - CHECK(test_write_unicode_invalid("a\xb0_", L"a_")); - - // invalid 2-byte input - CHECK(test_write_unicode_invalid("a\xc0", L"a")); - CHECK(test_write_unicode_invalid("a\xd0", L"a")); - CHECK(test_write_unicode_invalid("a\xc0_", L"a_")); - CHECK(test_write_unicode_invalid("a\xd0_", L"a_")); - - // invalid 3-byte input - CHECK(test_write_unicode_invalid("a\xe2\x80", L"a")); - CHECK(test_write_unicode_invalid("a\xe2", L"a")); - CHECK(test_write_unicode_invalid("a\xe2\x80_", L"a_")); - CHECK(test_write_unicode_invalid("a\xe2_", L"a_")); - - // invalid 4-byte input - CHECK(test_write_unicode_invalid("a\xf2\x97\x98", L"a")); - CHECK(test_write_unicode_invalid("a\xf2\x97", L"a")); - CHECK(test_write_unicode_invalid("a\xf2", L"a")); - CHECK(test_write_unicode_invalid("a\xf2\x97\x98_", L"a_")); - CHECK(test_write_unicode_invalid("a\xf2\x97_", L"a_")); - CHECK(test_write_unicode_invalid("a\xf2_", L"a_")); - - // invalid 5-byte input - CHECK(test_write_unicode_invalid("a\xf8_", L"a_")); -} -#endif - -TEST(write_no_name_element) -{ - xml_document doc; - xml_node root = doc.append_child(); - root.append_child(); - root.append_child().append_child(node_pcdata).set_value(STR("text")); - - CHECK_NODE(doc, STR("<:anonymous><:anonymous /><:anonymous>text")); - CHECK_NODE_EX(doc, STR("<:anonymous>\n\t<:anonymous />\n\t<:anonymous>text\n\n"), 
STR("\t"), format_default); -} - -TEST(write_no_name_pi) -{ - xml_document doc; - doc.append_child(node_pi); - - CHECK_NODE(doc, STR("")); -} - -TEST(write_no_name_attribute) -{ - xml_document doc; - doc.append_child().set_name(STR("root")); - doc.child(STR("root")).append_attribute(STR("")); - - CHECK_NODE(doc, STR("")); -} +#include "common.hpp" + +#include "writer_string.hpp" + +#include +#include + +TEST_XML(write_simple, "text") +{ + CHECK_NODE_EX(doc, STR("\ntext\n\n"), STR(""), 0); +} + +TEST_XML(write_raw, "text") +{ + CHECK_NODE_EX(doc, STR("text"), STR(""), format_raw); +} + +TEST_XML(write_indent, "text") +{ + CHECK_NODE_EX(doc, STR("\n\t\n\t\ttext\n\t\n\n"), STR("\t"), format_indent); +} + +TEST_XML(write_pcdata, "text") +{ + CHECK_NODE_EX(doc, STR("\n\t\n\t\t\n\t\ttext\n\t\n\n"), STR("\t"), format_indent); +} + +TEST_XML(write_cdata, "") +{ + CHECK_NODE(doc, STR("")); + CHECK_NODE_EX(doc, STR("\n"), STR(""), 0); +} + +TEST_XML_FLAGS(write_comment, "", parse_default | parse_comments) +{ + CHECK_NODE(doc, STR("")); + CHECK_NODE_EX(doc, STR("\n"), STR(""), 0); +} + +TEST_XML_FLAGS(write_pi, "", parse_default | parse_pi) +{ + CHECK_NODE(doc, STR("")); + CHECK_NODE_EX(doc, STR("\n"), STR(""), 0); +} + +TEST_XML_FLAGS(write_declaration, "", parse_default | parse_declaration) +{ + CHECK_NODE(doc, STR("")); + CHECK_NODE_EX(doc, STR("\n"), STR(""), 0); +} + +TEST_XML(write_escape, "text") +{ + doc.child(STR("node")).attribute(STR("attr")) = STR("<>'\"&\x04\r\n\t"); + doc.child(STR("node")).first_child().set_value(STR("<>'\"&\x04\r\n\t")); + + CHECK_NODE(doc, STR("<>'\"&\r\n\t")); +} + +TEST_XML(write_escape_unicode, "") +{ +#ifdef PUGIXML_WCHAR_MODE + #ifdef U_LITERALS + CHECK_NODE(doc, STR("")); + #else + CHECK_NODE(doc, STR("")); + #endif +#else + CHECK_NODE(doc, STR("")); +#endif +} + +struct test_writer: xml_writer +{ + std::basic_string contents; + + virtual void write(const void* data, size_t size) + { + CHECK(size % sizeof(pugi::char_t) == 0); + contents 
+= std::basic_string(static_cast(data), static_cast(data) + size / sizeof(pugi::char_t)); + } +}; + +TEST_XML(write_print_writer, "") +{ + test_writer writer; + doc.print(writer, STR(""), format_default, get_native_encoding()); + + CHECK(writer.contents == STR("\n")); +} + +#ifndef PUGIXML_NO_STL +TEST_XML(write_print_stream, "") +{ + std::ostringstream oss; + doc.print(oss, STR(""), format_default, encoding_utf8); + + CHECK(oss.str() == "\n"); +} + +TEST_XML(write_print_stream_encode, "") +{ + std::ostringstream oss; + doc.print(oss, STR(""), format_default, encoding_utf16_be); + + CHECK(oss.str() == std::string("\x00<\x00n\x00 \x00/\x00>\x00\n", 12)); +} + +TEST_XML(write_print_stream_wide, "") +{ + std::basic_ostringstream oss; + doc.print(oss, STR(""), format_default, encoding_utf8); + + CHECK(oss.str() == L"\n"); +} +#endif + +TEST_XML(write_huge_chunk, "") +{ + std::basic_string name(10000, STR('n')); + doc.child(STR("node")).set_name(name.c_str()); + + test_writer writer; + doc.print(writer, STR(""), format_default, get_native_encoding()); + + CHECK(writer.contents == STR("<") + name + STR(" />\n")); +} + +TEST(write_encodings) +{ + static char s_utf8[] = "<\x54\xC2\xA2\xE2\x82\xAC\xF0\xA4\xAD\xA2/>"; + + xml_document doc; + CHECK(doc.load_buffer(s_utf8, sizeof(s_utf8), parse_default, encoding_utf8)); + + CHECK(write_narrow(doc, format_default, encoding_utf8) == "<\x54\xC2\xA2\xE2\x82\xAC\xF0\xA4\xAD\xA2 />\n"); + + CHECK(test_write_narrow(doc, format_default, encoding_utf32_le, "<\x00\x00\x00\x54\x00\x00\x00\xA2\x00\x00\x00\xAC\x20\x00\x00\x62\x4B\x02\x00 \x00\x00\x00/\x00\x00\x00>\x00\x00\x00\n\x00\x00\x00", 36)); + CHECK(test_write_narrow(doc, format_default, encoding_utf32_be, "\x00\x00\x00<\x00\x00\x00\x54\x00\x00\x00\xA2\x00\x00\x20\xAC\x00\x02\x4B\x62\x00\x00\x00 \x00\x00\x00/\x00\x00\x00>\x00\x00\x00\n", 36)); + CHECK(write_narrow(doc, format_default, encoding_utf32) == write_narrow(doc, format_default, is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be)); + + CHECK(test_write_narrow(doc, format_default, encoding_utf16_le, "<\x00\x54\x00\xA2\x00\xAC\x20\x52\xd8\x62\xdf \x00/\x00>\x00\n\x00", 20)); + CHECK(test_write_narrow(doc, format_default, encoding_utf16_be, "\x00<\x00\x54\x00\xA2\x20\xAC\xd8\x52\xdf\x62\x00 \x00/\x00>\x00\n", 20)); + CHECK(write_narrow(doc, format_default, encoding_utf16) == write_narrow(doc, format_default, is_little_endian() ? encoding_utf16_le : encoding_utf16_be)); + + size_t wcharsize = sizeof(wchar_t); + std::wstring v = write_wide(doc, format_default, encoding_wchar); + + if (wcharsize == 4) + { + CHECK(v.size() == 9 && v[0] == '<' && v[1] == 0x54 && v[2] == 0xA2 && v[3] == 0x20AC && v[4] == wchar_cast(0x24B62) && v[5] == ' ' && v[6] == '/' && v[7] == '>' && v[8] == '\n'); + } + else + { + CHECK(v.size() == 10 && v[0] == '<' && v[1] == 0x54 && v[2] == 0xA2 && v[3] == 0x20AC && v[4] == 0xd852 && v[5] == 0xdf62 && v[6] == ' ' && v[7] == '/' && v[8] == '>' && v[9] == '\n'); + } +} + +#ifdef PUGIXML_WCHAR_MODE +TEST(write_encoding_huge) +{ + const unsigned int N = 16000; + + // make a large utf16 name consisting of 6-byte char pairs (6 does not divide internal buffer size, so will need split correction) + std::string s_utf16 = std::string("\x00<", 2); + + for (unsigned int i = 0; i < N; ++i) s_utf16 += "\x20\xAC\xd8\x52\xdf\x62"; + + s_utf16 += std::string("\x00/\x00>", 4); + + xml_document doc; + CHECK(doc.load_buffer(&s_utf16[0], s_utf16.length(), parse_default, encoding_utf16_be)); + + std::string s_utf8 = "<"; + + for (unsigned int j = 0; j < N; ++j) s_utf8 += "\xE2\x82\xAC\xF0\xA4\xAD\xA2"; + + s_utf8 += " />\n"; + + CHECK(test_write_narrow(doc, format_default, encoding_utf8, s_utf8.c_str(), s_utf8.length())); +} + +TEST(write_encoding_huge_invalid) +{ + size_t wcharsize = sizeof(wchar_t); + + if (wcharsize == 2) + { + const unsigned int N = 16000; + + // make a large utf16 name consisting of leading surrogate chars + std::basic_string s_utf16; + 
+ for (unsigned int i = 0; i < N; ++i) s_utf16 += static_cast(0xd852); + + xml_document doc; + doc.append_child().set_name(s_utf16.c_str()); + + CHECK(test_write_narrow(doc, format_default, encoding_utf8, "< />\n", 5)); + } +} +#else +TEST(write_encoding_huge) +{ + const unsigned int N = 16000; + + // make a large utf8 name consisting of 3-byte chars (3 does not divide internal buffer size, so will need split correction) + std::string s_utf8 = "<"; + + for (unsigned int i = 0; i < N; ++i) s_utf8 += "\xE2\x82\xAC"; + + s_utf8 += "/>"; + + xml_document doc; + CHECK(doc.load_buffer(&s_utf8[0], s_utf8.length(), parse_default, encoding_utf8)); + + std::string s_utf16 = std::string("\x00<", 2); + + for (unsigned int j = 0; j < N; ++j) s_utf16 += "\x20\xAC"; + + s_utf16 += std::string("\x00 \x00/\x00>\x00\n", 8); + + CHECK(test_write_narrow(doc, format_default, encoding_utf16_be, s_utf16.c_str(), s_utf16.length())); +} + +TEST(write_encoding_huge_invalid) +{ + const unsigned int N = 16000; + + // make a large utf8 name consisting of non-leading chars + std::string s_utf8; + + for (unsigned int i = 0; i < N; ++i) s_utf8 += "\x82"; + + xml_document doc; + doc.append_child().set_name(s_utf8.c_str()); + + std::string s_utf16 = std::string("\x00<\x00 \x00/\x00>\x00\n", 10); + + CHECK(test_write_narrow(doc, format_default, encoding_utf16_be, s_utf16.c_str(), s_utf16.length())); +} +#endif + +TEST(write_unicode_escape) +{ + char s_utf8[] = "<\xE2\x82\xAC \xC2\xA2='\"\xF0\xA4\xAD\xA2 \"'>&\x14\xF0\xA4\xAD\xA2<"; + + xml_document doc; + CHECK(doc.load_buffer(s_utf8, sizeof(s_utf8), parse_default, encoding_utf8)); + + CHECK(write_narrow(doc, format_default, encoding_utf8) == "<\xE2\x82\xAC \xC2\xA2=\""\xF0\xA4\xAD\xA2 "\">&\xF0\xA4\xAD\xA2<\n"); +} + +#ifdef PUGIXML_WCHAR_MODE +static bool test_write_unicode_invalid(const wchar_t* name, const char* expected) +{ + xml_document doc; + doc.append_child(node_pcdata).set_value(name); + + return write_narrow(doc, format_raw, 
encoding_utf8) == expected; +} + +TEST(write_unicode_invalid_utf16) +{ + size_t wcharsize = sizeof(wchar_t); + + if (wcharsize == 2) + { + // check non-terminated degenerate handling + #ifdef U_LITERALS + CHECK(test_write_unicode_invalid(L"a\uda1d", "a")); + CHECK(test_write_unicode_invalid(L"a\uda1d_", "a_")); + #else + CHECK(test_write_unicode_invalid(L"a\xda1d", "a")); + CHECK(test_write_unicode_invalid(L"a\xda1d_", "a_")); + #endif + + // check incorrect leading code + #ifdef U_LITERALS + CHECK(test_write_unicode_invalid(L"a\ude24", "a")); + CHECK(test_write_unicode_invalid(L"a\ude24_", "a_")); + #else + CHECK(test_write_unicode_invalid(L"a\xde24", "a")); + CHECK(test_write_unicode_invalid(L"a\xde24_", "a_")); + #endif + } +} +#else +static bool test_write_unicode_invalid(const char* name, const wchar_t* expected) +{ + xml_document doc; + doc.append_child(node_pcdata).set_value(name); + + return write_wide(doc, format_raw, encoding_wchar) == expected; +} + +TEST(write_unicode_invalid_utf8) +{ + // invalid 1-byte input + CHECK(test_write_unicode_invalid("a\xb0", L"a")); + CHECK(test_write_unicode_invalid("a\xb0_", L"a_")); + + // invalid 2-byte input + CHECK(test_write_unicode_invalid("a\xc0", L"a")); + CHECK(test_write_unicode_invalid("a\xd0", L"a")); + CHECK(test_write_unicode_invalid("a\xc0_", L"a_")); + CHECK(test_write_unicode_invalid("a\xd0_", L"a_")); + + // invalid 3-byte input + CHECK(test_write_unicode_invalid("a\xe2\x80", L"a")); + CHECK(test_write_unicode_invalid("a\xe2", L"a")); + CHECK(test_write_unicode_invalid("a\xe2\x80_", L"a_")); + CHECK(test_write_unicode_invalid("a\xe2_", L"a_")); + + // invalid 4-byte input + CHECK(test_write_unicode_invalid("a\xf2\x97\x98", L"a")); + CHECK(test_write_unicode_invalid("a\xf2\x97", L"a")); + CHECK(test_write_unicode_invalid("a\xf2", L"a")); + CHECK(test_write_unicode_invalid("a\xf2\x97\x98_", L"a_")); + CHECK(test_write_unicode_invalid("a\xf2\x97_", L"a_")); + CHECK(test_write_unicode_invalid("a\xf2_", 
L"a_")); + + // invalid 5-byte input + CHECK(test_write_unicode_invalid("a\xf8_", L"a_")); +} +#endif + +TEST(write_no_name_element) +{ + xml_document doc; + xml_node root = doc.append_child(); + root.append_child(); + root.append_child().append_child(node_pcdata).set_value(STR("text")); + + CHECK_NODE(doc, STR("<:anonymous><:anonymous /><:anonymous>text")); + CHECK_NODE_EX(doc, STR("<:anonymous>\n\t<:anonymous />\n\t<:anonymous>text\n\n"), STR("\t"), format_default); +} + +TEST(write_no_name_pi) +{ + xml_document doc; + doc.append_child(node_pi); + + CHECK_NODE(doc, STR("")); +} + +TEST(write_no_name_attribute) +{ + xml_document doc; + doc.append_child().set_name(STR("root")); + doc.child(STR("root")).append_attribute(STR("")); + + CHECK_NODE(doc, STR("")); +} diff --git a/tests/test_xpath.cpp b/tests/test_xpath.cpp index 7b52437..608859f 100644 --- a/tests/test_xpath.cpp +++ b/tests/test_xpath.cpp @@ -1,227 +1,227 @@ -#ifndef PUGIXML_NO_XPATH - -#include "common.hpp" - -#include -#include -#include - -#include - -TEST(xpath_allocator_many_pages) -{ - pugi::string_t query = STR("0"); - - for (int i = 0; i < 128; ++i) query += STR("+string-length('abcdefgh')"); - - CHECK_XPATH_NUMBER(xml_node(), query.c_str(), 1024); -} - -TEST(xpath_allocator_large_page) -{ - pugi::string_t query; - - for (int i = 0; i < 1024; ++i) query += STR("abcdefgh"); - - CHECK_XPATH_NUMBER(xml_node(), (STR("string-length('") + query + STR("')")).c_str(), 8192); -} - -TEST_XML(xpath_sort_complex, "test") -{ - // just some random union order, it should not matter probably? - xpath_node_set ns = doc.child(STR("node")).select_nodes(STR("child1 | child2 | child1/@* | . 
| child2/@* | child2/text()")); - - ns.sort(false); - xpath_node_set sorted = ns; - - ns.sort(true); - xpath_node_set reverse_sorted = ns; - - xpath_node_set_tester(sorted, "sorted order failed") % 2 % 3 % 4 % 5 % 6 % 7 % 8; - xpath_node_set_tester(reverse_sorted, "reverse sorted order failed") % 8 % 7 % 6 % 5 % 4 % 3 % 2; -} - -TEST_XML(xpath_sort_children, "") -{ - xpath_node_set ns = doc.child(STR("node")).select_nodes(STR("child/subchild[@id=1] | child/subchild[@id=2]")); - - ns.sort(false); - xpath_node_set sorted = ns; - - ns.sort(true); - xpath_node_set reverse_sorted = ns; - - xpath_node_set_tester(sorted, "sorted order failed") % 4 % 7; - xpath_node_set_tester(reverse_sorted, "reverse sorted order failed") % 7 % 4; -} - -TEST_XML(xpath_sort_attributes, "") -{ - xml_node n = doc.child(STR("node")); - - // we need to insert attributes manually since unsorted node sets are (always?) sorted via pointers because of remove_duplicates, - // so we need to have different document and pointer order to cover all comparator cases - n.append_attribute(STR("attr2")); - n.append_attribute(STR("attr3")); - n.insert_attribute_before(STR("attr1"), n.attribute(STR("attr2"))); - - xpath_node_set ns = n.select_nodes(STR("@*")); - - ns.sort(true); - xpath_node_set reverse_sorted = ns; - - ns.sort(false); - xpath_node_set sorted = ns; - - xpath_node_set_tester(sorted, "sorted order failed") % 3 % 4 % 5; - xpath_node_set_tester(reverse_sorted, "reverse sorted order failed") % 5 % 4 % 3; -} - -TEST(xpath_long_numbers_parse) -{ - const pugi::char_t* str_flt_max = STR("340282346638528860000000000000000000000"); - const pugi::char_t* str_flt_max_dec = STR("340282346638528860000000000000000000000.000000"); - - const pugi::char_t* str_dbl_max = 
STR("179769313486231570000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"); - const pugi::char_t* str_dbl_max_dec = STR("179769313486231570000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.000000"); - - xml_node c; - - // check parsing - CHECK_XPATH_NUMBER(c, str_flt_max, FLT_MAX); - CHECK_XPATH_NUMBER(c, str_flt_max_dec, FLT_MAX); - CHECK_XPATH_NUMBER(c, str_dbl_max, DBL_MAX); - CHECK_XPATH_NUMBER(c, str_dbl_max_dec, DBL_MAX); -} - -static bool test_xpath_string_prefix(const pugi::xml_node& node, const pugi::char_t* query, const pugi::char_t* expected, size_t match_length) -{ -#ifdef PUGIXML_WCHAR_MODE - size_t expected_length = wcslen(expected); -#else - size_t expected_length = strlen(expected); -#endif - - pugi::xpath_query q(query); - pugi::string_t value = q.evaluate_string(node); - - return value.length() == expected_length && value.compare(0, match_length, expected, match_length) == 0; -} - -TEST(xpath_long_numbers_stringize) -{ - const pugi::char_t* str_flt_max = STR("340282346638528860000000000000000000000"); - const pugi::char_t* str_flt_max_dec = STR("340282346638528860000000000000000000000.000000"); - - const pugi::char_t* str_dbl_max = STR("179769313486231570000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"); - const pugi::char_t* 
str_dbl_max_dec = STR("179769313486231570000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.000000"); - - xml_node c; - - CHECK(test_xpath_string_prefix(c, str_flt_max, str_flt_max, 15)); - CHECK(test_xpath_string_prefix(c, str_flt_max_dec, str_flt_max, 15)); - - CHECK(test_xpath_string_prefix(c, str_dbl_max, str_dbl_max, 15)); - CHECK(test_xpath_string_prefix(c, str_dbl_max_dec, str_dbl_max, 15)); -} - -#include - -TEST(xpath_denorm_numbers) -{ - pugi::string_t query; - - // 10^-318 - double denormal - for (int i = 0; i < 106; ++i) - { - if (i != 0) query += STR(" * "); - query += STR("0.001"); - } - - CHECK_XPATH_STRING(xml_node(), query.c_str(), STR("0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000009999987484955998")); -} - -TEST_XML(xpath_rexml_1, "") -{ - CHECK_XPATH_NODESET(doc, STR("//*[local-name()='c' and @id='b']")) % 6; - CHECK_XPATH_NODESET(doc, STR("//*[ local-name()='c' and @id='b' ]")) % 6; - CHECK_XPATH_NODESET(doc, STR("/a/c[@id]")) % 6; - CHECK_XPATH_NODESET(doc, STR("/a/c[(@id)]")) % 6; - CHECK_XPATH_NODESET(doc, STR("/a/c[ @id ]")) % 6; - CHECK_XPATH_NODESET(doc, STR("/a/c[ (@id) ]")) % 6; - CHECK_XPATH_NODESET(doc, STR("/a/c[( @id )]")) % 6; - CHECK_XPATH_NODESET(doc, STR("/a/c[ ( @id ) ]")) % 6; - CHECK_XPATH_NODESET(doc, STR("/a/c [ ( @id ) ] ")) % 6; - CHECK_XPATH_NODESET(doc, STR(" / a / c [ ( @id ) ] ")) % 6; -} - -TEST_XML(xpath_rexml_2, "zzz") -{ - CHECK_XPATH_NODESET(doc, STR("a:x/a:y[@p='p' and @q='q']/a:z/text()")) % 8; -} - 
-TEST_XML(xpath_rexml_3, "
free flowing text.
free flowing text.
free flowing text.
") -{ - CHECK_XPATH_NODESET(doc, STR("//section[../self::section[@role=\"division\"]]")) % 10 % 15; - CHECK_XPATH_NODESET(doc, STR("//section[@role=\"subdivision\" and not(../self::section[@role=\"division\"])]")) % 3; - CHECK_XPATH_NODESET(doc, STR("//section[@role=\"subdivision\"][not(../self::section[@role=\"division\"])]")) % 3; -} - -TEST_XML_FLAGS(xpath_rexml_4, "TEXT1TEXT2", parse_default | parse_comments) -{ - CHECK_XPATH_NODESET(doc, STR("/descendant-or-self::node()[count(child::node()|following-sibling::node()|preceding-sibling::node())=0]")) % 6 % 17 % 20; -} - -TEST_XML(xpath_rexml_5, "") -{ - CHECK_XPATH_FAIL(STR(".//[@id]")); - CHECK_XPATH_NODESET(doc, STR(".//self::*[@id]")) % 4 % 6; - CHECK_XPATH_NODESET(doc, STR(".//node()[@id]")) % 4 % 6; -} - -TEST_XML(xpath_rexml_6, "
ab
") -{ - CHECK_XPATH_NODESET(doc, STR("//em|//strong")) % 4 % 6; - CHECK_XPATH_NODESET(doc, STR("//*[self::em | self::strong]")) % 4 % 6; - CHECK_XPATH_NODESET(doc, STR("//*[name()=\"em\" or name()=\"strong\"]")) % 4 % 6; - CHECK_XPATH_NODESET(doc, STR("//*[self::em or self::strong]")) % 4 % 6; -} - -TEST_XML(xpath_xsl_list_1, "whatevertextselectsomething") -{ - // if I'm not last, and the next input/type isn't select - CHECK_XPATH_NODESET(doc, STR("input[type[parent::input/following-sibling::input[1]/type != 'select']]")) % 2 % 8; - CHECK_XPATH_NODESET(doc, STR("input[type[../following-sibling::input[1]/type != 'select']]")) % 2 % 8; - - CHECK_XPATH_NODESET(doc, STR("input[position()+1]")); -} - -TEST_XML(xpath_xsl_list_2, "
") -{ - CHECK_XPATH_FAIL(STR(".[not(.=ancestor::TR/TD[15]/node())]")); - - CHECK_XPATH_NODESET(doc.child(STR("TR")).find_child_by_attribute(STR("TD"), STR("id"), STR("1")).first_child(), STR("self::node()[not(.=ancestor::TR/TD[3]/node())]")) % 5; - CHECK_XPATH_NODESET(doc.child(STR("TR")).find_child_by_attribute(STR("TD"), STR("id"), STR("2")).first_child(), STR("self::node()[not(.=ancestor::TR/TD[3]/node())]")) % 8; - CHECK_XPATH_NODESET(doc.child(STR("TR")).find_child_by_attribute(STR("TD"), STR("id"), STR("3")).first_child(), STR("self::node()[not(.=ancestor::TR/TD[3]/node())]")); - CHECK_XPATH_NODESET(doc.child(STR("TR")).find_child_by_attribute(STR("TD"), STR("id"), STR("4")).first_child(), STR("self::node()[not(.=ancestor::TR/TD[3]/node())]")) % 14; - - CHECK_XPATH_NODESET(doc.child(STR("TR")).find_child_by_attribute(STR("TD"), STR("id"), STR("1")), STR("node()[not(.=ancestor::TR/TD[3]/node())]")) % 5; - CHECK_XPATH_NODESET(doc.child(STR("TR")).find_child_by_attribute(STR("TD"), STR("id"), STR("2")), STR("node()[not(.=ancestor::TR/TD[3]/node())]")) % 8; - CHECK_XPATH_NODESET(doc.child(STR("TR")).find_child_by_attribute(STR("TD"), STR("id"), STR("3")), STR("node()[not(.=ancestor::TR/TD[3]/node())]")); - CHECK_XPATH_NODESET(doc.child(STR("TR")).find_child_by_attribute(STR("TD"), STR("id"), STR("4")), STR("node()[not(.=ancestor::TR/TD[3]/node())]")) % 14; -} - -TEST_XML(xpath_star_token, "0.5
") -{ - CHECK_XPATH_NODESET(doc, STR("//*[/* * 4]")) % 6 % 9; - CHECK_XPATH_NODESET(doc, STR("//*[/**4]")) % 6 % 9; - CHECK_XPATH_FAIL(STR("//*[/***4]")); -} - -TEST(xpath_miscellaneous) -{ - CHECK_XPATH_FAIL(STR("/root/child[a=3]/substring(child::text())")); - CHECK_XPATH_NODESET(xml_node(), STR("foo/@FOO/@bar")); -} - -#endif +#ifndef PUGIXML_NO_XPATH + +#include "common.hpp" + +#include +#include +#include + +#include + +TEST(xpath_allocator_many_pages) +{ + pugi::string_t query = STR("0"); + + for (int i = 0; i < 128; ++i) query += STR("+string-length('abcdefgh')"); + + CHECK_XPATH_NUMBER(xml_node(), query.c_str(), 1024); +} + +TEST(xpath_allocator_large_page) +{ + pugi::string_t query; + + for (int i = 0; i < 1024; ++i) query += STR("abcdefgh"); + + CHECK_XPATH_NUMBER(xml_node(), (STR("string-length('") + query + STR("')")).c_str(), 8192); +} + +TEST_XML(xpath_sort_complex, "test") +{ + // just some random union order, it should not matter probably? + xpath_node_set ns = doc.child(STR("node")).select_nodes(STR("child1 | child2 | child1/@* | . | child2/@* | child2/text()")); + + ns.sort(false); + xpath_node_set sorted = ns; + + ns.sort(true); + xpath_node_set reverse_sorted = ns; + + xpath_node_set_tester(sorted, "sorted order failed") % 2 % 3 % 4 % 5 % 6 % 7 % 8; + xpath_node_set_tester(reverse_sorted, "reverse sorted order failed") % 8 % 7 % 6 % 5 % 4 % 3 % 2; +} + +TEST_XML(xpath_sort_children, "") +{ + xpath_node_set ns = doc.child(STR("node")).select_nodes(STR("child/subchild[@id=1] | child/subchild[@id=2]")); + + ns.sort(false); + xpath_node_set sorted = ns; + + ns.sort(true); + xpath_node_set reverse_sorted = ns; + + xpath_node_set_tester(sorted, "sorted order failed") % 4 % 7; + xpath_node_set_tester(reverse_sorted, "reverse sorted order failed") % 7 % 4; +} + +TEST_XML(xpath_sort_attributes, "") +{ + xml_node n = doc.child(STR("node")); + + // we need to insert attributes manually since unsorted node sets are (always?) 
sorted via pointers because of remove_duplicates, + // so we need to have different document and pointer order to cover all comparator cases + n.append_attribute(STR("attr2")); + n.append_attribute(STR("attr3")); + n.insert_attribute_before(STR("attr1"), n.attribute(STR("attr2"))); + + xpath_node_set ns = n.select_nodes(STR("@*")); + + ns.sort(true); + xpath_node_set reverse_sorted = ns; + + ns.sort(false); + xpath_node_set sorted = ns; + + xpath_node_set_tester(sorted, "sorted order failed") % 3 % 4 % 5; + xpath_node_set_tester(reverse_sorted, "reverse sorted order failed") % 5 % 4 % 3; +} + +TEST(xpath_long_numbers_parse) +{ + const pugi::char_t* str_flt_max = STR("340282346638528860000000000000000000000"); + const pugi::char_t* str_flt_max_dec = STR("340282346638528860000000000000000000000.000000"); + + const pugi::char_t* str_dbl_max = STR("179769313486231570000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"); + const pugi::char_t* str_dbl_max_dec = STR("179769313486231570000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.000000"); + + xml_node c; + + // check parsing + CHECK_XPATH_NUMBER(c, str_flt_max, FLT_MAX); + CHECK_XPATH_NUMBER(c, str_flt_max_dec, FLT_MAX); + CHECK_XPATH_NUMBER(c, str_dbl_max, DBL_MAX); + CHECK_XPATH_NUMBER(c, str_dbl_max_dec, DBL_MAX); +} + +static bool test_xpath_string_prefix(const pugi::xml_node& node, const pugi::char_t* query, const pugi::char_t* expected, size_t match_length) +{ +#ifdef PUGIXML_WCHAR_MODE + size_t expected_length = wcslen(expected); +#else 
+ size_t expected_length = strlen(expected); +#endif + + pugi::xpath_query q(query); + pugi::string_t value = q.evaluate_string(node); + + return value.length() == expected_length && value.compare(0, match_length, expected, match_length) == 0; +} + +TEST(xpath_long_numbers_stringize) +{ + const pugi::char_t* str_flt_max = STR("340282346638528860000000000000000000000"); + const pugi::char_t* str_flt_max_dec = STR("340282346638528860000000000000000000000.000000"); + + const pugi::char_t* str_dbl_max = STR("179769313486231570000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"); + const pugi::char_t* str_dbl_max_dec = STR("179769313486231570000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.000000"); + + xml_node c; + + CHECK(test_xpath_string_prefix(c, str_flt_max, str_flt_max, 15)); + CHECK(test_xpath_string_prefix(c, str_flt_max_dec, str_flt_max, 15)); + + CHECK(test_xpath_string_prefix(c, str_dbl_max, str_dbl_max, 15)); + CHECK(test_xpath_string_prefix(c, str_dbl_max_dec, str_dbl_max, 15)); +} + +#include + +TEST(xpath_denorm_numbers) +{ + pugi::string_t query; + + // 10^-318 - double denormal + for (int i = 0; i < 106; ++i) + { + if (i != 0) query += STR(" * "); + query += STR("0.001"); + } + + CHECK_XPATH_STRING(xml_node(), query.c_str(), 
STR("0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000009999987484955998")); +} + +TEST_XML(xpath_rexml_1, "") +{ + CHECK_XPATH_NODESET(doc, STR("//*[local-name()='c' and @id='b']")) % 6; + CHECK_XPATH_NODESET(doc, STR("//*[ local-name()='c' and @id='b' ]")) % 6; + CHECK_XPATH_NODESET(doc, STR("/a/c[@id]")) % 6; + CHECK_XPATH_NODESET(doc, STR("/a/c[(@id)]")) % 6; + CHECK_XPATH_NODESET(doc, STR("/a/c[ @id ]")) % 6; + CHECK_XPATH_NODESET(doc, STR("/a/c[ (@id) ]")) % 6; + CHECK_XPATH_NODESET(doc, STR("/a/c[( @id )]")) % 6; + CHECK_XPATH_NODESET(doc, STR("/a/c[ ( @id ) ]")) % 6; + CHECK_XPATH_NODESET(doc, STR("/a/c [ ( @id ) ] ")) % 6; + CHECK_XPATH_NODESET(doc, STR(" / a / c [ ( @id ) ] ")) % 6; +} + +TEST_XML(xpath_rexml_2, "zzz") +{ + CHECK_XPATH_NODESET(doc, STR("a:x/a:y[@p='p' and @q='q']/a:z/text()")) % 8; +} + +TEST_XML(xpath_rexml_3, "
free flowing text.
free flowing text.
free flowing text.
") +{ + CHECK_XPATH_NODESET(doc, STR("//section[../self::section[@role=\"division\"]]")) % 10 % 15; + CHECK_XPATH_NODESET(doc, STR("//section[@role=\"subdivision\" and not(../self::section[@role=\"division\"])]")) % 3; + CHECK_XPATH_NODESET(doc, STR("//section[@role=\"subdivision\"][not(../self::section[@role=\"division\"])]")) % 3; +} + +TEST_XML_FLAGS(xpath_rexml_4, "TEXT1TEXT2", parse_default | parse_comments) +{ + CHECK_XPATH_NODESET(doc, STR("/descendant-or-self::node()[count(child::node()|following-sibling::node()|preceding-sibling::node())=0]")) % 6 % 17 % 20; +} + +TEST_XML(xpath_rexml_5, "") +{ + CHECK_XPATH_FAIL(STR(".//[@id]")); + CHECK_XPATH_NODESET(doc, STR(".//self::*[@id]")) % 4 % 6; + CHECK_XPATH_NODESET(doc, STR(".//node()[@id]")) % 4 % 6; +} + +TEST_XML(xpath_rexml_6, "
ab
") +{ + CHECK_XPATH_NODESET(doc, STR("//em|//strong")) % 4 % 6; + CHECK_XPATH_NODESET(doc, STR("//*[self::em | self::strong]")) % 4 % 6; + CHECK_XPATH_NODESET(doc, STR("//*[name()=\"em\" or name()=\"strong\"]")) % 4 % 6; + CHECK_XPATH_NODESET(doc, STR("//*[self::em or self::strong]")) % 4 % 6; +} + +TEST_XML(xpath_xsl_list_1, "whatevertextselectsomething") +{ + // if I'm not last, and the next input/type isn't select + CHECK_XPATH_NODESET(doc, STR("input[type[parent::input/following-sibling::input[1]/type != 'select']]")) % 2 % 8; + CHECK_XPATH_NODESET(doc, STR("input[type[../following-sibling::input[1]/type != 'select']]")) % 2 % 8; + + CHECK_XPATH_NODESET(doc, STR("input[position()+1]")); +} + +TEST_XML(xpath_xsl_list_2, "
") +{ + CHECK_XPATH_FAIL(STR(".[not(.=ancestor::TR/TD[15]/node())]")); + + CHECK_XPATH_NODESET(doc.child(STR("TR")).find_child_by_attribute(STR("TD"), STR("id"), STR("1")).first_child(), STR("self::node()[not(.=ancestor::TR/TD[3]/node())]")) % 5; + CHECK_XPATH_NODESET(doc.child(STR("TR")).find_child_by_attribute(STR("TD"), STR("id"), STR("2")).first_child(), STR("self::node()[not(.=ancestor::TR/TD[3]/node())]")) % 8; + CHECK_XPATH_NODESET(doc.child(STR("TR")).find_child_by_attribute(STR("TD"), STR("id"), STR("3")).first_child(), STR("self::node()[not(.=ancestor::TR/TD[3]/node())]")); + CHECK_XPATH_NODESET(doc.child(STR("TR")).find_child_by_attribute(STR("TD"), STR("id"), STR("4")).first_child(), STR("self::node()[not(.=ancestor::TR/TD[3]/node())]")) % 14; + + CHECK_XPATH_NODESET(doc.child(STR("TR")).find_child_by_attribute(STR("TD"), STR("id"), STR("1")), STR("node()[not(.=ancestor::TR/TD[3]/node())]")) % 5; + CHECK_XPATH_NODESET(doc.child(STR("TR")).find_child_by_attribute(STR("TD"), STR("id"), STR("2")), STR("node()[not(.=ancestor::TR/TD[3]/node())]")) % 8; + CHECK_XPATH_NODESET(doc.child(STR("TR")).find_child_by_attribute(STR("TD"), STR("id"), STR("3")), STR("node()[not(.=ancestor::TR/TD[3]/node())]")); + CHECK_XPATH_NODESET(doc.child(STR("TR")).find_child_by_attribute(STR("TD"), STR("id"), STR("4")), STR("node()[not(.=ancestor::TR/TD[3]/node())]")) % 14; +} + +TEST_XML(xpath_star_token, "0.5
") +{ + CHECK_XPATH_NODESET(doc, STR("//*[/* * 4]")) % 6 % 9; + CHECK_XPATH_NODESET(doc, STR("//*[/**4]")) % 6 % 9; + CHECK_XPATH_FAIL(STR("//*[/***4]")); +} + +TEST(xpath_miscellaneous) +{ + CHECK_XPATH_FAIL(STR("/root/child[a=3]/substring(child::text())")); + CHECK_XPATH_NODESET(xml_node(), STR("foo/@FOO/@bar")); +} + +#endif diff --git a/tests/test_xpath_api.cpp b/tests/test_xpath_api.cpp index 56e8ff6..f7fc868 100644 --- a/tests/test_xpath_api.cpp +++ b/tests/test_xpath_api.cpp @@ -1,150 +1,150 @@ -#ifndef PUGIXML_NO_XPATH - -#include "common.hpp" - -#include "helpers.hpp" - -#include - -TEST_XML(xpath_api_select_nodes, "") -{ - xpath_node_set ns1 = doc.select_nodes(STR("node/foo")); - - xpath_query q(STR("node/foo")); - xpath_node_set ns2 = doc.select_nodes(q); - - xpath_node_set_tester(ns1, "ns1") % 4 % 5; - xpath_node_set_tester(ns2, "ns2") % 4 % 5; -} - -TEST_XML(xpath_api_select_single_node, "") -{ - xpath_node n1 = doc.select_single_node(STR("node/foo")); - - xpath_query q(STR("node/foo")); - xpath_node n2 = doc.select_single_node(q); - - CHECK(n1.node().attribute(STR("id")).as_int() == 1); - CHECK(n2.node().attribute(STR("id")).as_int() == 1); - - xpath_node n3 = doc.select_single_node(STR("node/bar")); - - CHECK(!n3); - - xpath_node n4 = doc.select_single_node(STR("node/head/following-sibling::foo")); - xpath_node n5 = doc.select_single_node(STR("node/tail/preceding-sibling::foo")); - - CHECK(n4.node().attribute(STR("id")).as_int() == 1); - CHECK(n5.node().attribute(STR("id")).as_int() == 1); -} - -TEST(xpath_api_exception_what) -{ - try - { - xpath_query q(STR("")); - } - catch (const xpath_exception& e) - { - CHECK(e.what()[0] != 0); - } -} - -TEST_XML(xpath_api_node_bool_ops, "") -{ - generic_bool_ops_test(doc.select_single_node(STR("node"))); - generic_bool_ops_test(doc.select_single_node(STR("node/@attr"))); -} - -TEST_XML(xpath_api_node_eq_ops, "") -{ - generic_eq_ops_test(doc.select_single_node(STR("node")), 
doc.select_single_node(STR("node/@attr"))); -} - -TEST_XML(xpath_api_node_accessors, "") -{ - xpath_node null; - xpath_node node = doc.select_single_node(STR("node")); - xpath_node attr = doc.select_single_node(STR("node/@attr")); - - CHECK(!null.node()); - CHECK(!null.attribute()); - CHECK(!null.parent()); - - CHECK(node.node() == doc.child(STR("node"))); - CHECK(!node.attribute()); - CHECK(node.parent() == doc); - - CHECK(!attr.node()); - CHECK(attr.attribute() == doc.child(STR("node")).attribute(STR("attr"))); - CHECK(attr.parent() == doc.child(STR("node"))); -} - -inline void xpath_api_node_accessors_helper(const xpath_node_set& set) -{ - CHECK(set.size() == 2); - CHECK(set.type() == xpath_node_set::type_sorted); - CHECK(!set.empty()); - CHECK_STRING(set[0].node().name(), STR("foo")); - CHECK_STRING(set[1].node().name(), STR("foo")); - CHECK(set.first() == set[0]); - CHECK(set.begin() + 2 == set.end()); - CHECK(set.begin()[0] == set[0] && set.begin()[1] == set[1]); -} - -TEST_XML(xpath_api_nodeset_accessors, "") -{ - xpath_node_set null; - CHECK(null.size() == 0); - CHECK(null.type() == xpath_node_set::type_unsorted); - CHECK(null.empty()); - CHECK(!null.first()); - CHECK(null.begin() == null.end()); - - xpath_node_set set = doc.select_nodes(STR("node/foo")); - xpath_api_node_accessors_helper(set); - - xpath_node_set copy = set; - xpath_api_node_accessors_helper(copy); - - xpath_node_set assigned; - assigned = set; - xpath_api_node_accessors_helper(assigned); - - xpath_node_set nullcopy = null; -} - -TEST_XML(xpath_api_evaluate, "") -{ - xpath_query q(STR("node/@attr")); - - CHECK(q.evaluate_boolean(doc)); - CHECK(q.evaluate_number(doc) == 3); - CHECK(q.evaluate_string(doc) == STR("3")); - - xpath_node_set ns = q.evaluate_node_set(doc); - CHECK(ns.size() == 1 && ns[0].attribute() == doc.child(STR("node")).attribute(STR("attr"))); -} - -TEST(xpath_api_evaluate_node_set) -{ - try - { - xpath_query q(STR("1")); - - q.evaluate_node_set(xml_node()); - } - catch 
(const xpath_exception&) - { - } -} - -TEST(xpath_api_return_type) -{ - CHECK(xpath_query(STR("node")).return_type() == xpath_type_node_set); - CHECK(xpath_query(STR("1")).return_type() == xpath_type_number); - CHECK(xpath_query(STR("'s'")).return_type() == xpath_type_string); - CHECK(xpath_query(STR("true()")).return_type() == xpath_type_boolean); -} - -#endif +#ifndef PUGIXML_NO_XPATH + +#include "common.hpp" + +#include "helpers.hpp" + +#include + +TEST_XML(xpath_api_select_nodes, "") +{ + xpath_node_set ns1 = doc.select_nodes(STR("node/foo")); + + xpath_query q(STR("node/foo")); + xpath_node_set ns2 = doc.select_nodes(q); + + xpath_node_set_tester(ns1, "ns1") % 4 % 5; + xpath_node_set_tester(ns2, "ns2") % 4 % 5; +} + +TEST_XML(xpath_api_select_single_node, "") +{ + xpath_node n1 = doc.select_single_node(STR("node/foo")); + + xpath_query q(STR("node/foo")); + xpath_node n2 = doc.select_single_node(q); + + CHECK(n1.node().attribute(STR("id")).as_int() == 1); + CHECK(n2.node().attribute(STR("id")).as_int() == 1); + + xpath_node n3 = doc.select_single_node(STR("node/bar")); + + CHECK(!n3); + + xpath_node n4 = doc.select_single_node(STR("node/head/following-sibling::foo")); + xpath_node n5 = doc.select_single_node(STR("node/tail/preceding-sibling::foo")); + + CHECK(n4.node().attribute(STR("id")).as_int() == 1); + CHECK(n5.node().attribute(STR("id")).as_int() == 1); +} + +TEST(xpath_api_exception_what) +{ + try + { + xpath_query q(STR("")); + } + catch (const xpath_exception& e) + { + CHECK(e.what()[0] != 0); + } +} + +TEST_XML(xpath_api_node_bool_ops, "") +{ + generic_bool_ops_test(doc.select_single_node(STR("node"))); + generic_bool_ops_test(doc.select_single_node(STR("node/@attr"))); +} + +TEST_XML(xpath_api_node_eq_ops, "") +{ + generic_eq_ops_test(doc.select_single_node(STR("node")), doc.select_single_node(STR("node/@attr"))); +} + +TEST_XML(xpath_api_node_accessors, "") +{ + xpath_node null; + xpath_node node = doc.select_single_node(STR("node")); + xpath_node 
attr = doc.select_single_node(STR("node/@attr")); + + CHECK(!null.node()); + CHECK(!null.attribute()); + CHECK(!null.parent()); + + CHECK(node.node() == doc.child(STR("node"))); + CHECK(!node.attribute()); + CHECK(node.parent() == doc); + + CHECK(!attr.node()); + CHECK(attr.attribute() == doc.child(STR("node")).attribute(STR("attr"))); + CHECK(attr.parent() == doc.child(STR("node"))); +} + +inline void xpath_api_node_accessors_helper(const xpath_node_set& set) +{ + CHECK(set.size() == 2); + CHECK(set.type() == xpath_node_set::type_sorted); + CHECK(!set.empty()); + CHECK_STRING(set[0].node().name(), STR("foo")); + CHECK_STRING(set[1].node().name(), STR("foo")); + CHECK(set.first() == set[0]); + CHECK(set.begin() + 2 == set.end()); + CHECK(set.begin()[0] == set[0] && set.begin()[1] == set[1]); +} + +TEST_XML(xpath_api_nodeset_accessors, "") +{ + xpath_node_set null; + CHECK(null.size() == 0); + CHECK(null.type() == xpath_node_set::type_unsorted); + CHECK(null.empty()); + CHECK(!null.first()); + CHECK(null.begin() == null.end()); + + xpath_node_set set = doc.select_nodes(STR("node/foo")); + xpath_api_node_accessors_helper(set); + + xpath_node_set copy = set; + xpath_api_node_accessors_helper(copy); + + xpath_node_set assigned; + assigned = set; + xpath_api_node_accessors_helper(assigned); + + xpath_node_set nullcopy = null; +} + +TEST_XML(xpath_api_evaluate, "") +{ + xpath_query q(STR("node/@attr")); + + CHECK(q.evaluate_boolean(doc)); + CHECK(q.evaluate_number(doc) == 3); + CHECK(q.evaluate_string(doc) == STR("3")); + + xpath_node_set ns = q.evaluate_node_set(doc); + CHECK(ns.size() == 1 && ns[0].attribute() == doc.child(STR("node")).attribute(STR("attr"))); +} + +TEST(xpath_api_evaluate_node_set) +{ + try + { + xpath_query q(STR("1")); + + q.evaluate_node_set(xml_node()); + } + catch (const xpath_exception&) + { + } +} + +TEST(xpath_api_return_type) +{ + CHECK(xpath_query(STR("node")).return_type() == xpath_type_node_set); + CHECK(xpath_query(STR("1")).return_type() 
== xpath_type_number); + CHECK(xpath_query(STR("'s'")).return_type() == xpath_type_string); + CHECK(xpath_query(STR("true()")).return_type() == xpath_type_boolean); +} + +#endif diff --git a/tests/test_xpath_functions.cpp b/tests/test_xpath_functions.cpp index 7b44294..3eb69c6 100644 --- a/tests/test_xpath_functions.cpp +++ b/tests/test_xpath_functions.cpp @@ -1,747 +1,747 @@ -#ifndef PUGIXML_NO_XPATH - -#include "common.hpp" - -TEST_XML(xpath_number_number, "123") -{ - xml_node c; - xml_node n = doc.child(STR("node")).first_child(); - - // number with 0 arguments - CHECK_XPATH_NUMBER_NAN(c, STR("number()")); - CHECK_XPATH_NUMBER(n, STR("number()"), 123); - - // number with 1 string argument - CHECK_XPATH_NUMBER(c, STR("number(' -123.456 ')"), -123.456); - CHECK_XPATH_NUMBER(c, STR("number(' -123.')"), -123); - CHECK_XPATH_NUMBER(c, STR("number('123.')"), 123); - CHECK_XPATH_NUMBER(c, STR("number('.56')"), 0.56); - CHECK_XPATH_NUMBER(c, STR("number('123 ')"), 123); - CHECK_XPATH_NUMBER_NAN(c, STR("number('foobar')")); - CHECK_XPATH_NUMBER_NAN(c, STR("number('f1')")); - CHECK_XPATH_NUMBER_NAN(c, STR("number('1f')")); - CHECK_XPATH_NUMBER_NAN(c, STR("number('1.f')")); - CHECK_XPATH_NUMBER_NAN(c, STR("number('1.0f')")); - CHECK_XPATH_NUMBER_NAN(c, STR("number('123 f')")); - CHECK_XPATH_NUMBER_NAN(c, STR("number('')")); - CHECK_XPATH_NUMBER_NAN(c, STR("number('.')")); - - // number with 1 bool argument - CHECK_XPATH_NUMBER(c, STR("number(true())"), 1); - CHECK_XPATH_NUMBER(c, STR("number(false())"), 0); - - // number with 1 node set argument - CHECK_XPATH_NUMBER(n, STR("number(.)"), 123); - - // number with 1 number argument - CHECK_XPATH_NUMBER(c, STR("number(1)"), 1); - - // number with 2 arguments - CHECK_XPATH_FAIL(STR("number(1, 2)")); -} - -TEST_XML(xpath_number_sum, "123789") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - // sum with 0 arguments - CHECK_XPATH_FAIL(STR("sum()")); - - // sum with 1 argument - CHECK_XPATH_NUMBER(c, STR("sum(.)"), 0); - 
CHECK_XPATH_NUMBER(n, STR("sum(.)"), 123789); // 123 .. 789 - - CHECK_XPATH_NUMBER(n, STR("sum(./descendant-or-self::node())"), 125490); // node + 123 + child + 789 = 123789 + 123 + 789 + 789 = 125490 - CHECK_XPATH_NUMBER(n, STR("sum(.//node())"), 1701); // 123 + child + 789 = 123 + 789 + 789 - CHECK_XPATH_NUMBER_NAN(doc.last_child(), STR("sum(.)")); - - // sum with 2 arguments - CHECK_XPATH_FAIL(STR("sum(1, 2)")); - - // sum with 1 non-node-set argument - CHECK_XPATH_FAIL(STR("sum(1)")); -} - -TEST(xpath_number_floor) -{ - xml_node c; - - // floor with 0 arguments - CHECK_XPATH_FAIL(STR("floor()")); - - // floor with 1 argument - CHECK_XPATH_NUMBER(c, STR("floor(0)"), 0); - CHECK_XPATH_NUMBER(c, STR("floor(1.2)"), 1); - CHECK_XPATH_NUMBER(c, STR("floor(1)"), 1); - CHECK_XPATH_NUMBER(c, STR("floor(-1.2)"), -2); - CHECK_XPATH_NUMBER_NAN(c, STR("floor(string('nan'))")); - CHECK_XPATH_STRING(c, STR("string(floor(1 div 0))"), STR("Infinity")); - CHECK_XPATH_STRING(c, STR("string(floor(-1 div 0))"), STR("-Infinity")); - - // floor with 2 arguments - CHECK_XPATH_FAIL(STR("floor(1, 2)")); - - // floor with argument 0 should return 0 - CHECK_XPATH_STRING(c, STR("string(1 div floor(0))"), STR("Infinity")); - - // floor with argument -0 should return -0 -#if !(defined(__APPLE__) && defined(__MACH__)) // MacOS X gcc 4.0.1 implements floor incorrectly (floor never returns -0) - CHECK_XPATH_STRING(c, STR("string(1 div floor(-0))"), STR("-Infinity")); -#endif -} - -TEST(xpath_number_ceiling) -{ - xml_node c; - - // ceiling with 0 arguments - CHECK_XPATH_FAIL(STR("ceiling()")); - - // ceiling with 1 argument - CHECK_XPATH_NUMBER(c, STR("ceiling(0)"), 0); - CHECK_XPATH_NUMBER(c, STR("ceiling(1.2)"), 2); - CHECK_XPATH_NUMBER(c, STR("ceiling(1)"), 1); - CHECK_XPATH_NUMBER(c, STR("ceiling(-1.2)"), -1); - CHECK_XPATH_NUMBER_NAN(c, STR("ceiling(string('nan'))")); - CHECK_XPATH_STRING(c, STR("string(ceiling(1 div 0))"), STR("Infinity")); - CHECK_XPATH_STRING(c, STR("string(ceiling(-1 
div 0))"), STR("-Infinity")); - - // ceiling with 2 arguments - CHECK_XPATH_FAIL(STR("ceiling(1, 2)")); - - // ceiling with argument 0 should return 0 - CHECK_XPATH_STRING(c, STR("string(1 div ceiling(0))"), STR("Infinity")); - - // ceiling with argument in range (-1, -0] should result in minus zero -#if !(defined(__APPLE__) && defined(__MACH__)) // MacOS X gcc 4.0.1 implements ceil incorrectly (ceil never returns -0) - CHECK_XPATH_STRING(c, STR("string(1 div ceiling(-0))"), STR("-Infinity")); - CHECK_XPATH_STRING(c, STR("string(1 div ceiling(-0.1))"), STR("-Infinity")); -#endif -} - -TEST(xpath_number_round) -{ - xml_node c; - - // round with 0 arguments - CHECK_XPATH_FAIL(STR("round()")); - - // round with 1 argument - CHECK_XPATH_NUMBER(c, STR("round(1.2)"), 1); - CHECK_XPATH_NUMBER(c, STR("round(1.5)"), 2); - CHECK_XPATH_NUMBER(c, STR("round(1.8)"), 2); - CHECK_XPATH_NUMBER(c, STR("round(1)"), 1); - CHECK_XPATH_NUMBER(c, STR("round(-1.2)"), -1); - CHECK_XPATH_NUMBER(c, STR("round(-1.5)"), -1); - CHECK_XPATH_NUMBER(c, STR("round(-1.6)"), -2); - CHECK_XPATH_NUMBER_NAN(c, STR("round(string('nan'))")); - CHECK_XPATH_STRING(c, STR("string(round(1 div 0))"), STR("Infinity")); - CHECK_XPATH_STRING(c, STR("string(round(-1 div 0))"), STR("-Infinity")); - - // round with 2 arguments - CHECK_XPATH_FAIL(STR("round(1, 2)")); - - // round with argument in range [-0.5, -0] should result in minus zero - CHECK_XPATH_STRING(c, STR("string(1 div round(0))"), STR("Infinity")); - -#if !(defined(__APPLE__) && defined(__MACH__)) // MacOS X gcc 4.0.1 implements ceil incorrectly (ceil never returns -0) - CHECK_XPATH_STRING(c, STR("string(1 div round(-0.5))"), STR("-Infinity")); - CHECK_XPATH_STRING(c, STR("string(1 div round(-0))"), STR("-Infinity")); - CHECK_XPATH_STRING(c, STR("string(1 div round(-0.1))"), STR("-Infinity")); -#endif -} - -TEST_XML(xpath_boolean_boolean, "") -{ - xml_node c; - - // boolean with 0 arguments - CHECK_XPATH_FAIL(STR("boolean()")); - - // boolean with 1 
number argument - CHECK_XPATH_BOOLEAN(c, STR("boolean(0)"), false); - CHECK_XPATH_BOOLEAN(c, STR("boolean(1)"), true); - CHECK_XPATH_BOOLEAN(c, STR("boolean(-1)"), true); - CHECK_XPATH_BOOLEAN(c, STR("boolean(0.1)"), true); - CHECK_XPATH_BOOLEAN(c, STR("boolean(number('nan'))"), false); - - // boolean with 1 string argument - CHECK_XPATH_BOOLEAN(c, STR("boolean('x')"), true); - CHECK_XPATH_BOOLEAN(c, STR("boolean('')"), false); - - // boolean with 1 node set argument - CHECK_XPATH_BOOLEAN(c, STR("boolean(.)"), false); - CHECK_XPATH_BOOLEAN(doc, STR("boolean(.)"), true); - CHECK_XPATH_BOOLEAN(doc, STR("boolean(foo)"), false); - - // boolean with 2 arguments - CHECK_XPATH_FAIL(STR("boolean(1, 2)")); -} - -TEST(xpath_boolean_not) -{ - xml_node c; - - // not with 0 arguments - CHECK_XPATH_FAIL(STR("not()")); - - // not with 1 argument - CHECK_XPATH_BOOLEAN(c, STR("not(true())"), false); - CHECK_XPATH_BOOLEAN(c, STR("not(false())"), true); - - // boolean with 2 arguments - CHECK_XPATH_FAIL(STR("not(1, 2)")); -} - -TEST(xpath_boolean_true) -{ - xml_node c; - - // true with 0 arguments - CHECK_XPATH_BOOLEAN(c, STR("true()"), true); - - // true with 1 argument - CHECK_XPATH_FAIL(STR("true(1)")); -} - -TEST(xpath_boolean_false) -{ - xml_node c; - - // false with 0 arguments - CHECK_XPATH_BOOLEAN(c, STR("false()"), false); - - // false with 1 argument - CHECK_XPATH_FAIL(STR("false(1)")); -} - -TEST_XML(xpath_boolean_lang, "") -{ - xml_node c; - - // lang with 0 arguments - CHECK_XPATH_FAIL(STR("lang()")); - - // lang with 1 argument, no language - CHECK_XPATH_BOOLEAN(c, STR("lang('en')"), false); - CHECK_XPATH_BOOLEAN(doc.child(STR("foo")), STR("lang('en')"), false); - CHECK_XPATH_BOOLEAN(doc.child(STR("foo")), STR("lang('')"), false); - CHECK_XPATH_BOOLEAN(doc.child(STR("foo")).child(STR("bar")), STR("lang('en')"), false); - - // lang with 1 argument, same language/prefix - CHECK_XPATH_BOOLEAN(doc.child(STR("node")), STR("lang('en')"), true); - 
CHECK_XPATH_BOOLEAN(doc.child(STR("node")).child(STR("child")), STR("lang('ru-uk')"), true); - CHECK_XPATH_BOOLEAN(doc.child(STR("node")).child(STR("child")), STR("lang('ru')"), true); - CHECK_XPATH_BOOLEAN(doc.child(STR("node")).child(STR("child")).child(STR("subchild")), STR("lang('ru')"), true); - CHECK_XPATH_BOOLEAN(doc.child(STR("node")).child(STR("child")).child(STR("subchild")), STR("lang('RU')"), true); - - // lang with 1 argument, different language/prefix - CHECK_XPATH_BOOLEAN(doc.child(STR("node")), STR("lang('')"), false); - CHECK_XPATH_BOOLEAN(doc.child(STR("node")), STR("lang('e')"), false); - CHECK_XPATH_BOOLEAN(doc.child(STR("node")).child(STR("child")), STR("lang('en')"), false); - CHECK_XPATH_BOOLEAN(doc.child(STR("node")).child(STR("child")), STR("lang('ru-gb')"), false); - CHECK_XPATH_BOOLEAN(doc.child(STR("node")).child(STR("child")), STR("lang('r')"), false); - CHECK_XPATH_BOOLEAN(doc.child(STR("node")).child(STR("child")).child(STR("subchild")), STR("lang('en')"), false); - - // lang with 2 arguments - CHECK_XPATH_FAIL(STR("lang(1, 2)")); -} - -TEST_XML(xpath_string_string, "123789100") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - // string with 0 arguments - CHECK_XPATH_STRING(c, STR("string()"), STR("")); - CHECK_XPATH_STRING(n.child(STR("child")), STR("string()"), STR("789")); - - // string with 1 node-set argument - CHECK_XPATH_STRING(n, STR("string(child)"), STR("789")); - CHECK_XPATH_STRING(n, STR("string(child/@id)"), STR("1")); - CHECK_XPATH_STRING(n, STR("string(.)"), STR("123789200100")); - - // string with 1 number argument - CHECK_XPATH_STRING(c, STR("string(0 div 0)"), STR("NaN")); - CHECK_XPATH_STRING(c, STR("string(0)"), STR("0")); - CHECK_XPATH_STRING(c, STR("string(-0)"), STR("0")); - CHECK_XPATH_STRING(c, STR("string(1 div 0)"), STR("Infinity")); - CHECK_XPATH_STRING(c, STR("string(-1 div -0)"), STR("Infinity")); - CHECK_XPATH_STRING(c, STR("string(-1 div 0)"), STR("-Infinity")); - CHECK_XPATH_STRING(c, 
STR("string(1 div -0)"), STR("-Infinity")); - CHECK_XPATH_STRING(c, STR("string(1234567)"), STR("1234567")); - CHECK_XPATH_STRING(c, STR("string(-1234567)"), STR("-1234567")); - CHECK_XPATH_STRING(c, STR("string(1234.5678)"), STR("1234.5678")); - CHECK_XPATH_STRING(c, STR("string(-1234.5678)"), STR("-1234.5678")); - CHECK_XPATH_STRING(c, STR("string(0.5678)"), STR("0.5678")); - CHECK_XPATH_STRING(c, STR("string(-0.5678)"), STR("-0.5678")); - CHECK_XPATH_STRING(c, STR("string(0.0)"), STR("0")); - CHECK_XPATH_STRING(c, STR("string(-0.0)"), STR("0")); - - // string with 1 boolean argument - CHECK_XPATH_STRING(c, STR("string(true())"), STR("true")); - CHECK_XPATH_STRING(c, STR("string(false())"), STR("false")); - - // string with 1 string argument - CHECK_XPATH_STRING(c, STR("string('abc')"), STR("abc")); - - // string with 2 arguments - CHECK_XPATH_FAIL(STR("string(1, 2)")); -} - -TEST(xpath_string_concat) -{ - xml_node c; - - // concat with 0 arguments - CHECK_XPATH_FAIL(STR("concat()")); - - // concat with 1 argument - CHECK_XPATH_FAIL(STR("concat('')")); - - // concat with exactly 2 arguments - CHECK_XPATH_STRING(c, STR("concat('prev','next')"), STR("prevnext")); - CHECK_XPATH_STRING(c, STR("concat('','next')"), STR("next")); - CHECK_XPATH_STRING(c, STR("concat('prev','')"), STR("prev")); - - // concat with 3 or more arguments - CHECK_XPATH_STRING(c, STR("concat('a', 'b', 'c')"), STR("abc")); - CHECK_XPATH_STRING(c, STR("concat('a', 'b', 'c', 'd')"), STR("abcd")); - CHECK_XPATH_STRING(c, STR("concat('a', 'b', 'c', 'd', 'e')"), STR("abcde")); - CHECK_XPATH_STRING(c, STR("concat('a', 'b', 'c', 'd', 'e', 'f')"), STR("abcdef")); - CHECK_XPATH_STRING(c, STR("concat('a', 'b', 'c', 'd', 'e', 'f', 'g')"), STR("abcdefg")); - CHECK_XPATH_STRING(c, STR("concat(1, 2, 3, 4, 5, 6, 7, 8)"), STR("12345678")); -} - -TEST(xpath_string_starts_with) -{ - xml_node c; - - // starts-with with 0 arguments - CHECK_XPATH_FAIL(STR("starts-with()")); - - // starts-with with 1 argument - 
CHECK_XPATH_FAIL(STR("starts-with('a')")); - - // starts-with with 2 arguments - CHECK_XPATH_BOOLEAN(c, STR("starts-with('abc', '')"), true); - CHECK_XPATH_BOOLEAN(c, STR("starts-with('abc', 'a')"), true); - CHECK_XPATH_BOOLEAN(c, STR("starts-with('abc', 'abc')"), true); - CHECK_XPATH_BOOLEAN(c, STR("starts-with('abc', 'abcd')"), false); - CHECK_XPATH_BOOLEAN(c, STR("starts-with('bc', 'c')"), false); - CHECK_XPATH_BOOLEAN(c, STR("starts-with('', 'c')"), false); - CHECK_XPATH_BOOLEAN(c, STR("starts-with('', '')"), true); - - // starts-with with 3 arguments - CHECK_XPATH_FAIL(STR("starts-with('a', 'b', 'c')")); -} - -TEST(xpath_string_contains) -{ - xml_node c; - - // contains with 0 arguments - CHECK_XPATH_FAIL(STR("contains()")); - - // contains with 1 argument - CHECK_XPATH_FAIL(STR("contains('a')")); - - // contains with 2 arguments - CHECK_XPATH_BOOLEAN(c, STR("contains('abc', '')"), true); - CHECK_XPATH_BOOLEAN(c, STR("contains('abc', 'a')"), true); - CHECK_XPATH_BOOLEAN(c, STR("contains('abc', 'abc')"), true); - CHECK_XPATH_BOOLEAN(c, STR("contains('abcd', 'bc')"), true); - CHECK_XPATH_BOOLEAN(c, STR("contains('abc', 'abcd')"), false); - CHECK_XPATH_BOOLEAN(c, STR("contains('b', 'bc')"), false); - CHECK_XPATH_BOOLEAN(c, STR("contains('', 'c')"), false); - CHECK_XPATH_BOOLEAN(c, STR("contains('', '')"), true); - - // contains with 3 arguments - CHECK_XPATH_FAIL(STR("contains('a', 'b', 'c')")); -} - -TEST(xpath_string_substring_before) -{ - xml_node c; - - // substring-before with 0 arguments - CHECK_XPATH_FAIL(STR("substring-before()")); - - // substring-before with 1 argument - CHECK_XPATH_FAIL(STR("substring-before('a')")); - - // substring-before with 2 arguments - CHECK_XPATH_STRING(c, STR("substring-before('abc', 'abc')"), STR("")); - CHECK_XPATH_STRING(c, STR("substring-before('abc', 'a')"), STR("")); - CHECK_XPATH_STRING(c, STR("substring-before('abc', 'cd')"), STR("")); - CHECK_XPATH_STRING(c, STR("substring-before('abc', 'b')"), STR("a")); - 
CHECK_XPATH_STRING(c, STR("substring-before('abc', 'c')"), STR("ab")); - CHECK_XPATH_STRING(c, STR("substring-before('', '')"), STR("")); - - // substring-before with 2 arguments, from W3C standard - CHECK_XPATH_STRING(c, STR("substring-before(\"1999/04/01\",\"/\")"), STR("1999")); - - // substring-before with 3 arguments - CHECK_XPATH_FAIL(STR("substring-before('a', 'b', 'c')")); -} - -TEST(xpath_string_substring_after) -{ - xml_node c; - - // substring-after with 0 arguments - CHECK_XPATH_FAIL(STR("substring-after()")); - - // substring-after with 1 argument - CHECK_XPATH_FAIL(STR("substring-after('a')")); - - // substring-after with 2 arguments - CHECK_XPATH_STRING(c, STR("substring-after('abc', 'abc')"), STR("")); - CHECK_XPATH_STRING(c, STR("substring-after('abc', 'a')"), STR("bc")); - CHECK_XPATH_STRING(c, STR("substring-after('abc', 'cd')"), STR("")); - CHECK_XPATH_STRING(c, STR("substring-after('abc', 'b')"), STR("c")); - CHECK_XPATH_STRING(c, STR("substring-after('abc', 'c')"), STR("")); - CHECK_XPATH_STRING(c, STR("substring-after('', '')"), STR("")); - - // substring-before with 2 arguments, from W3C standard - CHECK_XPATH_STRING(c, STR("substring-after(\"1999/04/01\",\"/\")"), STR("04/01")); - CHECK_XPATH_STRING(c, STR("substring-after(\"1999/04/01\",\"19\")"), STR("99/04/01")); - - // substring-after with 3 arguments - CHECK_XPATH_FAIL(STR("substring-after('a', 'b', 'c')")); -} - -TEST(xpath_string_substring) -{ - xml_node c; - - // substring with 0 arguments - CHECK_XPATH_FAIL(STR("substring()")); - - // substring with 1 argument - CHECK_XPATH_FAIL(STR("substring('')")); - - // substring with 2 arguments - CHECK_XPATH_STRING(c, STR("substring('abcd', 2)"), STR("bcd")); - CHECK_XPATH_STRING(c, STR("substring('abcd', 1)"), STR("abcd")); - CHECK_XPATH_STRING(c, STR("substring('abcd', 1.1)"), STR("abcd")); - CHECK_XPATH_STRING(c, STR("substring('abcd', 1.5)"), STR("bcd")); - CHECK_XPATH_STRING(c, STR("substring('abcd', 1.8)"), STR("bcd")); - 
CHECK_XPATH_STRING(c, STR("substring('abcd', 10)"), STR("")); - CHECK_XPATH_STRING(c, STR("substring('abcd', 0)"), STR("abcd")); - CHECK_XPATH_STRING(c, STR("substring('abcd', -100)"), STR("abcd")); - CHECK_XPATH_STRING(c, STR("substring('abcd', -1 div 0)"), STR("abcd")); - CHECK_XPATH_STRING(c, STR("substring('abcd', 1 div 0)"), STR("")); - CHECK_XPATH_STRING(c, STR("substring('abcd', 0 div 0)"), STR("")); - CHECK_XPATH_STRING(c, STR("substring('', 1)"), STR("")); - CHECK_XPATH_STRING(c, STR("substring('', 0)"), STR("")); - CHECK_XPATH_STRING(c, STR("substring(substring('internalexternalcorrect substring',9),9)"), STR("correct substring")); - - // substring with 3 arguments - CHECK_XPATH_STRING(c, STR("substring('abcd', 2, 1)"), STR("b")); - CHECK_XPATH_STRING(c, STR("substring('abcd', 2, 2)"), STR("bc")); - CHECK_XPATH_STRING(c, STR("substring('abcd', 1, 0)"), STR("")); - CHECK_XPATH_STRING(c, STR("substring('abcd', 1, 0.4)"), STR("")); - CHECK_XPATH_STRING(c, STR("substring('abcd', 1, 0.5)"), STR("a")); - CHECK_XPATH_STRING(c, STR("substring('abcd', 10, -5)"), STR("")); - CHECK_XPATH_STRING(c, STR("substring('abcd', 0, -1)"), STR("")); - CHECK_XPATH_STRING(c, STR("substring('abcd', -100, 100)"), STR("abcd")); - CHECK_XPATH_STRING(c, STR("substring('abcd', -1 div 0, 4)"), STR("")); - CHECK_XPATH_STRING(c, STR("substring('abcd', 1 div 0, 0 div 0)"), STR("")); - CHECK_XPATH_STRING(c, STR("substring('abcd', 0 div 0, 1)"), STR("")); - CHECK_XPATH_STRING(c, STR("substring('', 1, 2)"), STR("")); - CHECK_XPATH_STRING(c, STR("substring('', 0, 0)"), STR("")); - - // substring with 3 arguments, from W3C standard - CHECK_XPATH_STRING(c, STR("substring('12345', 1.5, 2.6)"), STR("234")); - CHECK_XPATH_STRING(c, STR("substring('12345', 0, 3)"), STR("12")); - CHECK_XPATH_STRING(c, STR("substring('12345', 0 div 0, 3)"), STR("")); - CHECK_XPATH_STRING(c, STR("substring('12345', 1, 0 div 0)"), STR("")); - CHECK_XPATH_STRING(c, STR("substring('12345', -42, 1 div 0)"), 
STR("12345")); - CHECK_XPATH_STRING(c, STR("substring('12345', -1 div 0, 1 div 0)"), STR("")); - - // substring with 4 arguments - CHECK_XPATH_FAIL(STR("substring('', 1, 2, 3)")); -} - -TEST_XML(xpath_string_string_length, "123") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - // string-length with 0 arguments - CHECK_XPATH_NUMBER(c, STR("string-length()"), 0); - CHECK_XPATH_NUMBER(n, STR("string-length()"), 3); - - // string-length with 1 argument - CHECK_XPATH_NUMBER(c, STR("string-length('')"), 0); - CHECK_XPATH_NUMBER(c, STR("string-length('a')"), 1); - CHECK_XPATH_NUMBER(c, STR("string-length('abcdef')"), 6); - - // string-length with 2 arguments - CHECK_XPATH_FAIL(STR("string-length(1, 2)")); -} - -TEST_XML_FLAGS(xpath_string_normalize_space, " \t\r\rval1 \rval2\r\nval3\nval4\r\r", parse_minimal) -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - // normalize-space with 0 arguments - CHECK_XPATH_STRING(c, STR("normalize-space()"), STR("")); - CHECK_XPATH_STRING(n, STR("normalize-space()"), STR("val1 val2 val3 val4")); - - // normalize-space with 1 argument - CHECK_XPATH_STRING(c, STR("normalize-space('')"), STR("")); - CHECK_XPATH_STRING(c, STR("normalize-space('abcd')"), STR("abcd")); - CHECK_XPATH_STRING(c, STR("normalize-space(' \r\nabcd')"), STR("abcd")); - CHECK_XPATH_STRING(c, STR("normalize-space('abcd \n\r')"), STR("abcd")); - CHECK_XPATH_STRING(c, STR("normalize-space('ab\r\n\tcd')"), STR("ab cd")); - CHECK_XPATH_STRING(c, STR("normalize-space('ab cd')"), STR("ab cd")); - CHECK_XPATH_STRING(c, STR("normalize-space('\07')"), STR("\07")); - - // normalize-space with 2 arguments - CHECK_XPATH_FAIL(STR("normalize-space(1, 2)")); -} - -TEST(xpath_string_translate) -{ - xml_node c; - - // translate with 0 arguments - CHECK_XPATH_FAIL(STR("translate()")); - - // translate with 1 argument - CHECK_XPATH_FAIL(STR("translate('a')")); - - // translate with 2 arguments - CHECK_XPATH_FAIL(STR("translate('a', 'b')")); - - // translate with 3 
arguments - CHECK_XPATH_STRING(c, STR("translate('abc', '', '')"), STR("abc")); - CHECK_XPATH_STRING(c, STR("translate('abc', '', 'foo')"), STR("abc")); - CHECK_XPATH_STRING(c, STR("translate('abc', 'ab', 'ba')"), STR("bac")); - CHECK_XPATH_STRING(c, STR("translate('abc', 'ab', 'f')"), STR("fc")); - CHECK_XPATH_STRING(c, STR("translate('abc', 'aabb', '1234')"), STR("13c")); - CHECK_XPATH_STRING(c, STR("translate('', 'abc', 'bac')"), STR("")); - - // translate with 3 arguments, from W3C standard - CHECK_XPATH_STRING(c, STR("translate('bar','abc','ABC')"), STR("BAr")); - CHECK_XPATH_STRING(c, STR("translate('--aaa--','abc-','ABC')"), STR("AAA")); - - // translate with 4 arguments - CHECK_XPATH_FAIL(STR("translate('a', 'b', 'c', 'd')")); -} - -TEST_XML(xpath_nodeset_last, "") -{ - xml_node n = doc.child(STR("node")); - - // last with 0 arguments - CHECK_XPATH_NUMBER(n, STR("last()"), 1); - CHECK_XPATH_NODESET(n, STR("c1[last() = 1]")); - CHECK_XPATH_NODESET(n, STR("c1[last() = 2]")) % 3 % 4; // c1, c1 - CHECK_XPATH_NODESET(n, STR("c2/preceding-sibling::node()[last() = 2]")) % 4 % 3; // c1, c1 - - // last with 1 argument - CHECK_XPATH_FAIL(STR("last(c)")); -} - -TEST_XML(xpath_nodeset_position, "") -{ - xml_node n = doc.child(STR("node")); - - // position with 0 arguments - CHECK_XPATH_NUMBER(n, STR("position()"), 1); - CHECK_XPATH_NODESET(n, STR("c1[position() = 0]")); - CHECK_XPATH_NODESET(n, STR("c1[position() = 1]")) % 3; - CHECK_XPATH_NODESET(n, STR("c1[position() = 2]")) % 4; - CHECK_XPATH_NODESET(n, STR("c1[position() = 3]")); - CHECK_XPATH_NODESET(n, STR("c2/preceding-sibling::node()[position() = 1]")) % 4; - CHECK_XPATH_NODESET(n, STR("c2/preceding-sibling::node()[position() = 2]")) % 3; - - // position with 1 argument - CHECK_XPATH_FAIL(STR("position(c)")); -} - -TEST_XML(xpath_nodeset_count, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - // count with 0 arguments - CHECK_XPATH_FAIL(STR("count()")); - - // count with 1 non-node-set argument - 
CHECK_XPATH_FAIL(STR("count(1)")); - CHECK_XPATH_FAIL(STR("count(true())")); - CHECK_XPATH_FAIL(STR("count('')")); - - // count with 1 node-set argument - CHECK_XPATH_NUMBER(c, STR("count(.)"), 0); - CHECK_XPATH_NUMBER(n, STR("count(.)"), 1); - CHECK_XPATH_NUMBER(n, STR("count(c1)"), 2); - CHECK_XPATH_NUMBER(n, STR("count(c2)"), 1); - CHECK_XPATH_NUMBER(n, STR("count(c3)"), 4); - CHECK_XPATH_NUMBER(n, STR("count(c4)"), 0); - - // count with 2 arguments - CHECK_XPATH_FAIL(STR("count(x, y)")); -} - -TEST_XML(xpath_nodeset_id, "") -{ - xml_node n = doc.child(STR("node")); - - // id with 0 arguments - CHECK_XPATH_FAIL(STR("id()")); - - // id with 1 argument - no DTD => no id - CHECK_XPATH_NODESET(n, STR("id('foo')")); - - // id with 2 arguments - CHECK_XPATH_FAIL(STR("id(1, 2)")); -} - -TEST_XML_FLAGS(xpath_nodeset_local_name, "text", parse_default | parse_pi) -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - // local-name with 0 arguments - CHECK_XPATH_STRING(c, STR("local-name()"), STR("")); - CHECK_XPATH_STRING(n, STR("local-name()"), STR("node")); - - // local-name with 1 non-node-set argument - CHECK_XPATH_FAIL(STR("local-name(1)")); - - // local-name with 1 node-set argument - CHECK_XPATH_STRING(n, STR("local-name(c1)"), STR("c1")); - CHECK_XPATH_STRING(n, STR("local-name(c2/node())"), STR("child")); - CHECK_XPATH_STRING(n, STR("local-name(c2/attribute::node())"), STR("attr")); - CHECK_XPATH_STRING(n, STR("local-name(c1/node())"), STR("")); - CHECK_XPATH_STRING(n, STR("local-name(c4/node())"), STR("target")); - CHECK_XPATH_STRING(n, STR("local-name(c1/following-sibling::node())"), STR("c2")); - CHECK_XPATH_STRING(n, STR("local-name(c4/preceding-sibling::node())"), STR("c1")); - - // local-name with 2 arguments - CHECK_XPATH_FAIL(STR("local-name(c1, c2)")); -} - -TEST_XML_FLAGS(xpath_nodeset_namespace_uri, "text", parse_default | parse_pi) -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - // namespace-uri with 0 arguments - 
CHECK_XPATH_STRING(c, STR("namespace-uri()"), STR("")); - CHECK_XPATH_STRING(n.child(STR("c2")).child(STR("foo:child")), STR("namespace-uri()"), STR("http://foo2")); - - // namespace-uri with 1 non-node-set argument - CHECK_XPATH_FAIL(STR("namespace-uri(1)")); - - // namespace-uri with 1 node-set argument - CHECK_XPATH_STRING(n, STR("namespace-uri(c1)"), STR("")); - CHECK_XPATH_STRING(n, STR("namespace-uri(c5/child::node())"), STR("http://foo")); - CHECK_XPATH_STRING(n, STR("namespace-uri(c2/attribute::node())"), STR("http://foo2")); - CHECK_XPATH_STRING(n, STR("namespace-uri(c2/child::node())"), STR("http://foo2")); - CHECK_XPATH_STRING(n, STR("namespace-uri(c1/child::node())"), STR("")); - CHECK_XPATH_STRING(n, STR("namespace-uri(c4/child::node())"), STR("")); - CHECK_XPATH_STRING(n, STR("namespace-uri(c3)"), STR("http://def")); - CHECK_XPATH_STRING(n, STR("namespace-uri(c3/@attr)"), STR("")); // the namespace name for an unprefixed attribute name always has no value (Namespaces in XML 1.0) - CHECK_XPATH_STRING(n, STR("namespace-uri(c3/child::node())"), STR("http://def")); - CHECK_XPATH_STRING(n, STR("namespace-uri(c6/@bar:attr)"), STR("")); - - // namespace-uri with 2 arguments - CHECK_XPATH_FAIL(STR("namespace-uri(c1, c2)")); -} - -TEST_XML_FLAGS(xpath_nodeset_name, "text", parse_default | parse_pi) -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - // name with 0 arguments - CHECK_XPATH_STRING(c, STR("name()"), STR("")); - CHECK_XPATH_STRING(n, STR("name()"), STR("node")); - - // name with 1 non-node-set argument - CHECK_XPATH_FAIL(STR("name(1)")); - - // name with 1 node-set argument - CHECK_XPATH_STRING(n, STR("name(c1)"), STR("c1")); - CHECK_XPATH_STRING(n, STR("name(c2/node())"), STR("foo:child")); - CHECK_XPATH_STRING(n, STR("name(c2/attribute::node())"), STR("foo:attr")); - CHECK_XPATH_STRING(n, STR("name(c1/node())"), STR("")); - CHECK_XPATH_STRING(n, STR("name(c4/node())"), STR("target")); - CHECK_XPATH_STRING(n, 
STR("name(c1/following-sibling::node())"), STR("c2")); - CHECK_XPATH_STRING(n, STR("name(c4/preceding-sibling::node())"), STR("c1")); - - // name with 2 arguments - CHECK_XPATH_FAIL(STR("name(c1, c2)")); -} - -TEST(xpath_function_arguments) -{ - xml_node c; - - // conversion to string - CHECK_XPATH_NUMBER(c, STR("string-length(12)"), 2); - - // conversion to number - CHECK_XPATH_NUMBER(c, STR("round('1.2')"), 1); - CHECK_XPATH_NUMBER(c, STR("round('1.7')"), 2); - - // conversion to boolean - CHECK_XPATH_BOOLEAN(c, STR("not('1')"), false); - CHECK_XPATH_BOOLEAN(c, STR("not('')"), true); - - // conversion to node set - CHECK_XPATH_FAIL(STR("sum(1)")); - - // expression evaluation - CHECK_XPATH_NUMBER(c, STR("round((2 + 2 * 2) div 4)"), 2); - - // empty expressions - CHECK_XPATH_FAIL(STR("round(,)")); - CHECK_XPATH_FAIL(STR("substring(,)")); - CHECK_XPATH_FAIL(STR("substring('a',)")); - CHECK_XPATH_FAIL(STR("substring(,'a')")); - - // extra commas - CHECK_XPATH_FAIL(STR("round(,1)")); - CHECK_XPATH_FAIL(STR("round(1,)")); - - // lack of commas - CHECK_XPATH_FAIL(STR("substring(1 2)")); - - // whitespace after function name - CHECK_XPATH_BOOLEAN(c, STR("true ()"), true); - - // too many arguments - CHECK_XPATH_FAIL(STR("round(1, 2, 3, 4, 5, 6)")); -} - -TEST_XML_FLAGS(xpath_string_value, "pcdata", parse_default | parse_pi | parse_comments) -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_STRING(c, STR("string()"), STR("")); - CHECK_XPATH_STRING(doc, STR("string()"), STR("pcdatacdata")); - CHECK_XPATH_STRING(n, STR("string()"), STR("pcdatacdata")); - CHECK_XPATH_STRING(n, STR("string(c1/node())"), STR("pcdata")); - CHECK_XPATH_STRING(n, STR("string(c2/node())"), STR("")); - CHECK_XPATH_STRING(n, STR("string(c3/@attr)"), STR("avalue")); - CHECK_XPATH_STRING(n, STR("string(c4/node())"), STR("pivalue")); - CHECK_XPATH_STRING(n, STR("string(c5/node())"), STR("comment")); - CHECK_XPATH_STRING(n, STR("string(c6/node())"), STR("cdata")); -} - 
-TEST_XML(xpath_string_concat_translate, "foobar") -{ - CHECK_XPATH_STRING(doc, STR("concat('a', 'b', 'c', translate(node, 'o', 'a'), 'd')"), STR("abcfaabard")); -} - -#endif +#ifndef PUGIXML_NO_XPATH + +#include "common.hpp" + +TEST_XML(xpath_number_number, "123") +{ + xml_node c; + xml_node n = doc.child(STR("node")).first_child(); + + // number with 0 arguments + CHECK_XPATH_NUMBER_NAN(c, STR("number()")); + CHECK_XPATH_NUMBER(n, STR("number()"), 123); + + // number with 1 string argument + CHECK_XPATH_NUMBER(c, STR("number(' -123.456 ')"), -123.456); + CHECK_XPATH_NUMBER(c, STR("number(' -123.')"), -123); + CHECK_XPATH_NUMBER(c, STR("number('123.')"), 123); + CHECK_XPATH_NUMBER(c, STR("number('.56')"), 0.56); + CHECK_XPATH_NUMBER(c, STR("number('123 ')"), 123); + CHECK_XPATH_NUMBER_NAN(c, STR("number('foobar')")); + CHECK_XPATH_NUMBER_NAN(c, STR("number('f1')")); + CHECK_XPATH_NUMBER_NAN(c, STR("number('1f')")); + CHECK_XPATH_NUMBER_NAN(c, STR("number('1.f')")); + CHECK_XPATH_NUMBER_NAN(c, STR("number('1.0f')")); + CHECK_XPATH_NUMBER_NAN(c, STR("number('123 f')")); + CHECK_XPATH_NUMBER_NAN(c, STR("number('')")); + CHECK_XPATH_NUMBER_NAN(c, STR("number('.')")); + + // number with 1 bool argument + CHECK_XPATH_NUMBER(c, STR("number(true())"), 1); + CHECK_XPATH_NUMBER(c, STR("number(false())"), 0); + + // number with 1 node set argument + CHECK_XPATH_NUMBER(n, STR("number(.)"), 123); + + // number with 1 number argument + CHECK_XPATH_NUMBER(c, STR("number(1)"), 1); + + // number with 2 arguments + CHECK_XPATH_FAIL(STR("number(1, 2)")); +} + +TEST_XML(xpath_number_sum, "123789") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + // sum with 0 arguments + CHECK_XPATH_FAIL(STR("sum()")); + + // sum with 1 argument + CHECK_XPATH_NUMBER(c, STR("sum(.)"), 0); + CHECK_XPATH_NUMBER(n, STR("sum(.)"), 123789); // 123 .. 
789 + + CHECK_XPATH_NUMBER(n, STR("sum(./descendant-or-self::node())"), 125490); // node + 123 + child + 789 = 123789 + 123 + 789 + 789 = 125490 + CHECK_XPATH_NUMBER(n, STR("sum(.//node())"), 1701); // 123 + child + 789 = 123 + 789 + 789 + CHECK_XPATH_NUMBER_NAN(doc.last_child(), STR("sum(.)")); + + // sum with 2 arguments + CHECK_XPATH_FAIL(STR("sum(1, 2)")); + + // sum with 1 non-node-set argument + CHECK_XPATH_FAIL(STR("sum(1)")); +} + +TEST(xpath_number_floor) +{ + xml_node c; + + // floor with 0 arguments + CHECK_XPATH_FAIL(STR("floor()")); + + // floor with 1 argument + CHECK_XPATH_NUMBER(c, STR("floor(0)"), 0); + CHECK_XPATH_NUMBER(c, STR("floor(1.2)"), 1); + CHECK_XPATH_NUMBER(c, STR("floor(1)"), 1); + CHECK_XPATH_NUMBER(c, STR("floor(-1.2)"), -2); + CHECK_XPATH_NUMBER_NAN(c, STR("floor(string('nan'))")); + CHECK_XPATH_STRING(c, STR("string(floor(1 div 0))"), STR("Infinity")); + CHECK_XPATH_STRING(c, STR("string(floor(-1 div 0))"), STR("-Infinity")); + + // floor with 2 arguments + CHECK_XPATH_FAIL(STR("floor(1, 2)")); + + // floor with argument 0 should return 0 + CHECK_XPATH_STRING(c, STR("string(1 div floor(0))"), STR("Infinity")); + + // floor with argument -0 should return -0 +#if !(defined(__APPLE__) && defined(__MACH__)) // MacOS X gcc 4.0.1 implements floor incorrectly (floor never returns -0) + CHECK_XPATH_STRING(c, STR("string(1 div floor(-0))"), STR("-Infinity")); +#endif +} + +TEST(xpath_number_ceiling) +{ + xml_node c; + + // ceiling with 0 arguments + CHECK_XPATH_FAIL(STR("ceiling()")); + + // ceiling with 1 argument + CHECK_XPATH_NUMBER(c, STR("ceiling(0)"), 0); + CHECK_XPATH_NUMBER(c, STR("ceiling(1.2)"), 2); + CHECK_XPATH_NUMBER(c, STR("ceiling(1)"), 1); + CHECK_XPATH_NUMBER(c, STR("ceiling(-1.2)"), -1); + CHECK_XPATH_NUMBER_NAN(c, STR("ceiling(string('nan'))")); + CHECK_XPATH_STRING(c, STR("string(ceiling(1 div 0))"), STR("Infinity")); + CHECK_XPATH_STRING(c, STR("string(ceiling(-1 div 0))"), STR("-Infinity")); + + // ceiling with 2 
arguments + CHECK_XPATH_FAIL(STR("ceiling(1, 2)")); + + // ceiling with argument 0 should return 0 + CHECK_XPATH_STRING(c, STR("string(1 div ceiling(0))"), STR("Infinity")); + + // ceiling with argument in range (-1, -0] should result in minus zero +#if !(defined(__APPLE__) && defined(__MACH__)) // MacOS X gcc 4.0.1 implements ceil incorrectly (ceil never returns -0) + CHECK_XPATH_STRING(c, STR("string(1 div ceiling(-0))"), STR("-Infinity")); + CHECK_XPATH_STRING(c, STR("string(1 div ceiling(-0.1))"), STR("-Infinity")); +#endif +} + +TEST(xpath_number_round) +{ + xml_node c; + + // round with 0 arguments + CHECK_XPATH_FAIL(STR("round()")); + + // round with 1 argument + CHECK_XPATH_NUMBER(c, STR("round(1.2)"), 1); + CHECK_XPATH_NUMBER(c, STR("round(1.5)"), 2); + CHECK_XPATH_NUMBER(c, STR("round(1.8)"), 2); + CHECK_XPATH_NUMBER(c, STR("round(1)"), 1); + CHECK_XPATH_NUMBER(c, STR("round(-1.2)"), -1); + CHECK_XPATH_NUMBER(c, STR("round(-1.5)"), -1); + CHECK_XPATH_NUMBER(c, STR("round(-1.6)"), -2); + CHECK_XPATH_NUMBER_NAN(c, STR("round(string('nan'))")); + CHECK_XPATH_STRING(c, STR("string(round(1 div 0))"), STR("Infinity")); + CHECK_XPATH_STRING(c, STR("string(round(-1 div 0))"), STR("-Infinity")); + + // round with 2 arguments + CHECK_XPATH_FAIL(STR("round(1, 2)")); + + // round with argument in range [-0.5, -0] should result in minus zero + CHECK_XPATH_STRING(c, STR("string(1 div round(0))"), STR("Infinity")); + +#if !(defined(__APPLE__) && defined(__MACH__)) // MacOS X gcc 4.0.1 implements ceil incorrectly (ceil never returns -0) + CHECK_XPATH_STRING(c, STR("string(1 div round(-0.5))"), STR("-Infinity")); + CHECK_XPATH_STRING(c, STR("string(1 div round(-0))"), STR("-Infinity")); + CHECK_XPATH_STRING(c, STR("string(1 div round(-0.1))"), STR("-Infinity")); +#endif +} + +TEST_XML(xpath_boolean_boolean, "") +{ + xml_node c; + + // boolean with 0 arguments + CHECK_XPATH_FAIL(STR("boolean()")); + + // boolean with 1 number argument + CHECK_XPATH_BOOLEAN(c, 
STR("boolean(0)"), false); + CHECK_XPATH_BOOLEAN(c, STR("boolean(1)"), true); + CHECK_XPATH_BOOLEAN(c, STR("boolean(-1)"), true); + CHECK_XPATH_BOOLEAN(c, STR("boolean(0.1)"), true); + CHECK_XPATH_BOOLEAN(c, STR("boolean(number('nan'))"), false); + + // boolean with 1 string argument + CHECK_XPATH_BOOLEAN(c, STR("boolean('x')"), true); + CHECK_XPATH_BOOLEAN(c, STR("boolean('')"), false); + + // boolean with 1 node set argument + CHECK_XPATH_BOOLEAN(c, STR("boolean(.)"), false); + CHECK_XPATH_BOOLEAN(doc, STR("boolean(.)"), true); + CHECK_XPATH_BOOLEAN(doc, STR("boolean(foo)"), false); + + // boolean with 2 arguments + CHECK_XPATH_FAIL(STR("boolean(1, 2)")); +} + +TEST(xpath_boolean_not) +{ + xml_node c; + + // not with 0 arguments + CHECK_XPATH_FAIL(STR("not()")); + + // not with 1 argument + CHECK_XPATH_BOOLEAN(c, STR("not(true())"), false); + CHECK_XPATH_BOOLEAN(c, STR("not(false())"), true); + + // boolean with 2 arguments + CHECK_XPATH_FAIL(STR("not(1, 2)")); +} + +TEST(xpath_boolean_true) +{ + xml_node c; + + // true with 0 arguments + CHECK_XPATH_BOOLEAN(c, STR("true()"), true); + + // true with 1 argument + CHECK_XPATH_FAIL(STR("true(1)")); +} + +TEST(xpath_boolean_false) +{ + xml_node c; + + // false with 0 arguments + CHECK_XPATH_BOOLEAN(c, STR("false()"), false); + + // false with 1 argument + CHECK_XPATH_FAIL(STR("false(1)")); +} + +TEST_XML(xpath_boolean_lang, "") +{ + xml_node c; + + // lang with 0 arguments + CHECK_XPATH_FAIL(STR("lang()")); + + // lang with 1 argument, no language + CHECK_XPATH_BOOLEAN(c, STR("lang('en')"), false); + CHECK_XPATH_BOOLEAN(doc.child(STR("foo")), STR("lang('en')"), false); + CHECK_XPATH_BOOLEAN(doc.child(STR("foo")), STR("lang('')"), false); + CHECK_XPATH_BOOLEAN(doc.child(STR("foo")).child(STR("bar")), STR("lang('en')"), false); + + // lang with 1 argument, same language/prefix + CHECK_XPATH_BOOLEAN(doc.child(STR("node")), STR("lang('en')"), true); + CHECK_XPATH_BOOLEAN(doc.child(STR("node")).child(STR("child")), 
STR("lang('ru-uk')"), true); + CHECK_XPATH_BOOLEAN(doc.child(STR("node")).child(STR("child")), STR("lang('ru')"), true); + CHECK_XPATH_BOOLEAN(doc.child(STR("node")).child(STR("child")).child(STR("subchild")), STR("lang('ru')"), true); + CHECK_XPATH_BOOLEAN(doc.child(STR("node")).child(STR("child")).child(STR("subchild")), STR("lang('RU')"), true); + + // lang with 1 argument, different language/prefix + CHECK_XPATH_BOOLEAN(doc.child(STR("node")), STR("lang('')"), false); + CHECK_XPATH_BOOLEAN(doc.child(STR("node")), STR("lang('e')"), false); + CHECK_XPATH_BOOLEAN(doc.child(STR("node")).child(STR("child")), STR("lang('en')"), false); + CHECK_XPATH_BOOLEAN(doc.child(STR("node")).child(STR("child")), STR("lang('ru-gb')"), false); + CHECK_XPATH_BOOLEAN(doc.child(STR("node")).child(STR("child")), STR("lang('r')"), false); + CHECK_XPATH_BOOLEAN(doc.child(STR("node")).child(STR("child")).child(STR("subchild")), STR("lang('en')"), false); + + // lang with 2 arguments + CHECK_XPATH_FAIL(STR("lang(1, 2)")); +} + +TEST_XML(xpath_string_string, "123789100") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + // string with 0 arguments + CHECK_XPATH_STRING(c, STR("string()"), STR("")); + CHECK_XPATH_STRING(n.child(STR("child")), STR("string()"), STR("789")); + + // string with 1 node-set argument + CHECK_XPATH_STRING(n, STR("string(child)"), STR("789")); + CHECK_XPATH_STRING(n, STR("string(child/@id)"), STR("1")); + CHECK_XPATH_STRING(n, STR("string(.)"), STR("123789200100")); + + // string with 1 number argument + CHECK_XPATH_STRING(c, STR("string(0 div 0)"), STR("NaN")); + CHECK_XPATH_STRING(c, STR("string(0)"), STR("0")); + CHECK_XPATH_STRING(c, STR("string(-0)"), STR("0")); + CHECK_XPATH_STRING(c, STR("string(1 div 0)"), STR("Infinity")); + CHECK_XPATH_STRING(c, STR("string(-1 div -0)"), STR("Infinity")); + CHECK_XPATH_STRING(c, STR("string(-1 div 0)"), STR("-Infinity")); + CHECK_XPATH_STRING(c, STR("string(1 div -0)"), STR("-Infinity")); + CHECK_XPATH_STRING(c, 
STR("string(1234567)"), STR("1234567")); + CHECK_XPATH_STRING(c, STR("string(-1234567)"), STR("-1234567")); + CHECK_XPATH_STRING(c, STR("string(1234.5678)"), STR("1234.5678")); + CHECK_XPATH_STRING(c, STR("string(-1234.5678)"), STR("-1234.5678")); + CHECK_XPATH_STRING(c, STR("string(0.5678)"), STR("0.5678")); + CHECK_XPATH_STRING(c, STR("string(-0.5678)"), STR("-0.5678")); + CHECK_XPATH_STRING(c, STR("string(0.0)"), STR("0")); + CHECK_XPATH_STRING(c, STR("string(-0.0)"), STR("0")); + + // string with 1 boolean argument + CHECK_XPATH_STRING(c, STR("string(true())"), STR("true")); + CHECK_XPATH_STRING(c, STR("string(false())"), STR("false")); + + // string with 1 string argument + CHECK_XPATH_STRING(c, STR("string('abc')"), STR("abc")); + + // string with 2 arguments + CHECK_XPATH_FAIL(STR("string(1, 2)")); +} + +TEST(xpath_string_concat) +{ + xml_node c; + + // concat with 0 arguments + CHECK_XPATH_FAIL(STR("concat()")); + + // concat with 1 argument + CHECK_XPATH_FAIL(STR("concat('')")); + + // concat with exactly 2 arguments + CHECK_XPATH_STRING(c, STR("concat('prev','next')"), STR("prevnext")); + CHECK_XPATH_STRING(c, STR("concat('','next')"), STR("next")); + CHECK_XPATH_STRING(c, STR("concat('prev','')"), STR("prev")); + + // concat with 3 or more arguments + CHECK_XPATH_STRING(c, STR("concat('a', 'b', 'c')"), STR("abc")); + CHECK_XPATH_STRING(c, STR("concat('a', 'b', 'c', 'd')"), STR("abcd")); + CHECK_XPATH_STRING(c, STR("concat('a', 'b', 'c', 'd', 'e')"), STR("abcde")); + CHECK_XPATH_STRING(c, STR("concat('a', 'b', 'c', 'd', 'e', 'f')"), STR("abcdef")); + CHECK_XPATH_STRING(c, STR("concat('a', 'b', 'c', 'd', 'e', 'f', 'g')"), STR("abcdefg")); + CHECK_XPATH_STRING(c, STR("concat(1, 2, 3, 4, 5, 6, 7, 8)"), STR("12345678")); +} + +TEST(xpath_string_starts_with) +{ + xml_node c; + + // starts-with with 0 arguments + CHECK_XPATH_FAIL(STR("starts-with()")); + + // starts-with with 1 argument + CHECK_XPATH_FAIL(STR("starts-with('a')")); + + // starts-with with 2 
arguments + CHECK_XPATH_BOOLEAN(c, STR("starts-with('abc', '')"), true); + CHECK_XPATH_BOOLEAN(c, STR("starts-with('abc', 'a')"), true); + CHECK_XPATH_BOOLEAN(c, STR("starts-with('abc', 'abc')"), true); + CHECK_XPATH_BOOLEAN(c, STR("starts-with('abc', 'abcd')"), false); + CHECK_XPATH_BOOLEAN(c, STR("starts-with('bc', 'c')"), false); + CHECK_XPATH_BOOLEAN(c, STR("starts-with('', 'c')"), false); + CHECK_XPATH_BOOLEAN(c, STR("starts-with('', '')"), true); + + // starts-with with 3 arguments + CHECK_XPATH_FAIL(STR("starts-with('a', 'b', 'c')")); +} + +TEST(xpath_string_contains) +{ + xml_node c; + + // contains with 0 arguments + CHECK_XPATH_FAIL(STR("contains()")); + + // contains with 1 argument + CHECK_XPATH_FAIL(STR("contains('a')")); + + // contains with 2 arguments + CHECK_XPATH_BOOLEAN(c, STR("contains('abc', '')"), true); + CHECK_XPATH_BOOLEAN(c, STR("contains('abc', 'a')"), true); + CHECK_XPATH_BOOLEAN(c, STR("contains('abc', 'abc')"), true); + CHECK_XPATH_BOOLEAN(c, STR("contains('abcd', 'bc')"), true); + CHECK_XPATH_BOOLEAN(c, STR("contains('abc', 'abcd')"), false); + CHECK_XPATH_BOOLEAN(c, STR("contains('b', 'bc')"), false); + CHECK_XPATH_BOOLEAN(c, STR("contains('', 'c')"), false); + CHECK_XPATH_BOOLEAN(c, STR("contains('', '')"), true); + + // contains with 3 arguments + CHECK_XPATH_FAIL(STR("contains('a', 'b', 'c')")); +} + +TEST(xpath_string_substring_before) +{ + xml_node c; + + // substring-before with 0 arguments + CHECK_XPATH_FAIL(STR("substring-before()")); + + // substring-before with 1 argument + CHECK_XPATH_FAIL(STR("substring-before('a')")); + + // substring-before with 2 arguments + CHECK_XPATH_STRING(c, STR("substring-before('abc', 'abc')"), STR("")); + CHECK_XPATH_STRING(c, STR("substring-before('abc', 'a')"), STR("")); + CHECK_XPATH_STRING(c, STR("substring-before('abc', 'cd')"), STR("")); + CHECK_XPATH_STRING(c, STR("substring-before('abc', 'b')"), STR("a")); + CHECK_XPATH_STRING(c, STR("substring-before('abc', 'c')"), STR("ab")); + 
CHECK_XPATH_STRING(c, STR("substring-before('', '')"), STR("")); + + // substring-before with 2 arguments, from W3C standard + CHECK_XPATH_STRING(c, STR("substring-before(\"1999/04/01\",\"/\")"), STR("1999")); + + // substring-before with 3 arguments + CHECK_XPATH_FAIL(STR("substring-before('a', 'b', 'c')")); +} + +TEST(xpath_string_substring_after) +{ + xml_node c; + + // substring-after with 0 arguments + CHECK_XPATH_FAIL(STR("substring-after()")); + + // substring-after with 1 argument + CHECK_XPATH_FAIL(STR("substring-after('a')")); + + // substring-after with 2 arguments + CHECK_XPATH_STRING(c, STR("substring-after('abc', 'abc')"), STR("")); + CHECK_XPATH_STRING(c, STR("substring-after('abc', 'a')"), STR("bc")); + CHECK_XPATH_STRING(c, STR("substring-after('abc', 'cd')"), STR("")); + CHECK_XPATH_STRING(c, STR("substring-after('abc', 'b')"), STR("c")); + CHECK_XPATH_STRING(c, STR("substring-after('abc', 'c')"), STR("")); + CHECK_XPATH_STRING(c, STR("substring-after('', '')"), STR("")); + + // substring-before with 2 arguments, from W3C standard + CHECK_XPATH_STRING(c, STR("substring-after(\"1999/04/01\",\"/\")"), STR("04/01")); + CHECK_XPATH_STRING(c, STR("substring-after(\"1999/04/01\",\"19\")"), STR("99/04/01")); + + // substring-after with 3 arguments + CHECK_XPATH_FAIL(STR("substring-after('a', 'b', 'c')")); +} + +TEST(xpath_string_substring) +{ + xml_node c; + + // substring with 0 arguments + CHECK_XPATH_FAIL(STR("substring()")); + + // substring with 1 argument + CHECK_XPATH_FAIL(STR("substring('')")); + + // substring with 2 arguments + CHECK_XPATH_STRING(c, STR("substring('abcd', 2)"), STR("bcd")); + CHECK_XPATH_STRING(c, STR("substring('abcd', 1)"), STR("abcd")); + CHECK_XPATH_STRING(c, STR("substring('abcd', 1.1)"), STR("abcd")); + CHECK_XPATH_STRING(c, STR("substring('abcd', 1.5)"), STR("bcd")); + CHECK_XPATH_STRING(c, STR("substring('abcd', 1.8)"), STR("bcd")); + CHECK_XPATH_STRING(c, STR("substring('abcd', 10)"), STR("")); + CHECK_XPATH_STRING(c, 
STR("substring('abcd', 0)"), STR("abcd")); + CHECK_XPATH_STRING(c, STR("substring('abcd', -100)"), STR("abcd")); + CHECK_XPATH_STRING(c, STR("substring('abcd', -1 div 0)"), STR("abcd")); + CHECK_XPATH_STRING(c, STR("substring('abcd', 1 div 0)"), STR("")); + CHECK_XPATH_STRING(c, STR("substring('abcd', 0 div 0)"), STR("")); + CHECK_XPATH_STRING(c, STR("substring('', 1)"), STR("")); + CHECK_XPATH_STRING(c, STR("substring('', 0)"), STR("")); + CHECK_XPATH_STRING(c, STR("substring(substring('internalexternalcorrect substring',9),9)"), STR("correct substring")); + + // substring with 3 arguments + CHECK_XPATH_STRING(c, STR("substring('abcd', 2, 1)"), STR("b")); + CHECK_XPATH_STRING(c, STR("substring('abcd', 2, 2)"), STR("bc")); + CHECK_XPATH_STRING(c, STR("substring('abcd', 1, 0)"), STR("")); + CHECK_XPATH_STRING(c, STR("substring('abcd', 1, 0.4)"), STR("")); + CHECK_XPATH_STRING(c, STR("substring('abcd', 1, 0.5)"), STR("a")); + CHECK_XPATH_STRING(c, STR("substring('abcd', 10, -5)"), STR("")); + CHECK_XPATH_STRING(c, STR("substring('abcd', 0, -1)"), STR("")); + CHECK_XPATH_STRING(c, STR("substring('abcd', -100, 100)"), STR("abcd")); + CHECK_XPATH_STRING(c, STR("substring('abcd', -1 div 0, 4)"), STR("")); + CHECK_XPATH_STRING(c, STR("substring('abcd', 1 div 0, 0 div 0)"), STR("")); + CHECK_XPATH_STRING(c, STR("substring('abcd', 0 div 0, 1)"), STR("")); + CHECK_XPATH_STRING(c, STR("substring('', 1, 2)"), STR("")); + CHECK_XPATH_STRING(c, STR("substring('', 0, 0)"), STR("")); + + // substring with 3 arguments, from W3C standard + CHECK_XPATH_STRING(c, STR("substring('12345', 1.5, 2.6)"), STR("234")); + CHECK_XPATH_STRING(c, STR("substring('12345', 0, 3)"), STR("12")); + CHECK_XPATH_STRING(c, STR("substring('12345', 0 div 0, 3)"), STR("")); + CHECK_XPATH_STRING(c, STR("substring('12345', 1, 0 div 0)"), STR("")); + CHECK_XPATH_STRING(c, STR("substring('12345', -42, 1 div 0)"), STR("12345")); + CHECK_XPATH_STRING(c, STR("substring('12345', -1 div 0, 1 div 0)"), STR("")); + + 
// substring with 4 arguments + CHECK_XPATH_FAIL(STR("substring('', 1, 2, 3)")); +} + +TEST_XML(xpath_string_string_length, "123") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + // string-length with 0 arguments + CHECK_XPATH_NUMBER(c, STR("string-length()"), 0); + CHECK_XPATH_NUMBER(n, STR("string-length()"), 3); + + // string-length with 1 argument + CHECK_XPATH_NUMBER(c, STR("string-length('')"), 0); + CHECK_XPATH_NUMBER(c, STR("string-length('a')"), 1); + CHECK_XPATH_NUMBER(c, STR("string-length('abcdef')"), 6); + + // string-length with 2 arguments + CHECK_XPATH_FAIL(STR("string-length(1, 2)")); +} + +TEST_XML_FLAGS(xpath_string_normalize_space, " \t\r\rval1 \rval2\r\nval3\nval4\r\r", parse_minimal) +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + // normalize-space with 0 arguments + CHECK_XPATH_STRING(c, STR("normalize-space()"), STR("")); + CHECK_XPATH_STRING(n, STR("normalize-space()"), STR("val1 val2 val3 val4")); + + // normalize-space with 1 argument + CHECK_XPATH_STRING(c, STR("normalize-space('')"), STR("")); + CHECK_XPATH_STRING(c, STR("normalize-space('abcd')"), STR("abcd")); + CHECK_XPATH_STRING(c, STR("normalize-space(' \r\nabcd')"), STR("abcd")); + CHECK_XPATH_STRING(c, STR("normalize-space('abcd \n\r')"), STR("abcd")); + CHECK_XPATH_STRING(c, STR("normalize-space('ab\r\n\tcd')"), STR("ab cd")); + CHECK_XPATH_STRING(c, STR("normalize-space('ab cd')"), STR("ab cd")); + CHECK_XPATH_STRING(c, STR("normalize-space('\07')"), STR("\07")); + + // normalize-space with 2 arguments + CHECK_XPATH_FAIL(STR("normalize-space(1, 2)")); +} + +TEST(xpath_string_translate) +{ + xml_node c; + + // translate with 0 arguments + CHECK_XPATH_FAIL(STR("translate()")); + + // translate with 1 argument + CHECK_XPATH_FAIL(STR("translate('a')")); + + // translate with 2 arguments + CHECK_XPATH_FAIL(STR("translate('a', 'b')")); + + // translate with 3 arguments + CHECK_XPATH_STRING(c, STR("translate('abc', '', '')"), STR("abc")); + 
CHECK_XPATH_STRING(c, STR("translate('abc', '', 'foo')"), STR("abc")); + CHECK_XPATH_STRING(c, STR("translate('abc', 'ab', 'ba')"), STR("bac")); + CHECK_XPATH_STRING(c, STR("translate('abc', 'ab', 'f')"), STR("fc")); + CHECK_XPATH_STRING(c, STR("translate('abc', 'aabb', '1234')"), STR("13c")); + CHECK_XPATH_STRING(c, STR("translate('', 'abc', 'bac')"), STR("")); + + // translate with 3 arguments, from W3C standard + CHECK_XPATH_STRING(c, STR("translate('bar','abc','ABC')"), STR("BAr")); + CHECK_XPATH_STRING(c, STR("translate('--aaa--','abc-','ABC')"), STR("AAA")); + + // translate with 4 arguments + CHECK_XPATH_FAIL(STR("translate('a', 'b', 'c', 'd')")); +} + +TEST_XML(xpath_nodeset_last, "") +{ + xml_node n = doc.child(STR("node")); + + // last with 0 arguments + CHECK_XPATH_NUMBER(n, STR("last()"), 1); + CHECK_XPATH_NODESET(n, STR("c1[last() = 1]")); + CHECK_XPATH_NODESET(n, STR("c1[last() = 2]")) % 3 % 4; // c1, c1 + CHECK_XPATH_NODESET(n, STR("c2/preceding-sibling::node()[last() = 2]")) % 4 % 3; // c1, c1 + + // last with 1 argument + CHECK_XPATH_FAIL(STR("last(c)")); +} + +TEST_XML(xpath_nodeset_position, "") +{ + xml_node n = doc.child(STR("node")); + + // position with 0 arguments + CHECK_XPATH_NUMBER(n, STR("position()"), 1); + CHECK_XPATH_NODESET(n, STR("c1[position() = 0]")); + CHECK_XPATH_NODESET(n, STR("c1[position() = 1]")) % 3; + CHECK_XPATH_NODESET(n, STR("c1[position() = 2]")) % 4; + CHECK_XPATH_NODESET(n, STR("c1[position() = 3]")); + CHECK_XPATH_NODESET(n, STR("c2/preceding-sibling::node()[position() = 1]")) % 4; + CHECK_XPATH_NODESET(n, STR("c2/preceding-sibling::node()[position() = 2]")) % 3; + + // position with 1 argument + CHECK_XPATH_FAIL(STR("position(c)")); +} + +TEST_XML(xpath_nodeset_count, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + // count with 0 arguments + CHECK_XPATH_FAIL(STR("count()")); + + // count with 1 non-node-set argument + CHECK_XPATH_FAIL(STR("count(1)")); + CHECK_XPATH_FAIL(STR("count(true())")); + 
CHECK_XPATH_FAIL(STR("count('')")); + + // count with 1 node-set argument + CHECK_XPATH_NUMBER(c, STR("count(.)"), 0); + CHECK_XPATH_NUMBER(n, STR("count(.)"), 1); + CHECK_XPATH_NUMBER(n, STR("count(c1)"), 2); + CHECK_XPATH_NUMBER(n, STR("count(c2)"), 1); + CHECK_XPATH_NUMBER(n, STR("count(c3)"), 4); + CHECK_XPATH_NUMBER(n, STR("count(c4)"), 0); + + // count with 2 arguments + CHECK_XPATH_FAIL(STR("count(x, y)")); +} + +TEST_XML(xpath_nodeset_id, "") +{ + xml_node n = doc.child(STR("node")); + + // id with 0 arguments + CHECK_XPATH_FAIL(STR("id()")); + + // id with 1 argument - no DTD => no id + CHECK_XPATH_NODESET(n, STR("id('foo')")); + + // id with 2 arguments + CHECK_XPATH_FAIL(STR("id(1, 2)")); +} + +TEST_XML_FLAGS(xpath_nodeset_local_name, "text", parse_default | parse_pi) +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + // local-name with 0 arguments + CHECK_XPATH_STRING(c, STR("local-name()"), STR("")); + CHECK_XPATH_STRING(n, STR("local-name()"), STR("node")); + + // local-name with 1 non-node-set argument + CHECK_XPATH_FAIL(STR("local-name(1)")); + + // local-name with 1 node-set argument + CHECK_XPATH_STRING(n, STR("local-name(c1)"), STR("c1")); + CHECK_XPATH_STRING(n, STR("local-name(c2/node())"), STR("child")); + CHECK_XPATH_STRING(n, STR("local-name(c2/attribute::node())"), STR("attr")); + CHECK_XPATH_STRING(n, STR("local-name(c1/node())"), STR("")); + CHECK_XPATH_STRING(n, STR("local-name(c4/node())"), STR("target")); + CHECK_XPATH_STRING(n, STR("local-name(c1/following-sibling::node())"), STR("c2")); + CHECK_XPATH_STRING(n, STR("local-name(c4/preceding-sibling::node())"), STR("c1")); + + // local-name with 2 arguments + CHECK_XPATH_FAIL(STR("local-name(c1, c2)")); +} + +TEST_XML_FLAGS(xpath_nodeset_namespace_uri, "text", parse_default | parse_pi) +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + // namespace-uri with 0 arguments + CHECK_XPATH_STRING(c, STR("namespace-uri()"), STR("")); + 
CHECK_XPATH_STRING(n.child(STR("c2")).child(STR("foo:child")), STR("namespace-uri()"), STR("http://foo2")); + + // namespace-uri with 1 non-node-set argument + CHECK_XPATH_FAIL(STR("namespace-uri(1)")); + + // namespace-uri with 1 node-set argument + CHECK_XPATH_STRING(n, STR("namespace-uri(c1)"), STR("")); + CHECK_XPATH_STRING(n, STR("namespace-uri(c5/child::node())"), STR("http://foo")); + CHECK_XPATH_STRING(n, STR("namespace-uri(c2/attribute::node())"), STR("http://foo2")); + CHECK_XPATH_STRING(n, STR("namespace-uri(c2/child::node())"), STR("http://foo2")); + CHECK_XPATH_STRING(n, STR("namespace-uri(c1/child::node())"), STR("")); + CHECK_XPATH_STRING(n, STR("namespace-uri(c4/child::node())"), STR("")); + CHECK_XPATH_STRING(n, STR("namespace-uri(c3)"), STR("http://def")); + CHECK_XPATH_STRING(n, STR("namespace-uri(c3/@attr)"), STR("")); // the namespace name for an unprefixed attribute name always has no value (Namespaces in XML 1.0) + CHECK_XPATH_STRING(n, STR("namespace-uri(c3/child::node())"), STR("http://def")); + CHECK_XPATH_STRING(n, STR("namespace-uri(c6/@bar:attr)"), STR("")); + + // namespace-uri with 2 arguments + CHECK_XPATH_FAIL(STR("namespace-uri(c1, c2)")); +} + +TEST_XML_FLAGS(xpath_nodeset_name, "text", parse_default | parse_pi) +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + // name with 0 arguments + CHECK_XPATH_STRING(c, STR("name()"), STR("")); + CHECK_XPATH_STRING(n, STR("name()"), STR("node")); + + // name with 1 non-node-set argument + CHECK_XPATH_FAIL(STR("name(1)")); + + // name with 1 node-set argument + CHECK_XPATH_STRING(n, STR("name(c1)"), STR("c1")); + CHECK_XPATH_STRING(n, STR("name(c2/node())"), STR("foo:child")); + CHECK_XPATH_STRING(n, STR("name(c2/attribute::node())"), STR("foo:attr")); + CHECK_XPATH_STRING(n, STR("name(c1/node())"), STR("")); + CHECK_XPATH_STRING(n, STR("name(c4/node())"), STR("target")); + CHECK_XPATH_STRING(n, STR("name(c1/following-sibling::node())"), STR("c2")); + CHECK_XPATH_STRING(n, 
STR("name(c4/preceding-sibling::node())"), STR("c1")); + + // name with 2 arguments + CHECK_XPATH_FAIL(STR("name(c1, c2)")); +} + +TEST(xpath_function_arguments) +{ + xml_node c; + + // conversion to string + CHECK_XPATH_NUMBER(c, STR("string-length(12)"), 2); + + // conversion to number + CHECK_XPATH_NUMBER(c, STR("round('1.2')"), 1); + CHECK_XPATH_NUMBER(c, STR("round('1.7')"), 2); + + // conversion to boolean + CHECK_XPATH_BOOLEAN(c, STR("not('1')"), false); + CHECK_XPATH_BOOLEAN(c, STR("not('')"), true); + + // conversion to node set + CHECK_XPATH_FAIL(STR("sum(1)")); + + // expression evaluation + CHECK_XPATH_NUMBER(c, STR("round((2 + 2 * 2) div 4)"), 2); + + // empty expressions + CHECK_XPATH_FAIL(STR("round(,)")); + CHECK_XPATH_FAIL(STR("substring(,)")); + CHECK_XPATH_FAIL(STR("substring('a',)")); + CHECK_XPATH_FAIL(STR("substring(,'a')")); + + // extra commas + CHECK_XPATH_FAIL(STR("round(,1)")); + CHECK_XPATH_FAIL(STR("round(1,)")); + + // lack of commas + CHECK_XPATH_FAIL(STR("substring(1 2)")); + + // whitespace after function name + CHECK_XPATH_BOOLEAN(c, STR("true ()"), true); + + // too many arguments + CHECK_XPATH_FAIL(STR("round(1, 2, 3, 4, 5, 6)")); +} + +TEST_XML_FLAGS(xpath_string_value, "pcdata", parse_default | parse_pi | parse_comments) +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_STRING(c, STR("string()"), STR("")); + CHECK_XPATH_STRING(doc, STR("string()"), STR("pcdatacdata")); + CHECK_XPATH_STRING(n, STR("string()"), STR("pcdatacdata")); + CHECK_XPATH_STRING(n, STR("string(c1/node())"), STR("pcdata")); + CHECK_XPATH_STRING(n, STR("string(c2/node())"), STR("")); + CHECK_XPATH_STRING(n, STR("string(c3/@attr)"), STR("avalue")); + CHECK_XPATH_STRING(n, STR("string(c4/node())"), STR("pivalue")); + CHECK_XPATH_STRING(n, STR("string(c5/node())"), STR("comment")); + CHECK_XPATH_STRING(n, STR("string(c6/node())"), STR("cdata")); +} + +TEST_XML(xpath_string_concat_translate, "foobar") +{ + CHECK_XPATH_STRING(doc, 
STR("concat('a', 'b', 'c', translate(node, 'o', 'a'), 'd')"), STR("abcfaabard")); +} + +#endif diff --git a/tests/test_xpath_operators.cpp b/tests/test_xpath_operators.cpp index b834b95..c7fe165 100644 --- a/tests/test_xpath_operators.cpp +++ b/tests/test_xpath_operators.cpp @@ -1,473 +1,473 @@ -#ifndef PUGIXML_NO_XPATH - -#include "common.hpp" - -TEST(xpath_operators_arithmetic) -{ - xml_node c; - - // incorrect unary operator - CHECK_XPATH_FAIL(STR("-")); - - // correct unary operator - CHECK_XPATH_NUMBER(c, STR("-1"), -1); - CHECK_XPATH_NUMBER(c, STR("--1"), 1); - CHECK_XPATH_NUMBER(c, STR("---1"), -1); - - // incorrect binary operators - CHECK_XPATH_FAIL(STR("5+")); - CHECK_XPATH_FAIL(STR("5-")); - CHECK_XPATH_FAIL(STR("5*")); - CHECK_XPATH_FAIL(STR("+5")); - CHECK_XPATH_FAIL(STR("*5")); - CHECK_XPATH_FAIL(STR("1div2")); - CHECK_XPATH_FAIL(STR("1mod")); - CHECK_XPATH_FAIL(STR("1div")); - - // correct trivial binary operators - CHECK_XPATH_NUMBER(c, STR("1 + 2"), 3); - CHECK_XPATH_NUMBER(c, STR("1+2"), 3); - CHECK_XPATH_NUMBER(c, STR("1 * 2"), 2); - CHECK_XPATH_NUMBER(c, STR("1*2"), 2); - CHECK_XPATH_NUMBER(c, STR("1 div 2"), 0.5); - - // operator precedence - CHECK_XPATH_NUMBER(c, STR("2 + 2 * 2 div 1 mod 3"), 3); - CHECK_XPATH_NUMBER(c, STR("2 + 2 * 2 div (1 mod 3)"), 6); - CHECK_XPATH_NUMBER(c, STR("(2 + 2) * 2 div (1 mod 3)"), 8); - CHECK_XPATH_NUMBER(c, STR("(2 + 2) * (2 div 1) mod 3"), 2); - CHECK_XPATH_NUMBER(c, STR("2 - -2"), 4); - CHECK_XPATH_NUMBER(c, STR("2 + -2"), 0); - CHECK_XPATH_NUMBER(c, STR("2--2"), 4); - CHECK_XPATH_NUMBER(c, STR("2+-2"), 0); - CHECK_XPATH_NUMBER(c, STR("1-2-3"), -4); - - // mod, from W3C standard - CHECK_XPATH_NUMBER(c, STR("5 mod 2"), 1); - CHECK_XPATH_NUMBER(c, STR("5 mod -2"), 1); - CHECK_XPATH_NUMBER(c, STR("-5 mod 2"), -1); - CHECK_XPATH_NUMBER(c, STR("-5 mod -2"), -1); -} - -TEST(xpath_operators_arithmetic_specials) -{ - xml_node c; - - // infinity/nan - CHECK_XPATH_STRING(c, STR("1 div 0"), STR("Infinity")); - 
CHECK_XPATH_STRING(c, STR("-1 div 0"), STR("-Infinity")); - CHECK_XPATH_STRING(c, STR("-1 div 0 + 1 div 0"), STR("NaN")); - CHECK_XPATH_STRING(c, STR("0 div 0"), STR("NaN")); - CHECK_XPATH_STRING(c, STR("1 div 0 + 1 div 0"), STR("Infinity")); - CHECK_XPATH_STRING(c, STR("-1 div 0 + -1 div 0"), STR("-Infinity")); - CHECK_XPATH_STRING(c, STR("1 div 0 + 100"), STR("Infinity")); - CHECK_XPATH_STRING(c, STR("-1 div 0 + 100"), STR("-Infinity")); - CHECK_XPATH_STRING(c, STR("0 div 0 + 100"), STR("NaN")); - - // unary - and multiplication clarifications from recommendations errata - CHECK_XPATH_STRING(c, STR("1 div -0"), STR("-Infinity")); - CHECK_XPATH_STRING(c, STR("-1 div -0"), STR("Infinity")); - CHECK_XPATH_STRING(c, STR("1 div (-0 * 1)"), STR("-Infinity")); - CHECK_XPATH_STRING(c, STR("-1 div (0 * -1)"), STR("Infinity")); - CHECK_XPATH_STRING(c, STR("1 div (-0 div 1)"), STR("-Infinity")); - CHECK_XPATH_STRING(c, STR("-1 div (0 div -1)"), STR("Infinity")); -} - -TEST_XML(xpath_operators_arithmetic_subtraction_parse, "1023") -{ - xml_node n = doc.child(STR("node")); - - // correct subtraction parsing, from W3C standard - CHECK_XPATH_NUMBER(n, STR("foo-bar"), 10); - CHECK_XPATH_NUMBER(n, STR("foo -bar"), -1); - CHECK_XPATH_NUMBER(n, STR("foo - bar"), -1); - CHECK_XPATH_NUMBER(n, STR("-foo-bar"), -10); - CHECK_XPATH_NUMBER(n, STR("-foo -bar"), -5); -} - -TEST(xpath_operators_logical) -{ - xml_node c; - - // boolean arithmetic - CHECK_XPATH_BOOLEAN(c, STR("true() or true()"), true); - CHECK_XPATH_BOOLEAN(c, STR("true() or false()"), true); - CHECK_XPATH_BOOLEAN(c, STR("false() or false()"), false); - CHECK_XPATH_BOOLEAN(c, STR("false() or true()"), true); - - CHECK_XPATH_BOOLEAN(c, STR("true() and true()"), true); - CHECK_XPATH_BOOLEAN(c, STR("true() and false()"), false); - CHECK_XPATH_BOOLEAN(c, STR("false() and false()"), false); - CHECK_XPATH_BOOLEAN(c, STR("false() and true()"), false); - - // boolean conversion - CHECK_XPATH_BOOLEAN(c, STR("1 or ''"), true); - 
CHECK_XPATH_BOOLEAN(c, STR("1 and ''"), false); - CHECK_XPATH_BOOLEAN(c, STR("0 or ''"), false); - CHECK_XPATH_BOOLEAN(c, STR("0 or 'a'"), true); -} - -TEST(xpath_operators_equality_primitive_boolean) -{ - xml_node c; - - // boolean vs boolan - CHECK_XPATH_BOOLEAN(c, STR("true() = true()"), true); - CHECK_XPATH_BOOLEAN(c, STR("false() = false()"), true); - CHECK_XPATH_BOOLEAN(c, STR("true() != false()"), true); - CHECK_XPATH_BOOLEAN(c, STR("false() != false()"), false); - - // upcast to boolean - CHECK_XPATH_BOOLEAN(c, STR("true() = 2"), true); - CHECK_XPATH_BOOLEAN(c, STR("true() != 2"), false); - CHECK_XPATH_BOOLEAN(c, STR("false() = 2"), false); - CHECK_XPATH_BOOLEAN(c, STR("false() != 2"), true); - CHECK_XPATH_BOOLEAN(c, STR("false() = 0"), true); - CHECK_XPATH_BOOLEAN(c, STR("false() != 0"), false); - - CHECK_XPATH_BOOLEAN(c, STR("2 = true()"), true); - CHECK_XPATH_BOOLEAN(c, STR("2 != true()"), false); - CHECK_XPATH_BOOLEAN(c, STR("2 = false()"), false); - CHECK_XPATH_BOOLEAN(c, STR("2 != false()"), true); - CHECK_XPATH_BOOLEAN(c, STR("0 = false()"), true); - CHECK_XPATH_BOOLEAN(c, STR("0 != false()"), false); -} - -TEST(xpath_operators_equality_primitive_number) -{ - xml_node c; - - // number vs number - CHECK_XPATH_BOOLEAN(c, STR("1 = 1"), true); - CHECK_XPATH_BOOLEAN(c, STR("0.5 = 0.5"), true); - CHECK_XPATH_BOOLEAN(c, STR("1 != 2"), true); - CHECK_XPATH_BOOLEAN(c, STR("1 = -1"), false); - - // infinity/nan - CHECK_XPATH_BOOLEAN(c, STR("1 div 0 = 2 div 0"), true); - CHECK_XPATH_BOOLEAN(c, STR("-1 div 0 != 2 div 0"), true); - -#ifndef MSVC6_NAN_BUG - CHECK_XPATH_BOOLEAN(c, STR("0 div 0 = 1"), false); - CHECK_XPATH_BOOLEAN(c, STR("0 div 0 != 1"), true); - CHECK_XPATH_BOOLEAN(c, STR("0 div 0 = 0 div 0"), false); -#endif - - // upcast to number - CHECK_XPATH_BOOLEAN(c, STR("2 = '2'"), true); - CHECK_XPATH_BOOLEAN(c, STR("2 != '2'"), false); - CHECK_XPATH_BOOLEAN(c, STR("'1' != 2"), true); - CHECK_XPATH_BOOLEAN(c, STR("'1' = 2"), false); -} - 
-TEST(xpath_operators_equality_primitive_string) -{ - xml_node c; - - // string vs string - CHECK_XPATH_BOOLEAN(c, STR("'a' = 'a'"), true); - CHECK_XPATH_BOOLEAN(c, STR("'a' = 'b'"), false); - CHECK_XPATH_BOOLEAN(c, STR("'ab' != 'a'"), true); - CHECK_XPATH_BOOLEAN(c, STR("'' != 'a'"), true); - CHECK_XPATH_BOOLEAN(c, STR("'a' != ''"), true); - CHECK_XPATH_BOOLEAN(c, STR("'' != ''"), false); -} - -TEST_XML(xpath_operators_equality_node_set_node_set, "abacbdabb") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - // node set vs node set - CHECK_XPATH_BOOLEAN(c, STR("x = x"), false); // empty node set compares as false with any other object via any comparison operator, as per XPath spec - CHECK_XPATH_BOOLEAN(c, STR("x != x"), false); - CHECK_XPATH_BOOLEAN(n, STR("c1/v = c2/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v = c3/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("c2/v = c3/v"), false); - CHECK_XPATH_BOOLEAN(n, STR("c1/v = c4/v"), false); - CHECK_XPATH_BOOLEAN(n, STR("c1/v = x"), false); - CHECK_XPATH_BOOLEAN(n, STR("x = c1"), false); - - CHECK_XPATH_BOOLEAN(n, STR("c1/v != c2/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v != c3/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("c2/v != c3/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v != c4/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v != c5/v"), true); // (a, b) != (a, b), since a != b, as per XPath spec (comparison operators are so not intutive) - CHECK_XPATH_BOOLEAN(n, STR("c3/v != c6/v"), false); - CHECK_XPATH_BOOLEAN(n, STR("c1/v != x"), false); - CHECK_XPATH_BOOLEAN(n, STR("x != c1/v"), false); -} - -TEST_XML(xpath_operators_equality_node_set_primitive, "1-11001nan") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - // node set vs number - CHECK_XPATH_BOOLEAN(c, STR("x = 1"), false); - CHECK_XPATH_BOOLEAN(c, STR("x != 1"), false); - CHECK_XPATH_BOOLEAN(c, STR("1 = x"), false); - CHECK_XPATH_BOOLEAN(c, STR("1 != x"), false); - - CHECK_XPATH_BOOLEAN(n, STR("c1/v = 1"), true); - CHECK_XPATH_BOOLEAN(n, 
STR("c1/v = -1"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v != 1"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v = 5"), false); - CHECK_XPATH_BOOLEAN(n, STR("c2/v = 1"), true); - - CHECK_XPATH_BOOLEAN(n, STR("1 = c1/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("-1 = c1/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("1 != c1/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("5 = c1/v"), false); - CHECK_XPATH_BOOLEAN(n, STR("1 = c2/v"), true); - -#ifndef MSVC6_NAN_BUG - CHECK_XPATH_BOOLEAN(n, STR("c2/v != 1"), true); - CHECK_XPATH_BOOLEAN(n, STR("1 != c2/v"), true); -#endif - - // node set vs string - CHECK_XPATH_BOOLEAN(c, STR("x = '1'"), false); - CHECK_XPATH_BOOLEAN(c, STR("x != '1'"), false); - CHECK_XPATH_BOOLEAN(c, STR("'1' = x"), false); - CHECK_XPATH_BOOLEAN(c, STR("'1' != x"), false); - - CHECK_XPATH_BOOLEAN(n, STR("c1/v = '1'"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v = '-1'"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v != '1'"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v = '5'"), false); - CHECK_XPATH_BOOLEAN(n, STR("c2/v = '1'"), true); - CHECK_XPATH_BOOLEAN(n, STR("c2/v != '1'"), true); - - CHECK_XPATH_BOOLEAN(n, STR("'1' = c1/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("'-1' = c1/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("'1' != c1/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("'5' = c1/v"), false); - CHECK_XPATH_BOOLEAN(n, STR("'1' = c2/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("'1' != c2/v"), true); - - // node set vs almost-numeric string just in case - CHECK_XPATH_BOOLEAN(n, STR("c1/v = '1.0'"), false); - - // node set vs boolean - special rules! 
empty sets are equal to true() - CHECK_XPATH_BOOLEAN(n, STR("x = true()"), false); - CHECK_XPATH_BOOLEAN(n, STR("x != true()"), true); - CHECK_XPATH_BOOLEAN(n, STR("x = false()"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v = true()"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v != true()"), false); - CHECK_XPATH_BOOLEAN(n, STR("c1/v = false()"), false); - - CHECK_XPATH_BOOLEAN(n, STR("true() = x"), false); - CHECK_XPATH_BOOLEAN(n, STR("true() != x"), true); - CHECK_XPATH_BOOLEAN(n, STR("false() = x"), true); - CHECK_XPATH_BOOLEAN(n, STR("true() = c1/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("true() != c1/v"), false); - CHECK_XPATH_BOOLEAN(n, STR("false() = c1/v"), false); -} - -TEST(xpath_operators_inequality_primitive) -{ - xml_node c; - - // number vs number - CHECK_XPATH_BOOLEAN(c, STR("1 < 2"), true); - CHECK_XPATH_BOOLEAN(c, STR("1 <= 2"), true); - CHECK_XPATH_BOOLEAN(c, STR("1 > 2"), false); - CHECK_XPATH_BOOLEAN(c, STR("1 >= 2"), false); - - CHECK_XPATH_BOOLEAN(c, STR("1 < 1"), false); - CHECK_XPATH_BOOLEAN(c, STR("1 <= 1"), true); - CHECK_XPATH_BOOLEAN(c, STR("1 > 1"), false); - CHECK_XPATH_BOOLEAN(c, STR("1 >= 1"), true); - - // infinity/nan - CHECK_XPATH_BOOLEAN(c, STR("1 div 0 <= 2 div 0"), true); - CHECK_XPATH_BOOLEAN(c, STR("1 div 0 < 2 div 0"), false); - CHECK_XPATH_BOOLEAN(c, STR("-1 div 0 < 2 div 0"), true); - CHECK_XPATH_BOOLEAN(c, STR("-1 div 0 > 2 div 0"), false); - -#ifndef MSVC6_NAN_BUG - CHECK_XPATH_BOOLEAN(c, STR("0 div 0 < 1"), false); - CHECK_XPATH_BOOLEAN(c, STR("0 div 0 <= 1"), false); - CHECK_XPATH_BOOLEAN(c, STR("0 div 0 > 1"), false); - CHECK_XPATH_BOOLEAN(c, STR("0 div 0 >= 1"), false); -#endif - - // upcast to number - CHECK_XPATH_BOOLEAN(c, STR("2 < '2'"), false); - CHECK_XPATH_BOOLEAN(c, STR("1 < '2'"), true); - CHECK_XPATH_BOOLEAN(c, STR("2 <= '2'"), true); - CHECK_XPATH_BOOLEAN(c, STR("3 <= '2'"), false); - CHECK_XPATH_BOOLEAN(c, STR("2 > '2'"), false); - CHECK_XPATH_BOOLEAN(c, STR("3 > '2'"), true); - CHECK_XPATH_BOOLEAN(c, STR("2 
>= '2'"), true); - CHECK_XPATH_BOOLEAN(c, STR("3 >= '2'"), true); - CHECK_XPATH_BOOLEAN(c, STR("1 >= true()"), true); - CHECK_XPATH_BOOLEAN(c, STR("1 > true()"), false); -} - -TEST_XML(xpath_operators_inequality_node_set_node_set, "1-1-1001nan1-4") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - // node set vs node set - CHECK_XPATH_BOOLEAN(c, STR("x < x"), false); - CHECK_XPATH_BOOLEAN(c, STR("x > x"), false); - CHECK_XPATH_BOOLEAN(c, STR("x <= x"), false); - CHECK_XPATH_BOOLEAN(c, STR("x >= x"), false); - - CHECK_XPATH_BOOLEAN(n, STR("c1/v > x"), false); - CHECK_XPATH_BOOLEAN(n, STR("c1/v < x"), false); - CHECK_XPATH_BOOLEAN(n, STR("c1/v >= x"), false); - CHECK_XPATH_BOOLEAN(n, STR("c1/v <= x"), false); - - CHECK_XPATH_BOOLEAN(n, STR("x > c1/v"), false); - CHECK_XPATH_BOOLEAN(n, STR("x < c1/v"), false); - CHECK_XPATH_BOOLEAN(n, STR("x >= c1/v"), false); - CHECK_XPATH_BOOLEAN(n, STR("x <= c1/v"), false); - - CHECK_XPATH_BOOLEAN(n, STR("c1/v > c3/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v >= c3/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v < c3/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v <= c3/v"), true); - -#ifndef MSVC6_NAN_BUG - CHECK_XPATH_BOOLEAN(n, STR("c1/v > c2/v"), false); - CHECK_XPATH_BOOLEAN(n, STR("c1/v >= c2/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v < c2/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v <= c2/v"), true); -#endif -} - -TEST_XML(xpath_operators_inequality_node_set_primitive, "1-1-1001nan") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - // node set vs number - CHECK_XPATH_BOOLEAN(c, STR("x < 0"), false); - CHECK_XPATH_BOOLEAN(c, STR("x > 0"), false); - CHECK_XPATH_BOOLEAN(c, STR("x <= 0"), false); - CHECK_XPATH_BOOLEAN(c, STR("x >= 0"), false); - - CHECK_XPATH_BOOLEAN(c, STR("0 < x"), false); - CHECK_XPATH_BOOLEAN(c, STR("0 > x"), false); - CHECK_XPATH_BOOLEAN(c, STR("0 <= x"), false); - CHECK_XPATH_BOOLEAN(c, STR("0 >= x"), false); - - CHECK_XPATH_BOOLEAN(n, STR("c1/v > 0"), true); - 
CHECK_XPATH_BOOLEAN(n, STR("c1/v > 1"), false); - CHECK_XPATH_BOOLEAN(n, STR("c1/v >= 0"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v < 0"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v <= 0"), true); - - CHECK_XPATH_BOOLEAN(n, STR("0 < c1/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("1 < c1/v"), false); - CHECK_XPATH_BOOLEAN(n, STR("0 <= c1/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("0 > c1/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("0 >= c1/v"), true); - - // node set vs string - CHECK_XPATH_BOOLEAN(n, STR("c1/v > '0'"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v > '1'"), false); - CHECK_XPATH_BOOLEAN(n, STR("c1/v >= '0'"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v < '0'"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v <= '0'"), true); - - CHECK_XPATH_BOOLEAN(n, STR("'0' < c1/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("'1' < c1/v"), false); - CHECK_XPATH_BOOLEAN(n, STR("'0' <= c1/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("'0' > c1/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("'0' >= c1/v"), true); - - // node set vs boolean - CHECK_XPATH_BOOLEAN(n, STR("c1/v > false()"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v > true()"), false); - CHECK_XPATH_BOOLEAN(n, STR("c1/v >= false()"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v < false()"), true); - CHECK_XPATH_BOOLEAN(n, STR("c1/v <= false()"), true); - - CHECK_XPATH_BOOLEAN(n, STR("false() < c1/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("true() < c1/v"), false); - CHECK_XPATH_BOOLEAN(n, STR("false() <= c1/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("false() > c1/v"), true); - CHECK_XPATH_BOOLEAN(n, STR("false() >= c1/v"), true); -} - -TEST(xpath_operators_boolean_precedence) -{ - xml_node c; - - CHECK_XPATH_BOOLEAN(c, STR("1 = 0 or 2 = 2"), true); - CHECK_XPATH_BOOLEAN(c, STR("1 = (0 or 2) = false()"), false); - CHECK_XPATH_BOOLEAN(c, STR("1 < 0 or 2 > 2"), false); - CHECK_XPATH_BOOLEAN(c, STR("2 < 1 = false()"), true); - CHECK_XPATH_BOOLEAN(c, STR("2 < (1 = false())"), false); - CHECK_XPATH_BOOLEAN(c, STR("3 > 2 > 1"), false); - 
CHECK_XPATH_BOOLEAN(c, STR("(3 > 2) > 1"), false); - CHECK_XPATH_BOOLEAN(c, STR("3 > (2 > 1)"), true); -} - -TEST_XML(xpath_operators_union, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(n, STR("employee | .")) % 2 % 3 % 4 % 6 % 8 % 11; - CHECK_XPATH_NODESET(n, STR("employee[@secretary] | employee[@assistant]")) % 4 % 6 % 8 % 11; - CHECK_XPATH_NODESET(n, STR("employee[@assistant] | employee[@secretary]")) % 4 % 6 % 8 % 11; - CHECK_XPATH_NODESET(n, STR("employee[@secretary] | employee[@nobody]")) % 4 % 8 % 11; - CHECK_XPATH_NODESET(n, STR("employee[@nobody] | employee[@secretary]")) % 4 % 8 % 11; - CHECK_XPATH_NODESET(n, STR("tail/preceding-sibling::employee | .")) % 2 % 3 % 4 % 6 % 8 % 11; - CHECK_XPATH_NODESET(n, STR(". | tail/preceding-sibling::employee | .")) % 2 % 3 % 4 % 6 % 8 % 11; -} - -TEST(xpath_operators_union_error) -{ - CHECK_XPATH_FAIL(STR(". | true()")); - CHECK_XPATH_FAIL(STR(". | 1")); - CHECK_XPATH_FAIL(STR(". | '1'")); - CHECK_XPATH_FAIL(STR(". | count(.)")); - CHECK_XPATH_FAIL(STR("true() | .")); - CHECK_XPATH_FAIL(STR("1 | .")); - CHECK_XPATH_FAIL(STR("'1' | .")); - CHECK_XPATH_FAIL(STR("count(.) 
| .")); -} - -TEST(xpath_operators_associativity_boolean) -{ - xml_node c; - - CHECK_XPATH_BOOLEAN(c, STR("false() or true() and true() and false()"), false); - CHECK_XPATH_BOOLEAN(c, STR("3 > 2 > 1"), false); - CHECK_XPATH_BOOLEAN(c, STR("4 > 3 > 2 > 1"), false); - CHECK_XPATH_BOOLEAN(c, STR("5 > 4 > 3 > 2 > 1"), false); - CHECK_XPATH_BOOLEAN(c, STR("1 < 2 < 3 < 4 < 5"), true); - CHECK_XPATH_BOOLEAN(c, STR("1 <= 2 <= 3 <= 4 <= 5"), true); - CHECK_XPATH_BOOLEAN(c, STR("5 >= 4 >= 3 >= 2 >= 1"), false); - CHECK_XPATH_BOOLEAN(c, STR("3 >= 2 >= 1"), true); - CHECK_XPATH_BOOLEAN(c, STR("2 >= 1"), true); - CHECK_XPATH_BOOLEAN(c, STR("4 >= 3 >= 2 >= 1"), false); - CHECK_XPATH_BOOLEAN(c, STR("((((5 > 4) > 3) > 2) > 1)"), false); - CHECK_XPATH_BOOLEAN(c, STR("2 != 3 != 1 != 4 != 0"), true); - CHECK_XPATH_BOOLEAN(c, STR("(((2 != 3) != 1) != 4) != 0"), true); - CHECK_XPATH_BOOLEAN(c, STR("2 != 3 != 1 != 4 != 1"), false); - CHECK_XPATH_BOOLEAN(c, STR("(((2 != 3) != 1) != 4) != 1"), false); - CHECK_XPATH_BOOLEAN(c, STR("2 = 3 = 1 = 4 = 0"), true); - CHECK_XPATH_BOOLEAN(c, STR("(((2 = 3) = 1) = 4) = 0"), true); - CHECK_XPATH_BOOLEAN(c, STR("2 = 3 = 1 = 4 = 1"), false); - CHECK_XPATH_BOOLEAN(c, STR("(((2 = 3) = 1) = 4) = 1"), false); -} - -TEST(xpath_operators_associativity_arithmetic) -{ - xml_node c; - - CHECK_XPATH_NUMBER(c, STR("2+1-1+1"), 3); - CHECK_XPATH_NUMBER(c, STR("1+2+1-1+1"), 4); - CHECK_XPATH_NUMBER(c, STR("1+1+2+1-1+1"), 5); - CHECK_XPATH_NUMBER(c, STR("1-1+1"), 1); -} - -#endif +#ifndef PUGIXML_NO_XPATH + +#include "common.hpp" + +TEST(xpath_operators_arithmetic) +{ + xml_node c; + + // incorrect unary operator + CHECK_XPATH_FAIL(STR("-")); + + // correct unary operator + CHECK_XPATH_NUMBER(c, STR("-1"), -1); + CHECK_XPATH_NUMBER(c, STR("--1"), 1); + CHECK_XPATH_NUMBER(c, STR("---1"), -1); + + // incorrect binary operators + CHECK_XPATH_FAIL(STR("5+")); + CHECK_XPATH_FAIL(STR("5-")); + CHECK_XPATH_FAIL(STR("5*")); + CHECK_XPATH_FAIL(STR("+5")); + 
CHECK_XPATH_FAIL(STR("*5")); + CHECK_XPATH_FAIL(STR("1div2")); + CHECK_XPATH_FAIL(STR("1mod")); + CHECK_XPATH_FAIL(STR("1div")); + + // correct trivial binary operators + CHECK_XPATH_NUMBER(c, STR("1 + 2"), 3); + CHECK_XPATH_NUMBER(c, STR("1+2"), 3); + CHECK_XPATH_NUMBER(c, STR("1 * 2"), 2); + CHECK_XPATH_NUMBER(c, STR("1*2"), 2); + CHECK_XPATH_NUMBER(c, STR("1 div 2"), 0.5); + + // operator precedence + CHECK_XPATH_NUMBER(c, STR("2 + 2 * 2 div 1 mod 3"), 3); + CHECK_XPATH_NUMBER(c, STR("2 + 2 * 2 div (1 mod 3)"), 6); + CHECK_XPATH_NUMBER(c, STR("(2 + 2) * 2 div (1 mod 3)"), 8); + CHECK_XPATH_NUMBER(c, STR("(2 + 2) * (2 div 1) mod 3"), 2); + CHECK_XPATH_NUMBER(c, STR("2 - -2"), 4); + CHECK_XPATH_NUMBER(c, STR("2 + -2"), 0); + CHECK_XPATH_NUMBER(c, STR("2--2"), 4); + CHECK_XPATH_NUMBER(c, STR("2+-2"), 0); + CHECK_XPATH_NUMBER(c, STR("1-2-3"), -4); + + // mod, from W3C standard + CHECK_XPATH_NUMBER(c, STR("5 mod 2"), 1); + CHECK_XPATH_NUMBER(c, STR("5 mod -2"), 1); + CHECK_XPATH_NUMBER(c, STR("-5 mod 2"), -1); + CHECK_XPATH_NUMBER(c, STR("-5 mod -2"), -1); +} + +TEST(xpath_operators_arithmetic_specials) +{ + xml_node c; + + // infinity/nan + CHECK_XPATH_STRING(c, STR("1 div 0"), STR("Infinity")); + CHECK_XPATH_STRING(c, STR("-1 div 0"), STR("-Infinity")); + CHECK_XPATH_STRING(c, STR("-1 div 0 + 1 div 0"), STR("NaN")); + CHECK_XPATH_STRING(c, STR("0 div 0"), STR("NaN")); + CHECK_XPATH_STRING(c, STR("1 div 0 + 1 div 0"), STR("Infinity")); + CHECK_XPATH_STRING(c, STR("-1 div 0 + -1 div 0"), STR("-Infinity")); + CHECK_XPATH_STRING(c, STR("1 div 0 + 100"), STR("Infinity")); + CHECK_XPATH_STRING(c, STR("-1 div 0 + 100"), STR("-Infinity")); + CHECK_XPATH_STRING(c, STR("0 div 0 + 100"), STR("NaN")); + + // unary - and multiplication clarifications from recommendations errata + CHECK_XPATH_STRING(c, STR("1 div -0"), STR("-Infinity")); + CHECK_XPATH_STRING(c, STR("-1 div -0"), STR("Infinity")); + CHECK_XPATH_STRING(c, STR("1 div (-0 * 1)"), STR("-Infinity")); + 
CHECK_XPATH_STRING(c, STR("-1 div (0 * -1)"), STR("Infinity")); + CHECK_XPATH_STRING(c, STR("1 div (-0 div 1)"), STR("-Infinity")); + CHECK_XPATH_STRING(c, STR("-1 div (0 div -1)"), STR("Infinity")); +} + +TEST_XML(xpath_operators_arithmetic_subtraction_parse, "1023") +{ + xml_node n = doc.child(STR("node")); + + // correct subtraction parsing, from W3C standard + CHECK_XPATH_NUMBER(n, STR("foo-bar"), 10); + CHECK_XPATH_NUMBER(n, STR("foo -bar"), -1); + CHECK_XPATH_NUMBER(n, STR("foo - bar"), -1); + CHECK_XPATH_NUMBER(n, STR("-foo-bar"), -10); + CHECK_XPATH_NUMBER(n, STR("-foo -bar"), -5); +} + +TEST(xpath_operators_logical) +{ + xml_node c; + + // boolean arithmetic + CHECK_XPATH_BOOLEAN(c, STR("true() or true()"), true); + CHECK_XPATH_BOOLEAN(c, STR("true() or false()"), true); + CHECK_XPATH_BOOLEAN(c, STR("false() or false()"), false); + CHECK_XPATH_BOOLEAN(c, STR("false() or true()"), true); + + CHECK_XPATH_BOOLEAN(c, STR("true() and true()"), true); + CHECK_XPATH_BOOLEAN(c, STR("true() and false()"), false); + CHECK_XPATH_BOOLEAN(c, STR("false() and false()"), false); + CHECK_XPATH_BOOLEAN(c, STR("false() and true()"), false); + + // boolean conversion + CHECK_XPATH_BOOLEAN(c, STR("1 or ''"), true); + CHECK_XPATH_BOOLEAN(c, STR("1 and ''"), false); + CHECK_XPATH_BOOLEAN(c, STR("0 or ''"), false); + CHECK_XPATH_BOOLEAN(c, STR("0 or 'a'"), true); +} + +TEST(xpath_operators_equality_primitive_boolean) +{ + xml_node c; + + // boolean vs boolan + CHECK_XPATH_BOOLEAN(c, STR("true() = true()"), true); + CHECK_XPATH_BOOLEAN(c, STR("false() = false()"), true); + CHECK_XPATH_BOOLEAN(c, STR("true() != false()"), true); + CHECK_XPATH_BOOLEAN(c, STR("false() != false()"), false); + + // upcast to boolean + CHECK_XPATH_BOOLEAN(c, STR("true() = 2"), true); + CHECK_XPATH_BOOLEAN(c, STR("true() != 2"), false); + CHECK_XPATH_BOOLEAN(c, STR("false() = 2"), false); + CHECK_XPATH_BOOLEAN(c, STR("false() != 2"), true); + CHECK_XPATH_BOOLEAN(c, STR("false() = 0"), true); + 
CHECK_XPATH_BOOLEAN(c, STR("false() != 0"), false); + + CHECK_XPATH_BOOLEAN(c, STR("2 = true()"), true); + CHECK_XPATH_BOOLEAN(c, STR("2 != true()"), false); + CHECK_XPATH_BOOLEAN(c, STR("2 = false()"), false); + CHECK_XPATH_BOOLEAN(c, STR("2 != false()"), true); + CHECK_XPATH_BOOLEAN(c, STR("0 = false()"), true); + CHECK_XPATH_BOOLEAN(c, STR("0 != false()"), false); +} + +TEST(xpath_operators_equality_primitive_number) +{ + xml_node c; + + // number vs number + CHECK_XPATH_BOOLEAN(c, STR("1 = 1"), true); + CHECK_XPATH_BOOLEAN(c, STR("0.5 = 0.5"), true); + CHECK_XPATH_BOOLEAN(c, STR("1 != 2"), true); + CHECK_XPATH_BOOLEAN(c, STR("1 = -1"), false); + + // infinity/nan + CHECK_XPATH_BOOLEAN(c, STR("1 div 0 = 2 div 0"), true); + CHECK_XPATH_BOOLEAN(c, STR("-1 div 0 != 2 div 0"), true); + +#ifndef MSVC6_NAN_BUG + CHECK_XPATH_BOOLEAN(c, STR("0 div 0 = 1"), false); + CHECK_XPATH_BOOLEAN(c, STR("0 div 0 != 1"), true); + CHECK_XPATH_BOOLEAN(c, STR("0 div 0 = 0 div 0"), false); +#endif + + // upcast to number + CHECK_XPATH_BOOLEAN(c, STR("2 = '2'"), true); + CHECK_XPATH_BOOLEAN(c, STR("2 != '2'"), false); + CHECK_XPATH_BOOLEAN(c, STR("'1' != 2"), true); + CHECK_XPATH_BOOLEAN(c, STR("'1' = 2"), false); +} + +TEST(xpath_operators_equality_primitive_string) +{ + xml_node c; + + // string vs string + CHECK_XPATH_BOOLEAN(c, STR("'a' = 'a'"), true); + CHECK_XPATH_BOOLEAN(c, STR("'a' = 'b'"), false); + CHECK_XPATH_BOOLEAN(c, STR("'ab' != 'a'"), true); + CHECK_XPATH_BOOLEAN(c, STR("'' != 'a'"), true); + CHECK_XPATH_BOOLEAN(c, STR("'a' != ''"), true); + CHECK_XPATH_BOOLEAN(c, STR("'' != ''"), false); +} + +TEST_XML(xpath_operators_equality_node_set_node_set, "abacbdabb") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + // node set vs node set + CHECK_XPATH_BOOLEAN(c, STR("x = x"), false); // empty node set compares as false with any other object via any comparison operator, as per XPath spec + CHECK_XPATH_BOOLEAN(c, STR("x != x"), false); + CHECK_XPATH_BOOLEAN(n, 
STR("c1/v = c2/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v = c3/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("c2/v = c3/v"), false); + CHECK_XPATH_BOOLEAN(n, STR("c1/v = c4/v"), false); + CHECK_XPATH_BOOLEAN(n, STR("c1/v = x"), false); + CHECK_XPATH_BOOLEAN(n, STR("x = c1"), false); + + CHECK_XPATH_BOOLEAN(n, STR("c1/v != c2/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v != c3/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("c2/v != c3/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v != c4/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v != c5/v"), true); // (a, b) != (a, b), since a != b, as per XPath spec (comparison operators are so not intutive) + CHECK_XPATH_BOOLEAN(n, STR("c3/v != c6/v"), false); + CHECK_XPATH_BOOLEAN(n, STR("c1/v != x"), false); + CHECK_XPATH_BOOLEAN(n, STR("x != c1/v"), false); +} + +TEST_XML(xpath_operators_equality_node_set_primitive, "1-11001nan") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + // node set vs number + CHECK_XPATH_BOOLEAN(c, STR("x = 1"), false); + CHECK_XPATH_BOOLEAN(c, STR("x != 1"), false); + CHECK_XPATH_BOOLEAN(c, STR("1 = x"), false); + CHECK_XPATH_BOOLEAN(c, STR("1 != x"), false); + + CHECK_XPATH_BOOLEAN(n, STR("c1/v = 1"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v = -1"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v != 1"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v = 5"), false); + CHECK_XPATH_BOOLEAN(n, STR("c2/v = 1"), true); + + CHECK_XPATH_BOOLEAN(n, STR("1 = c1/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("-1 = c1/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("1 != c1/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("5 = c1/v"), false); + CHECK_XPATH_BOOLEAN(n, STR("1 = c2/v"), true); + +#ifndef MSVC6_NAN_BUG + CHECK_XPATH_BOOLEAN(n, STR("c2/v != 1"), true); + CHECK_XPATH_BOOLEAN(n, STR("1 != c2/v"), true); +#endif + + // node set vs string + CHECK_XPATH_BOOLEAN(c, STR("x = '1'"), false); + CHECK_XPATH_BOOLEAN(c, STR("x != '1'"), false); + CHECK_XPATH_BOOLEAN(c, STR("'1' = x"), false); + CHECK_XPATH_BOOLEAN(c, STR("'1' != x"), 
false); + + CHECK_XPATH_BOOLEAN(n, STR("c1/v = '1'"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v = '-1'"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v != '1'"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v = '5'"), false); + CHECK_XPATH_BOOLEAN(n, STR("c2/v = '1'"), true); + CHECK_XPATH_BOOLEAN(n, STR("c2/v != '1'"), true); + + CHECK_XPATH_BOOLEAN(n, STR("'1' = c1/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("'-1' = c1/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("'1' != c1/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("'5' = c1/v"), false); + CHECK_XPATH_BOOLEAN(n, STR("'1' = c2/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("'1' != c2/v"), true); + + // node set vs almost-numeric string just in case + CHECK_XPATH_BOOLEAN(n, STR("c1/v = '1.0'"), false); + + // node set vs boolean - special rules! empty sets are equal to true() + CHECK_XPATH_BOOLEAN(n, STR("x = true()"), false); + CHECK_XPATH_BOOLEAN(n, STR("x != true()"), true); + CHECK_XPATH_BOOLEAN(n, STR("x = false()"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v = true()"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v != true()"), false); + CHECK_XPATH_BOOLEAN(n, STR("c1/v = false()"), false); + + CHECK_XPATH_BOOLEAN(n, STR("true() = x"), false); + CHECK_XPATH_BOOLEAN(n, STR("true() != x"), true); + CHECK_XPATH_BOOLEAN(n, STR("false() = x"), true); + CHECK_XPATH_BOOLEAN(n, STR("true() = c1/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("true() != c1/v"), false); + CHECK_XPATH_BOOLEAN(n, STR("false() = c1/v"), false); +} + +TEST(xpath_operators_inequality_primitive) +{ + xml_node c; + + // number vs number + CHECK_XPATH_BOOLEAN(c, STR("1 < 2"), true); + CHECK_XPATH_BOOLEAN(c, STR("1 <= 2"), true); + CHECK_XPATH_BOOLEAN(c, STR("1 > 2"), false); + CHECK_XPATH_BOOLEAN(c, STR("1 >= 2"), false); + + CHECK_XPATH_BOOLEAN(c, STR("1 < 1"), false); + CHECK_XPATH_BOOLEAN(c, STR("1 <= 1"), true); + CHECK_XPATH_BOOLEAN(c, STR("1 > 1"), false); + CHECK_XPATH_BOOLEAN(c, STR("1 >= 1"), true); + + // infinity/nan + CHECK_XPATH_BOOLEAN(c, STR("1 div 0 <= 2 div 
0"), true); + CHECK_XPATH_BOOLEAN(c, STR("1 div 0 < 2 div 0"), false); + CHECK_XPATH_BOOLEAN(c, STR("-1 div 0 < 2 div 0"), true); + CHECK_XPATH_BOOLEAN(c, STR("-1 div 0 > 2 div 0"), false); + +#ifndef MSVC6_NAN_BUG + CHECK_XPATH_BOOLEAN(c, STR("0 div 0 < 1"), false); + CHECK_XPATH_BOOLEAN(c, STR("0 div 0 <= 1"), false); + CHECK_XPATH_BOOLEAN(c, STR("0 div 0 > 1"), false); + CHECK_XPATH_BOOLEAN(c, STR("0 div 0 >= 1"), false); +#endif + + // upcast to number + CHECK_XPATH_BOOLEAN(c, STR("2 < '2'"), false); + CHECK_XPATH_BOOLEAN(c, STR("1 < '2'"), true); + CHECK_XPATH_BOOLEAN(c, STR("2 <= '2'"), true); + CHECK_XPATH_BOOLEAN(c, STR("3 <= '2'"), false); + CHECK_XPATH_BOOLEAN(c, STR("2 > '2'"), false); + CHECK_XPATH_BOOLEAN(c, STR("3 > '2'"), true); + CHECK_XPATH_BOOLEAN(c, STR("2 >= '2'"), true); + CHECK_XPATH_BOOLEAN(c, STR("3 >= '2'"), true); + CHECK_XPATH_BOOLEAN(c, STR("1 >= true()"), true); + CHECK_XPATH_BOOLEAN(c, STR("1 > true()"), false); +} + +TEST_XML(xpath_operators_inequality_node_set_node_set, "1-1-1001nan1-4") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + // node set vs node set + CHECK_XPATH_BOOLEAN(c, STR("x < x"), false); + CHECK_XPATH_BOOLEAN(c, STR("x > x"), false); + CHECK_XPATH_BOOLEAN(c, STR("x <= x"), false); + CHECK_XPATH_BOOLEAN(c, STR("x >= x"), false); + + CHECK_XPATH_BOOLEAN(n, STR("c1/v > x"), false); + CHECK_XPATH_BOOLEAN(n, STR("c1/v < x"), false); + CHECK_XPATH_BOOLEAN(n, STR("c1/v >= x"), false); + CHECK_XPATH_BOOLEAN(n, STR("c1/v <= x"), false); + + CHECK_XPATH_BOOLEAN(n, STR("x > c1/v"), false); + CHECK_XPATH_BOOLEAN(n, STR("x < c1/v"), false); + CHECK_XPATH_BOOLEAN(n, STR("x >= c1/v"), false); + CHECK_XPATH_BOOLEAN(n, STR("x <= c1/v"), false); + + CHECK_XPATH_BOOLEAN(n, STR("c1/v > c3/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v >= c3/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v < c3/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v <= c3/v"), true); + +#ifndef MSVC6_NAN_BUG + CHECK_XPATH_BOOLEAN(n, STR("c1/v > c2/v"), 
false); + CHECK_XPATH_BOOLEAN(n, STR("c1/v >= c2/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v < c2/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v <= c2/v"), true); +#endif +} + +TEST_XML(xpath_operators_inequality_node_set_primitive, "1-1-1001nan") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + // node set vs number + CHECK_XPATH_BOOLEAN(c, STR("x < 0"), false); + CHECK_XPATH_BOOLEAN(c, STR("x > 0"), false); + CHECK_XPATH_BOOLEAN(c, STR("x <= 0"), false); + CHECK_XPATH_BOOLEAN(c, STR("x >= 0"), false); + + CHECK_XPATH_BOOLEAN(c, STR("0 < x"), false); + CHECK_XPATH_BOOLEAN(c, STR("0 > x"), false); + CHECK_XPATH_BOOLEAN(c, STR("0 <= x"), false); + CHECK_XPATH_BOOLEAN(c, STR("0 >= x"), false); + + CHECK_XPATH_BOOLEAN(n, STR("c1/v > 0"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v > 1"), false); + CHECK_XPATH_BOOLEAN(n, STR("c1/v >= 0"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v < 0"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v <= 0"), true); + + CHECK_XPATH_BOOLEAN(n, STR("0 < c1/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("1 < c1/v"), false); + CHECK_XPATH_BOOLEAN(n, STR("0 <= c1/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("0 > c1/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("0 >= c1/v"), true); + + // node set vs string + CHECK_XPATH_BOOLEAN(n, STR("c1/v > '0'"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v > '1'"), false); + CHECK_XPATH_BOOLEAN(n, STR("c1/v >= '0'"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v < '0'"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v <= '0'"), true); + + CHECK_XPATH_BOOLEAN(n, STR("'0' < c1/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("'1' < c1/v"), false); + CHECK_XPATH_BOOLEAN(n, STR("'0' <= c1/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("'0' > c1/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("'0' >= c1/v"), true); + + // node set vs boolean + CHECK_XPATH_BOOLEAN(n, STR("c1/v > false()"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v > true()"), false); + CHECK_XPATH_BOOLEAN(n, STR("c1/v >= false()"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v < false()"), 
true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v <= false()"), true); + + CHECK_XPATH_BOOLEAN(n, STR("false() < c1/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("true() < c1/v"), false); + CHECK_XPATH_BOOLEAN(n, STR("false() <= c1/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("false() > c1/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("false() >= c1/v"), true); +} + +TEST(xpath_operators_boolean_precedence) +{ + xml_node c; + + CHECK_XPATH_BOOLEAN(c, STR("1 = 0 or 2 = 2"), true); + CHECK_XPATH_BOOLEAN(c, STR("1 = (0 or 2) = false()"), false); + CHECK_XPATH_BOOLEAN(c, STR("1 < 0 or 2 > 2"), false); + CHECK_XPATH_BOOLEAN(c, STR("2 < 1 = false()"), true); + CHECK_XPATH_BOOLEAN(c, STR("2 < (1 = false())"), false); + CHECK_XPATH_BOOLEAN(c, STR("3 > 2 > 1"), false); + CHECK_XPATH_BOOLEAN(c, STR("(3 > 2) > 1"), false); + CHECK_XPATH_BOOLEAN(c, STR("3 > (2 > 1)"), true); +} + +TEST_XML(xpath_operators_union, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(n, STR("employee | .")) % 2 % 3 % 4 % 6 % 8 % 11; + CHECK_XPATH_NODESET(n, STR("employee[@secretary] | employee[@assistant]")) % 4 % 6 % 8 % 11; + CHECK_XPATH_NODESET(n, STR("employee[@assistant] | employee[@secretary]")) % 4 % 6 % 8 % 11; + CHECK_XPATH_NODESET(n, STR("employee[@secretary] | employee[@nobody]")) % 4 % 8 % 11; + CHECK_XPATH_NODESET(n, STR("employee[@nobody] | employee[@secretary]")) % 4 % 8 % 11; + CHECK_XPATH_NODESET(n, STR("tail/preceding-sibling::employee | .")) % 2 % 3 % 4 % 6 % 8 % 11; + CHECK_XPATH_NODESET(n, STR(". | tail/preceding-sibling::employee | .")) % 2 % 3 % 4 % 6 % 8 % 11; +} + +TEST(xpath_operators_union_error) +{ + CHECK_XPATH_FAIL(STR(". | true()")); + CHECK_XPATH_FAIL(STR(". | 1")); + CHECK_XPATH_FAIL(STR(". | '1'")); + CHECK_XPATH_FAIL(STR(". | count(.)")); + CHECK_XPATH_FAIL(STR("true() | .")); + CHECK_XPATH_FAIL(STR("1 | .")); + CHECK_XPATH_FAIL(STR("'1' | .")); + CHECK_XPATH_FAIL(STR("count(.) 
| .")); +} + +TEST(xpath_operators_associativity_boolean) +{ + xml_node c; + + CHECK_XPATH_BOOLEAN(c, STR("false() or true() and true() and false()"), false); + CHECK_XPATH_BOOLEAN(c, STR("3 > 2 > 1"), false); + CHECK_XPATH_BOOLEAN(c, STR("4 > 3 > 2 > 1"), false); + CHECK_XPATH_BOOLEAN(c, STR("5 > 4 > 3 > 2 > 1"), false); + CHECK_XPATH_BOOLEAN(c, STR("1 < 2 < 3 < 4 < 5"), true); + CHECK_XPATH_BOOLEAN(c, STR("1 <= 2 <= 3 <= 4 <= 5"), true); + CHECK_XPATH_BOOLEAN(c, STR("5 >= 4 >= 3 >= 2 >= 1"), false); + CHECK_XPATH_BOOLEAN(c, STR("3 >= 2 >= 1"), true); + CHECK_XPATH_BOOLEAN(c, STR("2 >= 1"), true); + CHECK_XPATH_BOOLEAN(c, STR("4 >= 3 >= 2 >= 1"), false); + CHECK_XPATH_BOOLEAN(c, STR("((((5 > 4) > 3) > 2) > 1)"), false); + CHECK_XPATH_BOOLEAN(c, STR("2 != 3 != 1 != 4 != 0"), true); + CHECK_XPATH_BOOLEAN(c, STR("(((2 != 3) != 1) != 4) != 0"), true); + CHECK_XPATH_BOOLEAN(c, STR("2 != 3 != 1 != 4 != 1"), false); + CHECK_XPATH_BOOLEAN(c, STR("(((2 != 3) != 1) != 4) != 1"), false); + CHECK_XPATH_BOOLEAN(c, STR("2 = 3 = 1 = 4 = 0"), true); + CHECK_XPATH_BOOLEAN(c, STR("(((2 = 3) = 1) = 4) = 0"), true); + CHECK_XPATH_BOOLEAN(c, STR("2 = 3 = 1 = 4 = 1"), false); + CHECK_XPATH_BOOLEAN(c, STR("(((2 = 3) = 1) = 4) = 1"), false); +} + +TEST(xpath_operators_associativity_arithmetic) +{ + xml_node c; + + CHECK_XPATH_NUMBER(c, STR("2+1-1+1"), 3); + CHECK_XPATH_NUMBER(c, STR("1+2+1-1+1"), 4); + CHECK_XPATH_NUMBER(c, STR("1+1+2+1-1+1"), 5); + CHECK_XPATH_NUMBER(c, STR("1-1+1"), 1); +} + +#endif diff --git a/tests/test_xpath_parse.cpp b/tests/test_xpath_parse.cpp index ceede22..8c08ef9 100644 --- a/tests/test_xpath_parse.cpp +++ b/tests/test_xpath_parse.cpp @@ -1,272 +1,272 @@ -#ifndef PUGIXML_NO_XPATH - -#include "common.hpp" - -#include - -TEST(xpath_literal_parse) -{ - xml_node c; - CHECK_XPATH_STRING(c, STR("'a\"b'"), STR("a\"b")); - CHECK_XPATH_STRING(c, STR("\"a'b\""), STR("a'b")); - CHECK_XPATH_STRING(c, STR("\"\""), STR("")); - CHECK_XPATH_STRING(c, STR("\'\'"), STR("")); 
-} - -TEST(xpath_literal_error) -{ - CHECK_XPATH_FAIL(STR("\"")); - CHECK_XPATH_FAIL(STR("\"foo")); - CHECK_XPATH_FAIL(STR("\'")); - CHECK_XPATH_FAIL(STR("\'bar")); -} - -TEST(xpath_number_parse) -{ - xml_node c; - CHECK_XPATH_NUMBER(c, STR("0"), 0); - CHECK_XPATH_NUMBER(c, STR("123"), 123); - CHECK_XPATH_NUMBER(c, STR("123.456"), 123.456); - CHECK_XPATH_NUMBER(c, STR(".123"), 0.123); - CHECK_XPATH_NUMBER(c, STR("123.4567890123456789012345"), 123.4567890123456789012345); - CHECK_XPATH_NUMBER(c, STR("123."), 123); -} - -TEST(xpath_number_error) -{ - CHECK_XPATH_FAIL(STR("123a")); - CHECK_XPATH_FAIL(STR("123.a")); - CHECK_XPATH_FAIL(STR(".123a")); -} - -TEST(xpath_variables) -{ - CHECK_XPATH_FAIL(STR("$var")); // not implemented - CHECK_XPATH_FAIL(STR("$1")); -} - -TEST(xpath_empty_expression) -{ - CHECK_XPATH_FAIL(STR("")); -} - -TEST(xpath_lexer_error) -{ - CHECK_XPATH_FAIL(STR("!")); - CHECK_XPATH_FAIL(STR("&")); -} - -TEST(xpath_unmatched_braces) -{ - CHECK_XPATH_FAIL(STR("node[")); - CHECK_XPATH_FAIL(STR("node[1")); - CHECK_XPATH_FAIL(STR("node[]]")); - CHECK_XPATH_FAIL(STR("node(")); - CHECK_XPATH_FAIL(STR("node(()")); - CHECK_XPATH_FAIL(STR("(node)[1")); - CHECK_XPATH_FAIL(STR("(1")); -} - -TEST(xpath_incorrect_step) -{ - CHECK_XPATH_FAIL(STR("child::1")); - CHECK_XPATH_FAIL(STR("something::*")); - CHECK_XPATH_FAIL(STR("a::*")); - CHECK_XPATH_FAIL(STR("c::*")); - CHECK_XPATH_FAIL(STR("d::*")); - CHECK_XPATH_FAIL(STR("f::*")); - CHECK_XPATH_FAIL(STR("n::*")); - CHECK_XPATH_FAIL(STR("p::*")); -} - -TEST(xpath_semantics_error) -{ - CHECK_XPATH_FAIL(STR("1[1]")); - CHECK_XPATH_FAIL(STR("1 | 1")); -} - -TEST(xpath_semantics_posinv) // coverage for contains() -{ - xpath_query(STR("(node)[substring(1, 2, 3)]")); - xpath_query(STR("(node)[concat(1, 2, 3, 4)]")); - xpath_query(STR("(node)[count(foo)]")); - xpath_query(STR("(node)[local-name()]")); - xpath_query(STR("(node)[(node)[1]]")); -} - -TEST(xpath_parse_paths_valid) -{ - const char_t* paths[] = - { - // From 
Jaxen tests - STR("foo[.='bar']"), STR("foo[.!='bar']"), STR("/"), STR("*"), STR("//foo"), STR("/*"), STR("/."), STR("/foo[/bar[/baz]]"), - STR("/foo/bar/baz[(1 or 2) + 3 * 4 + 8 and 9]"), STR("/foo/bar/baz"), STR("(.)[1]"), STR("self::node()"), STR("."), STR("count(/)"), - STR("foo[1]"), STR("/baz[(1 or 2) + 3 * 4 + 8 and 9]"), STR("foo/bar[/baz[(1 or 2) - 3 mod 4 + 8 and 9 div 8]]"), - STR("foo/bar/yeah:baz[a/b/c and toast]"), STR("/foo/bar[../x='123']"), STR("/foo[@bar='1234']"), STR("foo|bar"), - STR("/foo|/bar[@id='1234']"), STR("count(//author/attribute::*)"), STR("/child::node()/child::node()[@id='_13563275']"), - STR("10 + (count(descendant::author) * 5)"), STR("10 + count(descendant::author) * 5"), STR("2 + (2 * 5)"), STR("//foo:bar"), - STR("count(//author)+5"), STR("count(//author)+count(//author/attribute::*)"), STR("/foo/bar[@a='1' and @c!='2']"), - STR("12 + (count(//author)+count(//author/attribute::*)) div 2"), STR("text()[.='foo']"), STR("/*/*[@id='123']") - STR("/foo/bar[@a='1' and @b='2']"), STR("/foo/bar[@a='1' and @b!='2']"), STR("//attribute::*[.!='crunchy']"), - STR("'//*[contains(string(text()),\"yada yada\")]'"), - - // From ajaxslt tests - STR("@*"), STR("@*|node()"), STR("/descendant-or-self::div"), STR("/div"), STR("//div"), STR("/descendant-or-self::node()/child::para"), - STR("substring('12345', 0, 3)"), STR("//title | //link"), STR("x//title"), STR("x/title"), STR("id('a')//title"), STR("//*[@about]"), - STR("count(descendant::*)"), STR("count(descendant::*) + count(ancestor::*)"), STR("@*|text()"), STR("*|/"), STR("source|destination"), - STR("page != 'to' and page != 'from'"), STR("substring-after(icon/@image, '/mapfiles/marker')"), STR("substring-before(str, c)"), STR("page = 'from'"), - STR("segments/@time"), STR("child::para"), STR("child::*"), STR("child::text()"), STR("child::node()"), STR("attribute::name"), STR("attribute::*"), - STR("descendant::para"), STR("ancestor::div"), STR("ancestor-or-self::div"), 
STR("descendant-or-self::para"), STR("self::para"), STR("child::*/child::para"), - STR("concat(substring-before(@image,'marker'),'icon',substring-after(@image,'marker'))"), STR("/"), STR("/descendant::para"), STR("/descendant::olist/child::item"), - STR("child::para[position()=1]"), STR("child::para[position()=last()]"), STR("child::para[position()=last()-1]"), STR("child::para[position()>1]"), - STR("following-sibling::chapter[position()=1]"), STR("preceding-sibling::chapter[position()=1]"), STR("/descendant::figure[position()=42]"), - STR("/child::doc/child::chapter[position()=5]/child::section[position()=2]"), STR("child::chapter/descendant::para"), STR("child::para[attribute::type='warning']"), - STR("child::para[attribute::type='warning'][position()=5]"), STR("child::para[position()=5][attribute::type='warning']"), STR("child::chapter[child::title='Introduction']"), - STR("child::chapter[child::title]"), STR("child::*[self::chapter or self::appendix]"), STR("child::*[self::chapter or self::appendix][position()=last()]"), - STR("count(//*[id='u1']|//*[id='u2'])"), STR("count(//*[id='u1']|//*[class='u'])"), STR("count(//*[class='u']|//*[class='u'])"), STR("count(//*[class='u']|//*[id='u1'])"), - STR("count(//*[@id='self']/ancestor-or-self::*)"), STR("count(//*[@id='self']/ancestor::*)"), STR("count(//*[@id='self']/attribute::*)"), STR("count(//*[@id='self']/child::*)"), - STR("count(//*[@id='self']/descendant-or-self::*)"), STR("count(//*[@id='self']/descendant::*)"), STR("count(//*[@id='self']/following-sibling::*)"), - STR("count(//*[@id='self']/following::*)"), STR("//*[@id='self']/parent::*/@id"), STR("count(//*[@id='self']/preceding-sibling::*)"), - STR("count(//*[@id='self']/preceding::*)"), STR("//*[@id='self']/self::*/@id"), STR("id('nested1')/div[1]//input[2]"), STR("id('foo')//div[contains(@id, 'useful')]//input"), - STR("(//table[@class='stylee'])//th[text()='theHeaderText']/../td"), STR("address"), STR("address=string(/page/user/defaultlocation)"), 
STR("count-of-snippet-of-url = 0"), - STR("daddr"), STR("form"), STR("form = 'from'"), STR("form = 'to'"), STR("form='near'"), STR("home"), STR("i"), STR("i > page and i < page + range"), - STR("i < page and i >= page - range"), STR("i < @max"), STR("i <= page"), STR("i + 1"), STR("i = page"), STR("i = 1"), STR("info = position() or (not(info) and position() = 1)"), - STR("is-first-order"), STR("is-first-order and snippets-exist"), STR("more"), STR("more > 0"), STR("near-point"), STR("page"), STR("page != 'from'"), STR("page != 'to'"), - STR("page != 'to' and page != 'from'"), STR("page > 1"), STR("page = 'basics'"), STR("page = 'details'"), STR("page = 'from'"), STR("page = 'to'"), STR("page='from'"), - STR("page='to'"), STR("r >= 0.5"), STR("r >= 1"), STR("r - 0"), STR("r - 1"), STR("r - 2"), STR("r - 3"), STR("r - 4"), STR("saddr"), STR("sources"), STR("sources[position() < details]"), - STR("src"), STR("str"), STR("\"'\""), STR("(//location[string(info/references/reference[1]/url)=string(current-url)]/info/references/reference[1])[1]"), - STR("(not(count-of-snippet-of-url = 0) and (position() = 1) or not(current-url = //locations/location[position() = last-pos]//reference[1]/url))"), - STR("(not(info) and position() = 1) or info = position()"), STR("."), STR("../@arg0"), STR("../@filterpng"), STR("/page/@filterpng"), STR("4"), STR("@attribution"), - STR("@id"), STR("@max > @num"), STR("@meters > 16093"), STR("@name"), STR("@start div @num + 1"), STR("@url"), STR("ad"), STR("address/line"), STR("adsmessage"), - STR("attr"), STR("boolean(location[@id='near'][icon/@image])"), STR("bubble/node()"), STR("calltoaction/node()"), STR("category"), STR("contains(str, c)"), - STR("count(//location[string(info/references/reference[1]/url)=string(current-url)]//snippet)"), STR("count(//snippet)"), STR("count(attr)"), STR("count(location)"), - STR("count(structured/source) > 1"), STR("description/node()"), STR("destination"), STR("destinationAddress"), STR("domain"), 
STR("false()"), STR("icon/@class != 'noicon'"), - STR("icon/@image"), STR("info"), STR("info/address/line"), STR("info/distance"), STR("info/distance and near-point"), STR("info/distance and info/phone and near-point"), - STR("info/distance or info/phone"), STR("info/panel/node()"), STR("info/phone"), STR("info/references/reference[1]"), STR("info/references/reference[1]/snippet"), - STR("info/references/reference[1]/url"), STR("info/title"), STR("info/title/node()"), STR("line"), STR("location"), STR("location[@id!='near']"), STR("location[@id='near'][icon/@image]"), - STR("location[position() > umlocations div 2]"), STR("location[position() <= numlocations div 2]"), STR("locations"), STR("locations/location"), STR("near"), STR("node()"), - STR("not(count-of-snippets = 0)"), STR("not(form = 'from')"), STR("not(form = 'near')"), STR("not(form = 'to')"), STR("not(../@page)"), STR("not(structured/source)"), STR("notice"), - STR("number(../@info)"), STR("number(../@items)"), STR("number(/page/@linewidth)"), STR("page/ads"), STR("page/directions"), STR("page/error"), STR("page/overlay"), - STR("page/overlay/locations/location"), STR("page/refinements"), STR("page/request/canonicalnear"), STR("page/request/near"), STR("page/request/query"), STR("page/spelling/suggestion"), - STR("page/user/defaultlocation"), STR("phone"), STR("position()"), STR("position() != 1"), STR("position() != last()"), STR("position() > 1"), STR("position() < details"), - STR("position()-1"), STR("query"), STR("references/@total"), STR("references/reference"), STR("references/reference/domain"), STR("references/reference/url"), - STR("reviews/@positive div (reviews/@positive + reviews/@negative) * 5"), STR("reviews/@positive div (reviews/@positive + reviews/@negative) * (5)"), STR("reviews/@total"), - STR("reviews/@total > 1"), STR("reviews/@total > 5"), STR("reviews/@total = 1"), STR("segments/@distance"), STR("segments/@time"), STR("segments/segment"), STR("shorttitle/node()"), - 
STR("snippet"), STR("snippet/node()"), STR("source"), STR("sourceAddress"), STR("sourceAddress and destinationAddress"), STR("string(../@daddr)"), STR("string(../@form)"), - STR("string(../@page)"), STR("string(../@saddr)"), STR("string(info/title)"), STR("string(page/request/canonicalnear) != ''"), STR("string(page/request/near) != ''"), - STR("string-length(address) > linewidth"), STR("structured/@total - details"), STR("structured/source"), STR("structured/source[@name]"), STR("substring(address, 1, linewidth - 3)"), - STR("substring-after(str, c)"), STR("substring-after(icon/@image, '/mapfiles/marker')"), STR("substring-before(str, c)"), STR("tagline/node()"), STR("targetedlocation"), - STR("title"), STR("title/node()"), STR("true()"), STR("url"), STR("visibleurl"), STR("id(\"level10\")/ancestor::SPAN"), STR("id(\"level10\")/ancestor-or-self::SPAN"), STR("//attribute::*"), - STR("child::HTML/child::BODY/child::H1"), STR("descendant::node()"), STR("descendant-or-self::SPAN"), STR("id(\"first\")/following::text()"), STR("id(\"first\")/following-sibling::node()"), - STR("id(\"level10\")/parent::node()"), STR("id(\"last\")/preceding::text()"), STR("id(\"last\")/preceding-sibling::node()"), STR("/HTML/BODY/H1/self::node()"), STR("//*[@name]"), - STR("id(\"pet\")/SELECT[@name=\"species\"]/OPTION[@selected]/@value"), STR("descendant::INPUT[@name=\"name\"]/@value"), STR("id(\"pet\")/INPUT[@name=\"gender\" and @checked]/@value"), - STR("//TEXTAREA[@name=\"description\"]/text()"), STR("id(\"div1\")|id(\"div2\")|id(\"div3 div4 div5\")"), STR("//LI[1]"), STR("//LI[last()]/text()"), STR("//LI[position() mod 2]/@class"), - STR("//text()[.=\"foo\"]"), STR("descendant-or-self::SPAN[position() > 2]"), STR("descendant::*[contains(@class,\" fruit \")]"), - - // ajaxslt considers this path invalid, however I believe it's valid as per spec - STR("***"), - - // Oasis MSFT considers this path invalid, however I believe it's valid as per spec - STR("**..**"), - - // Miscellaneous - 
STR("..***..***.***.***..***..***..") - }; - - for (size_t i = 0; i < sizeof(paths) / sizeof(paths[0]); ++i) - { - xpath_query q(paths[i]); - } -} - -TEST(xpath_parse_paths_valid_unicode) -{ - // From ajaxslt - const wchar_t* paths[] = - { - #ifdef U_LITERALS - L"/descendant-or-self::\u90e8\u5206", L"//\u90e8\u5206", L"substring('\uff11\uff12\uff13\uff14\uff15', 0, 3)", L"//\u30bf\u30a4\u30c8\u30eb | //\u30ea\u30f3\u30af", - L"\u8b0e//\u30bf\u30a4\u30c8\u30eb", L"//*[@\u30c7\u30b9\u30c6\u30a3\u30cd\u30a4\u30b7\u30e7\u30f3]", L"\u30da\u30fc\u30b8 = '\u304b\u3089'", - L"concat(substring-before(@\u30a4\u30e1\u30fc\u30b8,'\u76ee\u5370'),'\u30a2\u30a4\u30b3\u30f3',substring-after(@\u30a4\u30e1\u30fc\u30b8,'\u76ee\u5370'))", - L"\u30bd\u30fc\u30b9|\u30c7\u30b9\u30c6\u30a3\u30cd\u30a4\u30b7\u30e7\u30f3", L"\u30da\u30fc\u30b8 != '\u307e\u3067' and \u30da\u30fc\u30b8 != '\u304b\u3089'", - L"substring-after(\u30a2\u30a4\u30b3\u30f3/@\u30a4\u30e1\u30fc\u30b8, '/\u5730\u56f3\u30d5\u30a1\u30a4\u30eb/\u76ee\u5370')", L"child::\u6bb5\u843d", - L"substring-before(\u6587\u5b57\u5217, \u6587\u5b57)", L"\u30bb\u30b0\u30e1\u30f3\u30c8/@\u6642\u523b", L"attribute::\u540d\u524d", L"descendant::\u6bb5\u843d", - L"ancestor::\u90e8\u5206", L"ancestor-or-self::\u90e8\u5206", L"descendant-or-self::\u6bb5\u843d", L"self::\u6bb5\u843d", L"child::\u7ae0/descendant::\u6bb5\u843d", - L"child::*/child::\u6bb5\u843d", L"/descendant::\u6bb5\u843d", L"/descendant::\u9806\u5e8f\u30ea\u30b9\u30c8/child::\u9805\u76ee", L"child::\u6bb5\u843d[position()=1]", - L"child::\u6bb5\u843d[position()=last()]", L"child::\u6bb5\u843d[position()=last()-1]", L"child::\u6bb5\u843d[position()>1]", L"following-sibling::\u7ae0[position()=1]", - L"preceding-sibling::\u7ae0[position()=1]", L"/descendant::\u56f3\u8868[position()=42]", L"/child::\u6587\u66f8/child::\u7ae0[position()=5]/child::\u7bc0[position()=2]", - L"child::\u6bb5\u843d[attribute::\u30bf\u30a4\u30d7='\u8b66\u544a']", 
L"child::\u6bb5\u843d[attribute::\u30bf\u30a4\u30d7='\u8b66\u544a'][position()=5]", - L"child::\u6bb5\u843d[position()=5][attribute::\u30bf\u30a4\u30d7='\u8b66\u544a']", L"child::\u7ae0[child::\u30bf\u30a4\u30c8\u30eb='\u306f\u3058\u3081\u306b']", - L"child::\u7ae0[child::\u30bf\u30a4\u30c8\u30eb]", L"child::*[self::\u7ae0 or self::\u4ed8\u9332]", L"child::*[self::\u7ae0 or self::\u4ed8\u9332][position()=last()]", - #else - L"/descendant-or-self::\x90e8\x5206", L"//\x90e8\x5206", L"substring('\xff11\xff12\xff13\xff14\xff15', 0, 3)", L"//\x30bf\x30a4\x30c8\x30eb | //\x30ea\x30f3\x30af", - L"\x8b0e//\x30bf\x30a4\x30c8\x30eb", L"//*[@\x30c7\x30b9\x30c6\x30a3\x30cd\x30a4\x30b7\x30e7\x30f3]", L"\x30da\x30fc\x30b8 = '\x304b\x3089'", - L"concat(substring-before(@\x30a4\x30e1\x30fc\x30b8,'\x76ee\x5370'),'\x30a2\x30a4\x30b3\x30f3',substring-after(@\x30a4\x30e1\x30fc\x30b8,'\x76ee\x5370'))", - L"\x30bd\x30fc\x30b9|\x30c7\x30b9\x30c6\x30a3\x30cd\x30a4\x30b7\x30e7\x30f3", L"\x30da\x30fc\x30b8 != '\x307e\x3067' and \x30da\x30fc\x30b8 != '\x304b\x3089'", - L"substring-after(\x30a2\x30a4\x30b3\x30f3/@\x30a4\x30e1\x30fc\x30b8, '/\x5730\x56f3\x30d5\x30a1\x30a4\x30eb/\x76ee\x5370')", L"child::\x6bb5\x843d", - L"substring-before(\x6587\x5b57\x5217, \x6587\x5b57)", L"\x30bb\x30b0\x30e1\x30f3\x30c8/@\x6642\x523b", L"attribute::\x540d\x524d", L"descendant::\x6bb5\x843d", - L"ancestor::\x90e8\x5206", L"ancestor-or-self::\x90e8\x5206", L"descendant-or-self::\x6bb5\x843d", L"self::\x6bb5\x843d", L"child::\x7ae0/descendant::\x6bb5\x843d", - L"child::*/child::\x6bb5\x843d", L"/descendant::\x6bb5\x843d", L"/descendant::\x9806\x5e8f\x30ea\x30b9\x30c8/child::\x9805\x76ee", L"child::\x6bb5\x843d[position()=1]", - L"child::\x6bb5\x843d[position()=last()]", L"child::\x6bb5\x843d[position()=last()-1]", L"child::\x6bb5\x843d[position()>1]", L"following-sibling::\x7ae0[position()=1]", - L"preceding-sibling::\x7ae0[position()=1]", L"/descendant::\x56f3\x8868[position()=42]", 
L"/child::\x6587\x66f8/child::\x7ae0[position()=5]/child::\x7bc0[position()=2]", - L"child::\x6bb5\x843d[attribute::\x30bf\x30a4\x30d7='\x8b66\x544a']", L"child::\x6bb5\x843d[attribute::\x30bf\x30a4\x30d7='\x8b66\x544a'][position()=5]", - L"child::\x6bb5\x843d[position()=5][attribute::\x30bf\x30a4\x30d7='\x8b66\x544a']", L"child::\x7ae0[child::\x30bf\x30a4\x30c8\x30eb='\x306f\x3058\x3081\x306b']", - L"child::\x7ae0[child::\x30bf\x30a4\x30c8\x30eb]", L"child::*[self::\x7ae0 or self::\x4ed8\x9332]", L"child::*[self::\x7ae0 or self::\x4ed8\x9332][position()=last()]", - #endif - }; - - for (size_t i = 0; i < sizeof(paths) / sizeof(paths[0]); ++i) - { - #if defined(PUGIXML_WCHAR_MODE) - xpath_query q(paths[i]); - #elif !defined(PUGIXML_NO_STL) - std::basic_string path_utf8 = pugi::as_utf8(paths[i]); - xpath_query q(path_utf8.c_str()); - #endif - } -} - -TEST(xpath_parse_invalid) -{ - const char_t* paths[] = - { - // From Jaxen tests - STR("//:p"), STR("/foo/bar/"), STR("12 + (count(//author)+count(//author/attribute::*)) / 2"), STR("id()/2"), STR("+"), - STR("///triple slash"), STR("/numbers numbers"), STR("/a/b[c > d]efg"), STR("/inv/child::"), STR("/invoice/@test[abcd"), - STR("/invoice/@test[abcd > x"), STR("string-length('a"), STR("/descendant::()"), STR("(1 + 1"), STR("!false()"), - STR("$author"), STR("10 + $foo"), STR("$foo:bar"), STR("$varname[@a='1']"), STR("foo/$variable/foo"), - STR(".[1]"), STR("chyld::foo"), STR("foo/tacos()"), STR("foo/tacos()"), STR("/foo/bar[baz"), STR("//"), STR("*:foo"), - STR("/cracker/cheese[(mold > 1) and (sense/taste"), - - // From xpath-as3 tests - STR("a b"), STR("//self::node())"), STR("/x/y[contains(self::node())"), STR("/x/y[contains(self::node()]"), STR("///"), STR("text::a"), - - // From haXe-xpath tests - STR("|/gjs"), STR("+3"), STR("/html/body/p != ---'div'/a"), STR(""), STR("@"), STR("#akf"), STR(",") - - // Miscellaneous - STR("..."), STR("...."), STR("**"), STR("****"), STR("******"), 
STR("..***..***.***.***..***..***..*"), STR("/[1]") - }; - - for (size_t i = 0; i < sizeof(paths) / sizeof(paths[0]); ++i) - { - CHECK_XPATH_FAIL(paths[i]); - } -} - -TEST_XML(xpath_parse_absolute, "
") -{ - CHECK_XPATH_NODESET(doc, STR("/")) % 1; - - CHECK_XPATH_NODESET(doc, STR("/div/s")) % 3; - CHECK_XPATH_NODESET(doc, STR("/ div /s")) % 3; - CHECK_XPATH_FAIL(STR("/ div 5")); - - CHECK_XPATH_NODESET(doc, STR("/*/s")) % 3; - CHECK_XPATH_NODESET(doc, STR("/ * /s")) % 3; - CHECK_XPATH_FAIL(STR("/ * 5")); - - CHECK_XPATH_NODESET(doc, STR("/*[/]")) % 2; -} - -#endif +#ifndef PUGIXML_NO_XPATH + +#include "common.hpp" + +#include + +TEST(xpath_literal_parse) +{ + xml_node c; + CHECK_XPATH_STRING(c, STR("'a\"b'"), STR("a\"b")); + CHECK_XPATH_STRING(c, STR("\"a'b\""), STR("a'b")); + CHECK_XPATH_STRING(c, STR("\"\""), STR("")); + CHECK_XPATH_STRING(c, STR("\'\'"), STR("")); +} + +TEST(xpath_literal_error) +{ + CHECK_XPATH_FAIL(STR("\"")); + CHECK_XPATH_FAIL(STR("\"foo")); + CHECK_XPATH_FAIL(STR("\'")); + CHECK_XPATH_FAIL(STR("\'bar")); +} + +TEST(xpath_number_parse) +{ + xml_node c; + CHECK_XPATH_NUMBER(c, STR("0"), 0); + CHECK_XPATH_NUMBER(c, STR("123"), 123); + CHECK_XPATH_NUMBER(c, STR("123.456"), 123.456); + CHECK_XPATH_NUMBER(c, STR(".123"), 0.123); + CHECK_XPATH_NUMBER(c, STR("123.4567890123456789012345"), 123.4567890123456789012345); + CHECK_XPATH_NUMBER(c, STR("123."), 123); +} + +TEST(xpath_number_error) +{ + CHECK_XPATH_FAIL(STR("123a")); + CHECK_XPATH_FAIL(STR("123.a")); + CHECK_XPATH_FAIL(STR(".123a")); +} + +TEST(xpath_variables) +{ + CHECK_XPATH_FAIL(STR("$var")); // not implemented + CHECK_XPATH_FAIL(STR("$1")); +} + +TEST(xpath_empty_expression) +{ + CHECK_XPATH_FAIL(STR("")); +} + +TEST(xpath_lexer_error) +{ + CHECK_XPATH_FAIL(STR("!")); + CHECK_XPATH_FAIL(STR("&")); +} + +TEST(xpath_unmatched_braces) +{ + CHECK_XPATH_FAIL(STR("node[")); + CHECK_XPATH_FAIL(STR("node[1")); + CHECK_XPATH_FAIL(STR("node[]]")); + CHECK_XPATH_FAIL(STR("node(")); + CHECK_XPATH_FAIL(STR("node(()")); + CHECK_XPATH_FAIL(STR("(node)[1")); + CHECK_XPATH_FAIL(STR("(1")); +} + +TEST(xpath_incorrect_step) +{ + CHECK_XPATH_FAIL(STR("child::1")); + 
CHECK_XPATH_FAIL(STR("something::*")); + CHECK_XPATH_FAIL(STR("a::*")); + CHECK_XPATH_FAIL(STR("c::*")); + CHECK_XPATH_FAIL(STR("d::*")); + CHECK_XPATH_FAIL(STR("f::*")); + CHECK_XPATH_FAIL(STR("n::*")); + CHECK_XPATH_FAIL(STR("p::*")); +} + +TEST(xpath_semantics_error) +{ + CHECK_XPATH_FAIL(STR("1[1]")); + CHECK_XPATH_FAIL(STR("1 | 1")); +} + +TEST(xpath_semantics_posinv) // coverage for contains() +{ + xpath_query(STR("(node)[substring(1, 2, 3)]")); + xpath_query(STR("(node)[concat(1, 2, 3, 4)]")); + xpath_query(STR("(node)[count(foo)]")); + xpath_query(STR("(node)[local-name()]")); + xpath_query(STR("(node)[(node)[1]]")); +} + +TEST(xpath_parse_paths_valid) +{ + const char_t* paths[] = + { + // From Jaxen tests + STR("foo[.='bar']"), STR("foo[.!='bar']"), STR("/"), STR("*"), STR("//foo"), STR("/*"), STR("/."), STR("/foo[/bar[/baz]]"), + STR("/foo/bar/baz[(1 or 2) + 3 * 4 + 8 and 9]"), STR("/foo/bar/baz"), STR("(.)[1]"), STR("self::node()"), STR("."), STR("count(/)"), + STR("foo[1]"), STR("/baz[(1 or 2) + 3 * 4 + 8 and 9]"), STR("foo/bar[/baz[(1 or 2) - 3 mod 4 + 8 and 9 div 8]]"), + STR("foo/bar/yeah:baz[a/b/c and toast]"), STR("/foo/bar[../x='123']"), STR("/foo[@bar='1234']"), STR("foo|bar"), + STR("/foo|/bar[@id='1234']"), STR("count(//author/attribute::*)"), STR("/child::node()/child::node()[@id='_13563275']"), + STR("10 + (count(descendant::author) * 5)"), STR("10 + count(descendant::author) * 5"), STR("2 + (2 * 5)"), STR("//foo:bar"), + STR("count(//author)+5"), STR("count(//author)+count(//author/attribute::*)"), STR("/foo/bar[@a='1' and @c!='2']"), + STR("12 + (count(//author)+count(//author/attribute::*)) div 2"), STR("text()[.='foo']"), STR("/*/*[@id='123']") + STR("/foo/bar[@a='1' and @b='2']"), STR("/foo/bar[@a='1' and @b!='2']"), STR("//attribute::*[.!='crunchy']"), + STR("'//*[contains(string(text()),\"yada yada\")]'"), + + // From ajaxslt tests + STR("@*"), STR("@*|node()"), STR("/descendant-or-self::div"), STR("/div"), STR("//div"), 
STR("/descendant-or-self::node()/child::para"), + STR("substring('12345', 0, 3)"), STR("//title | //link"), STR("x//title"), STR("x/title"), STR("id('a')//title"), STR("//*[@about]"), + STR("count(descendant::*)"), STR("count(descendant::*) + count(ancestor::*)"), STR("@*|text()"), STR("*|/"), STR("source|destination"), + STR("page != 'to' and page != 'from'"), STR("substring-after(icon/@image, '/mapfiles/marker')"), STR("substring-before(str, c)"), STR("page = 'from'"), + STR("segments/@time"), STR("child::para"), STR("child::*"), STR("child::text()"), STR("child::node()"), STR("attribute::name"), STR("attribute::*"), + STR("descendant::para"), STR("ancestor::div"), STR("ancestor-or-self::div"), STR("descendant-or-self::para"), STR("self::para"), STR("child::*/child::para"), + STR("concat(substring-before(@image,'marker'),'icon',substring-after(@image,'marker'))"), STR("/"), STR("/descendant::para"), STR("/descendant::olist/child::item"), + STR("child::para[position()=1]"), STR("child::para[position()=last()]"), STR("child::para[position()=last()-1]"), STR("child::para[position()>1]"), + STR("following-sibling::chapter[position()=1]"), STR("preceding-sibling::chapter[position()=1]"), STR("/descendant::figure[position()=42]"), + STR("/child::doc/child::chapter[position()=5]/child::section[position()=2]"), STR("child::chapter/descendant::para"), STR("child::para[attribute::type='warning']"), + STR("child::para[attribute::type='warning'][position()=5]"), STR("child::para[position()=5][attribute::type='warning']"), STR("child::chapter[child::title='Introduction']"), + STR("child::chapter[child::title]"), STR("child::*[self::chapter or self::appendix]"), STR("child::*[self::chapter or self::appendix][position()=last()]"), + STR("count(//*[id='u1']|//*[id='u2'])"), STR("count(//*[id='u1']|//*[class='u'])"), STR("count(//*[class='u']|//*[class='u'])"), STR("count(//*[class='u']|//*[id='u1'])"), + STR("count(//*[@id='self']/ancestor-or-self::*)"), 
STR("count(//*[@id='self']/ancestor::*)"), STR("count(//*[@id='self']/attribute::*)"), STR("count(//*[@id='self']/child::*)"), + STR("count(//*[@id='self']/descendant-or-self::*)"), STR("count(//*[@id='self']/descendant::*)"), STR("count(//*[@id='self']/following-sibling::*)"), + STR("count(//*[@id='self']/following::*)"), STR("//*[@id='self']/parent::*/@id"), STR("count(//*[@id='self']/preceding-sibling::*)"), + STR("count(//*[@id='self']/preceding::*)"), STR("//*[@id='self']/self::*/@id"), STR("id('nested1')/div[1]//input[2]"), STR("id('foo')//div[contains(@id, 'useful')]//input"), + STR("(//table[@class='stylee'])//th[text()='theHeaderText']/../td"), STR("address"), STR("address=string(/page/user/defaultlocation)"), STR("count-of-snippet-of-url = 0"), + STR("daddr"), STR("form"), STR("form = 'from'"), STR("form = 'to'"), STR("form='near'"), STR("home"), STR("i"), STR("i > page and i < page + range"), + STR("i < page and i >= page - range"), STR("i < @max"), STR("i <= page"), STR("i + 1"), STR("i = page"), STR("i = 1"), STR("info = position() or (not(info) and position() = 1)"), + STR("is-first-order"), STR("is-first-order and snippets-exist"), STR("more"), STR("more > 0"), STR("near-point"), STR("page"), STR("page != 'from'"), STR("page != 'to'"), + STR("page != 'to' and page != 'from'"), STR("page > 1"), STR("page = 'basics'"), STR("page = 'details'"), STR("page = 'from'"), STR("page = 'to'"), STR("page='from'"), + STR("page='to'"), STR("r >= 0.5"), STR("r >= 1"), STR("r - 0"), STR("r - 1"), STR("r - 2"), STR("r - 3"), STR("r - 4"), STR("saddr"), STR("sources"), STR("sources[position() < details]"), + STR("src"), STR("str"), STR("\"'\""), STR("(//location[string(info/references/reference[1]/url)=string(current-url)]/info/references/reference[1])[1]"), + STR("(not(count-of-snippet-of-url = 0) and (position() = 1) or not(current-url = //locations/location[position() = last-pos]//reference[1]/url))"), + STR("(not(info) and position() = 1) or info = position()"), 
STR("."), STR("../@arg0"), STR("../@filterpng"), STR("/page/@filterpng"), STR("4"), STR("@attribution"), + STR("@id"), STR("@max > @num"), STR("@meters > 16093"), STR("@name"), STR("@start div @num + 1"), STR("@url"), STR("ad"), STR("address/line"), STR("adsmessage"), + STR("attr"), STR("boolean(location[@id='near'][icon/@image])"), STR("bubble/node()"), STR("calltoaction/node()"), STR("category"), STR("contains(str, c)"), + STR("count(//location[string(info/references/reference[1]/url)=string(current-url)]//snippet)"), STR("count(//snippet)"), STR("count(attr)"), STR("count(location)"), + STR("count(structured/source) > 1"), STR("description/node()"), STR("destination"), STR("destinationAddress"), STR("domain"), STR("false()"), STR("icon/@class != 'noicon'"), + STR("icon/@image"), STR("info"), STR("info/address/line"), STR("info/distance"), STR("info/distance and near-point"), STR("info/distance and info/phone and near-point"), + STR("info/distance or info/phone"), STR("info/panel/node()"), STR("info/phone"), STR("info/references/reference[1]"), STR("info/references/reference[1]/snippet"), + STR("info/references/reference[1]/url"), STR("info/title"), STR("info/title/node()"), STR("line"), STR("location"), STR("location[@id!='near']"), STR("location[@id='near'][icon/@image]"), + STR("location[position() > umlocations div 2]"), STR("location[position() <= numlocations div 2]"), STR("locations"), STR("locations/location"), STR("near"), STR("node()"), + STR("not(count-of-snippets = 0)"), STR("not(form = 'from')"), STR("not(form = 'near')"), STR("not(form = 'to')"), STR("not(../@page)"), STR("not(structured/source)"), STR("notice"), + STR("number(../@info)"), STR("number(../@items)"), STR("number(/page/@linewidth)"), STR("page/ads"), STR("page/directions"), STR("page/error"), STR("page/overlay"), + STR("page/overlay/locations/location"), STR("page/refinements"), STR("page/request/canonicalnear"), STR("page/request/near"), STR("page/request/query"), 
STR("page/spelling/suggestion"), + STR("page/user/defaultlocation"), STR("phone"), STR("position()"), STR("position() != 1"), STR("position() != last()"), STR("position() > 1"), STR("position() < details"), + STR("position()-1"), STR("query"), STR("references/@total"), STR("references/reference"), STR("references/reference/domain"), STR("references/reference/url"), + STR("reviews/@positive div (reviews/@positive + reviews/@negative) * 5"), STR("reviews/@positive div (reviews/@positive + reviews/@negative) * (5)"), STR("reviews/@total"), + STR("reviews/@total > 1"), STR("reviews/@total > 5"), STR("reviews/@total = 1"), STR("segments/@distance"), STR("segments/@time"), STR("segments/segment"), STR("shorttitle/node()"), + STR("snippet"), STR("snippet/node()"), STR("source"), STR("sourceAddress"), STR("sourceAddress and destinationAddress"), STR("string(../@daddr)"), STR("string(../@form)"), + STR("string(../@page)"), STR("string(../@saddr)"), STR("string(info/title)"), STR("string(page/request/canonicalnear) != ''"), STR("string(page/request/near) != ''"), + STR("string-length(address) > linewidth"), STR("structured/@total - details"), STR("structured/source"), STR("structured/source[@name]"), STR("substring(address, 1, linewidth - 3)"), + STR("substring-after(str, c)"), STR("substring-after(icon/@image, '/mapfiles/marker')"), STR("substring-before(str, c)"), STR("tagline/node()"), STR("targetedlocation"), + STR("title"), STR("title/node()"), STR("true()"), STR("url"), STR("visibleurl"), STR("id(\"level10\")/ancestor::SPAN"), STR("id(\"level10\")/ancestor-or-self::SPAN"), STR("//attribute::*"), + STR("child::HTML/child::BODY/child::H1"), STR("descendant::node()"), STR("descendant-or-self::SPAN"), STR("id(\"first\")/following::text()"), STR("id(\"first\")/following-sibling::node()"), + STR("id(\"level10\")/parent::node()"), STR("id(\"last\")/preceding::text()"), STR("id(\"last\")/preceding-sibling::node()"), STR("/HTML/BODY/H1/self::node()"), STR("//*[@name]"), + 
STR("id(\"pet\")/SELECT[@name=\"species\"]/OPTION[@selected]/@value"), STR("descendant::INPUT[@name=\"name\"]/@value"), STR("id(\"pet\")/INPUT[@name=\"gender\" and @checked]/@value"), + STR("//TEXTAREA[@name=\"description\"]/text()"), STR("id(\"div1\")|id(\"div2\")|id(\"div3 div4 div5\")"), STR("//LI[1]"), STR("//LI[last()]/text()"), STR("//LI[position() mod 2]/@class"), + STR("//text()[.=\"foo\"]"), STR("descendant-or-self::SPAN[position() > 2]"), STR("descendant::*[contains(@class,\" fruit \")]"), + + // ajaxslt considers this path invalid, however I believe it's valid as per spec + STR("***"), + + // Oasis MSFT considers this path invalid, however I believe it's valid as per spec + STR("**..**"), + + // Miscellaneous + STR("..***..***.***.***..***..***..") + }; + + for (size_t i = 0; i < sizeof(paths) / sizeof(paths[0]); ++i) + { + xpath_query q(paths[i]); + } +} + +TEST(xpath_parse_paths_valid_unicode) +{ + // From ajaxslt + const wchar_t* paths[] = + { + #ifdef U_LITERALS + L"/descendant-or-self::\u90e8\u5206", L"//\u90e8\u5206", L"substring('\uff11\uff12\uff13\uff14\uff15', 0, 3)", L"//\u30bf\u30a4\u30c8\u30eb | //\u30ea\u30f3\u30af", + L"\u8b0e//\u30bf\u30a4\u30c8\u30eb", L"//*[@\u30c7\u30b9\u30c6\u30a3\u30cd\u30a4\u30b7\u30e7\u30f3]", L"\u30da\u30fc\u30b8 = '\u304b\u3089'", + L"concat(substring-before(@\u30a4\u30e1\u30fc\u30b8,'\u76ee\u5370'),'\u30a2\u30a4\u30b3\u30f3',substring-after(@\u30a4\u30e1\u30fc\u30b8,'\u76ee\u5370'))", + L"\u30bd\u30fc\u30b9|\u30c7\u30b9\u30c6\u30a3\u30cd\u30a4\u30b7\u30e7\u30f3", L"\u30da\u30fc\u30b8 != '\u307e\u3067' and \u30da\u30fc\u30b8 != '\u304b\u3089'", + L"substring-after(\u30a2\u30a4\u30b3\u30f3/@\u30a4\u30e1\u30fc\u30b8, '/\u5730\u56f3\u30d5\u30a1\u30a4\u30eb/\u76ee\u5370')", L"child::\u6bb5\u843d", + L"substring-before(\u6587\u5b57\u5217, \u6587\u5b57)", L"\u30bb\u30b0\u30e1\u30f3\u30c8/@\u6642\u523b", L"attribute::\u540d\u524d", L"descendant::\u6bb5\u843d", + L"ancestor::\u90e8\u5206", 
L"ancestor-or-self::\u90e8\u5206", L"descendant-or-self::\u6bb5\u843d", L"self::\u6bb5\u843d", L"child::\u7ae0/descendant::\u6bb5\u843d", + L"child::*/child::\u6bb5\u843d", L"/descendant::\u6bb5\u843d", L"/descendant::\u9806\u5e8f\u30ea\u30b9\u30c8/child::\u9805\u76ee", L"child::\u6bb5\u843d[position()=1]", + L"child::\u6bb5\u843d[position()=last()]", L"child::\u6bb5\u843d[position()=last()-1]", L"child::\u6bb5\u843d[position()>1]", L"following-sibling::\u7ae0[position()=1]", + L"preceding-sibling::\u7ae0[position()=1]", L"/descendant::\u56f3\u8868[position()=42]", L"/child::\u6587\u66f8/child::\u7ae0[position()=5]/child::\u7bc0[position()=2]", + L"child::\u6bb5\u843d[attribute::\u30bf\u30a4\u30d7='\u8b66\u544a']", L"child::\u6bb5\u843d[attribute::\u30bf\u30a4\u30d7='\u8b66\u544a'][position()=5]", + L"child::\u6bb5\u843d[position()=5][attribute::\u30bf\u30a4\u30d7='\u8b66\u544a']", L"child::\u7ae0[child::\u30bf\u30a4\u30c8\u30eb='\u306f\u3058\u3081\u306b']", + L"child::\u7ae0[child::\u30bf\u30a4\u30c8\u30eb]", L"child::*[self::\u7ae0 or self::\u4ed8\u9332]", L"child::*[self::\u7ae0 or self::\u4ed8\u9332][position()=last()]", + #else + L"/descendant-or-self::\x90e8\x5206", L"//\x90e8\x5206", L"substring('\xff11\xff12\xff13\xff14\xff15', 0, 3)", L"//\x30bf\x30a4\x30c8\x30eb | //\x30ea\x30f3\x30af", + L"\x8b0e//\x30bf\x30a4\x30c8\x30eb", L"//*[@\x30c7\x30b9\x30c6\x30a3\x30cd\x30a4\x30b7\x30e7\x30f3]", L"\x30da\x30fc\x30b8 = '\x304b\x3089'", + L"concat(substring-before(@\x30a4\x30e1\x30fc\x30b8,'\x76ee\x5370'),'\x30a2\x30a4\x30b3\x30f3',substring-after(@\x30a4\x30e1\x30fc\x30b8,'\x76ee\x5370'))", + L"\x30bd\x30fc\x30b9|\x30c7\x30b9\x30c6\x30a3\x30cd\x30a4\x30b7\x30e7\x30f3", L"\x30da\x30fc\x30b8 != '\x307e\x3067' and \x30da\x30fc\x30b8 != '\x304b\x3089'", + L"substring-after(\x30a2\x30a4\x30b3\x30f3/@\x30a4\x30e1\x30fc\x30b8, '/\x5730\x56f3\x30d5\x30a1\x30a4\x30eb/\x76ee\x5370')", L"child::\x6bb5\x843d", + L"substring-before(\x6587\x5b57\x5217, \x6587\x5b57)", 
L"\x30bb\x30b0\x30e1\x30f3\x30c8/@\x6642\x523b", L"attribute::\x540d\x524d", L"descendant::\x6bb5\x843d", + L"ancestor::\x90e8\x5206", L"ancestor-or-self::\x90e8\x5206", L"descendant-or-self::\x6bb5\x843d", L"self::\x6bb5\x843d", L"child::\x7ae0/descendant::\x6bb5\x843d", + L"child::*/child::\x6bb5\x843d", L"/descendant::\x6bb5\x843d", L"/descendant::\x9806\x5e8f\x30ea\x30b9\x30c8/child::\x9805\x76ee", L"child::\x6bb5\x843d[position()=1]", + L"child::\x6bb5\x843d[position()=last()]", L"child::\x6bb5\x843d[position()=last()-1]", L"child::\x6bb5\x843d[position()>1]", L"following-sibling::\x7ae0[position()=1]", + L"preceding-sibling::\x7ae0[position()=1]", L"/descendant::\x56f3\x8868[position()=42]", L"/child::\x6587\x66f8/child::\x7ae0[position()=5]/child::\x7bc0[position()=2]", + L"child::\x6bb5\x843d[attribute::\x30bf\x30a4\x30d7='\x8b66\x544a']", L"child::\x6bb5\x843d[attribute::\x30bf\x30a4\x30d7='\x8b66\x544a'][position()=5]", + L"child::\x6bb5\x843d[position()=5][attribute::\x30bf\x30a4\x30d7='\x8b66\x544a']", L"child::\x7ae0[child::\x30bf\x30a4\x30c8\x30eb='\x306f\x3058\x3081\x306b']", + L"child::\x7ae0[child::\x30bf\x30a4\x30c8\x30eb]", L"child::*[self::\x7ae0 or self::\x4ed8\x9332]", L"child::*[self::\x7ae0 or self::\x4ed8\x9332][position()=last()]", + #endif + }; + + for (size_t i = 0; i < sizeof(paths) / sizeof(paths[0]); ++i) + { + #if defined(PUGIXML_WCHAR_MODE) + xpath_query q(paths[i]); + #elif !defined(PUGIXML_NO_STL) + std::basic_string path_utf8 = pugi::as_utf8(paths[i]); + xpath_query q(path_utf8.c_str()); + #endif + } +} + +TEST(xpath_parse_invalid) +{ + const char_t* paths[] = + { + // From Jaxen tests + STR("//:p"), STR("/foo/bar/"), STR("12 + (count(//author)+count(//author/attribute::*)) / 2"), STR("id()/2"), STR("+"), + STR("///triple slash"), STR("/numbers numbers"), STR("/a/b[c > d]efg"), STR("/inv/child::"), STR("/invoice/@test[abcd"), + STR("/invoice/@test[abcd > x"), STR("string-length('a"), STR("/descendant::()"), STR("(1 + 1"), 
STR("!false()"), + STR("$author"), STR("10 + $foo"), STR("$foo:bar"), STR("$varname[@a='1']"), STR("foo/$variable/foo"), + STR(".[1]"), STR("chyld::foo"), STR("foo/tacos()"), STR("foo/tacos()"), STR("/foo/bar[baz"), STR("//"), STR("*:foo"), + STR("/cracker/cheese[(mold > 1) and (sense/taste"), + + // From xpath-as3 tests + STR("a b"), STR("//self::node())"), STR("/x/y[contains(self::node())"), STR("/x/y[contains(self::node()]"), STR("///"), STR("text::a"), + + // From haXe-xpath tests + STR("|/gjs"), STR("+3"), STR("/html/body/p != ---'div'/a"), STR(""), STR("@"), STR("#akf"), STR(",") + + // Miscellaneous + STR("..."), STR("...."), STR("**"), STR("****"), STR("******"), STR("..***..***.***.***..***..***..*"), STR("/[1]") + }; + + for (size_t i = 0; i < sizeof(paths) / sizeof(paths[0]); ++i) + { + CHECK_XPATH_FAIL(paths[i]); + } +} + +TEST_XML(xpath_parse_absolute, "
") +{ + CHECK_XPATH_NODESET(doc, STR("/")) % 1; + + CHECK_XPATH_NODESET(doc, STR("/div/s")) % 3; + CHECK_XPATH_NODESET(doc, STR("/ div /s")) % 3; + CHECK_XPATH_FAIL(STR("/ div 5")); + + CHECK_XPATH_NODESET(doc, STR("/*/s")) % 3; + CHECK_XPATH_NODESET(doc, STR("/ * /s")) % 3; + CHECK_XPATH_FAIL(STR("/ * 5")); + + CHECK_XPATH_NODESET(doc, STR("/*[/]")) % 2; +} + +#endif diff --git a/tests/test_xpath_paths.cpp b/tests/test_xpath_paths.cpp index b726a5a..2799f40 100644 --- a/tests/test_xpath_paths.cpp +++ b/tests/test_xpath_paths.cpp @@ -1,472 +1,472 @@ -#ifndef PUGIXML_NO_XPATH - -#include "common.hpp" - -TEST_XML(xpath_paths_axes_child, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("child:: node()")); - - CHECK_XPATH_NODESET(n, STR("child:: node()")) % 4 % 7 % 8; // child, another, last - CHECK_XPATH_NODESET(n, STR("another/child:: node()")); - - CHECK_XPATH_NODESET(n, STR("@attr/child::node()")); -} - -TEST_XML(xpath_paths_axes_descendant, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("descendant:: node()")); - - CHECK_XPATH_NODESET(n, STR("descendant:: node()")) % 4 % 6 % 7 % 8 % 9; // child, subchild, another, subchild, last - CHECK_XPATH_NODESET(doc, STR("descendant:: node()")) % 2 % 4 % 6 % 7 % 8 % 9; // node, child, subchild, another, subchild, last - CHECK_XPATH_NODESET(n, STR("another/descendant:: node()")) % 8; // subchild - CHECK_XPATH_NODESET(n, STR("last/descendant:: node()")); - - CHECK_XPATH_NODESET(n, STR("@attr/descendant::node()")); -} - -TEST_XML(xpath_paths_axes_parent, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("parent:: node()")); - - CHECK_XPATH_NODESET(n.child(STR("child")), STR("parent:: node()")) % 2; // node - CHECK_XPATH_NODESET(n, STR("child/subchild/parent:: node()")) % 4; // child - CHECK_XPATH_NODESET(n, STR("@attr/parent:: node()")) % 2; // node - CHECK_XPATH_NODESET(n, STR("parent:: node()")) % 1; 
// root - CHECK_XPATH_NODESET(doc, STR("parent:: node()")); -} - -TEST_XML(xpath_paths_axes_ancestor, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("ancestor:: node()")); - - CHECK_XPATH_NODESET(n.child(STR("child")), STR("ancestor:: node()")) % 2 % 1; // node, root - CHECK_XPATH_NODESET(n, STR("child/subchild/ancestor:: node()")) % 4 % 2 % 1; // child, node, root - CHECK_XPATH_NODESET(n, STR("child/@attr/ancestor:: node()")) % 4 % 2 % 1; // child, node, root - CHECK_XPATH_NODESET(n, STR("ancestor:: node()")) % 1; // root - CHECK_XPATH_NODESET(doc, STR("ancestor:: node()")); -} - -TEST_XML(xpath_paths_axes_following_sibling, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("following-sibling:: node()")); - - CHECK_XPATH_NODESET(n.child(STR("child")), STR("following-sibling:: node()")) % 8 % 10; // another, last - CHECK_XPATH_NODESET(n.child(STR("last")), STR("following-sibling:: node()")); - CHECK_XPATH_NODESET(n, STR("@attr1/following-sibling:: node()")); // attributes are not siblings -} - -TEST_XML(xpath_paths_axes_preceding_sibling, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("preceding-sibling:: node()")); - - CHECK_XPATH_NODESET(n.child(STR("child")), STR("preceding-sibling:: node()")); - CHECK_XPATH_NODESET(n.child(STR("last")), STR("preceding-sibling:: node()")) % 8 % 5; // another, child - CHECK_XPATH_NODESET(n, STR("@attr2/following-sibling:: node()")); // attributes are not siblings -} - -TEST_XML(xpath_paths_axes_following, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("following:: node()")); - - CHECK_XPATH_NODESET(n, STR("following:: node()")); // no descendants - CHECK_XPATH_NODESET(n.child(STR("child")), STR("following:: node()")) % 8 % 9 % 10 % 11; // another, subchild, almost, last - CHECK_XPATH_NODESET(n.child(STR("child")).child(STR("subchild")), STR("following:: 
node()")) % 8 % 9 % 10 % 11; // another, subchild, almost, last - CHECK_XPATH_NODESET(n.child(STR("last")), STR("following:: node()")); - - CHECK_XPATH_NODESET(n, STR("@attr1/following::node()")) % 5 % 7 % 8 % 9 % 10 % 11; // child, subchild, another, subchild, almost, last - because @/following - CHECK_XPATH_NODESET(n, STR("child/@attr/following::node()")) % 7 % 8 % 9 % 10 % 11; // subchild, another, subchild, almost, last -} - -TEST_XML(xpath_paths_axes_preceding, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("preceding:: node()")); - - CHECK_XPATH_NODESET(n.child(STR("child")), STR("preceding:: node()")); // no ancestors - CHECK_XPATH_NODESET(n.child(STR("last")), STR("preceding:: node()")) % 11 % 9 % 8 % 7 % 5; // almost, subchild, another, subchild, child - CHECK_XPATH_NODESET(n.child(STR("another")).child(STR("subchild")), STR("preceding:: node()")) % 7 % 5; // subchild, child - CHECK_XPATH_NODESET(n, STR("preceding:: node()")); - - CHECK_XPATH_NODESET(n, STR("child/@attr/preceding::node()")); // no ancestors - CHECK_XPATH_NODESET(n, STR("//subchild[@id]/@id/preceding::node()")) % 7 % 5; // subchild, child -} - -TEST_XML(xpath_paths_axes_attribute, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("attribute:: node()")); - - CHECK_XPATH_NODESET(n.child(STR("child")), STR("attribute:: node()")) % 6; // child/@attr - CHECK_XPATH_NODESET(n.child(STR("last")), STR("attribute:: node()")); - CHECK_XPATH_NODESET(n, STR("attribute:: node()")) % 3 % 4; // node/@attr1 node/@attr2 - CHECK_XPATH_NODESET(doc, STR("descendant-or-self:: node()/attribute:: node()")) % 3 % 4 % 6; // all attributes - CHECK_XPATH_NODESET(n.child(STR("another")), STR("attribute:: node()")); // namespace nodes are not attributes - - CHECK_XPATH_NODESET(n, STR("@attr1/attribute::node()")); -} - -TEST_XML(xpath_paths_axes_namespace, "") -{ - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(n, 
STR("namespace:: node()")); // namespace nodes are not supported - CHECK_XPATH_NODESET(n, STR("@attr/attribute::node()")); -} - -TEST_XML(xpath_paths_axes_self, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("self:: node()")); - - CHECK_XPATH_NODESET(n.child(STR("child")), STR("self:: node()")) % 4; // child - CHECK_XPATH_NODESET(n, STR("self:: node()")) % 2; // node - CHECK_XPATH_NODESET(n, STR("child/self:: node()")) % 4; // child - CHECK_XPATH_NODESET(n, STR("child/@attr/self:: node()")) % 5; // @attr - CHECK_XPATH_NODESET(doc, STR("self:: node()")) % 1; // root -} - -TEST_XML(xpath_paths_axes_descendant_or_self, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("descendant-or-self:: node()")); - - CHECK_XPATH_NODESET(n, STR("descendant-or-self:: node()")) % 2 % 4 % 6 % 7 % 8 % 9; // node, child, subchild, another, subchild, last - CHECK_XPATH_NODESET(doc, STR("descendant-or-self:: node()")) % 1 % 2 % 4 % 6 % 7 % 8 % 9; // root, node, child, subchild, another, subchild, last - CHECK_XPATH_NODESET(n, STR("another/descendant-or-self:: node()")) % 7 % 8; // another, subchild - CHECK_XPATH_NODESET(n, STR("last/descendant-or-self:: node()")) % 9; // last - - CHECK_XPATH_NODESET(n, STR("child/@attr/descendant-or-self::node()")) % 5; // @attr -} - -TEST_XML(xpath_paths_axes_ancestor_or_self, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("ancestor-or-self:: node()")); - - CHECK_XPATH_NODESET(n.child(STR("child")), STR("ancestor-or-self:: node()")) % 4 % 2 % 1; // child, node, root - CHECK_XPATH_NODESET(n, STR("child/subchild/ancestor-or-self:: node()")) % 6 % 4 % 2 % 1; // subchild, child, node, root - CHECK_XPATH_NODESET(n, STR("child/@attr/ancestor-or-self:: node()")) % 5 % 4 % 2 % 1; // @attr, child, node, root - CHECK_XPATH_NODESET(n, STR("ancestor-or-self:: node()")) % 2 % 1; // root, node - CHECK_XPATH_NODESET(doc, 
STR("ancestor-or-self:: node()")) % 1; // root - CHECK_XPATH_NODESET(n, STR("ancestor-or-self:: node()")) % 2 % 1; // root, node - CHECK_XPATH_NODESET(n, STR("last/ancestor-or-self::node()")) % 9 % 2 % 1; // root, node, last -} - -TEST_XML(xpath_paths_axes_abbrev, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - // @ axis - CHECK_XPATH_NODESET(c, STR("@attr")); - CHECK_XPATH_NODESET(n, STR("@attr")) % 3; - - // no axis - child implied - CHECK_XPATH_NODESET(c, STR("foo")); - CHECK_XPATH_NODESET(n, STR("foo")) % 4; - CHECK_XPATH_NODESET(doc, STR("node()")) % 2; - - // @ axis should disable all other axis specifiers - CHECK_XPATH_FAIL(STR("@child::foo")); - CHECK_XPATH_FAIL(STR("@attribute::foo")); -} - -TEST_XML(xpath_paths_nodetest_all, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("*")); - CHECK_XPATH_NODESET(c, STR("child::*")); - - CHECK_XPATH_NODESET(n, STR("*")) % 5 % 6 % 7 % 8; - CHECK_XPATH_NODESET(n, STR("child::*")) % 5 % 6 % 7 % 8; - CHECK_XPATH_NODESET(n, STR("attribute::*")) % 3 % 4; -} - -TEST_XML_FLAGS(xpath_paths_nodetest_name, "", parse_default | parse_pi) -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("c1")); - CHECK_XPATH_NODESET(c, STR("child::c1")); - - CHECK_XPATH_NODESET(n, STR("c1")) % 5; - CHECK_XPATH_NODESET(n, STR("x:c2")) % 6; - - CHECK_XPATH_NODESET(n, STR("child::c1")) % 5; - CHECK_XPATH_NODESET(n, STR("child::x:c2")) % 6; - - CHECK_XPATH_NODESET(n, STR("attribute::a1")) % 3; - CHECK_XPATH_NODESET(n, STR("attribute::x:a2")) % 4; - CHECK_XPATH_NODESET(n, STR("@x:a2")) % 4; -} - -TEST_XML(xpath_paths_nodetest_all_in_namespace, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("x:*")); - CHECK_XPATH_NODESET(c, STR("child::x:*")); - - CHECK_XPATH_NODESET(n, STR("x:*")) % 6 % 8; - CHECK_XPATH_NODESET(n, STR("child::x:*")) % 6 % 8; - - CHECK_XPATH_NODESET(n, STR("attribute::x:*")) % 4; - 
CHECK_XPATH_NODESET(n, STR("@x:*")) % 4; - - CHECK_XPATH_FAIL(STR(":*")); - CHECK_XPATH_FAIL(STR("@:*")); -} - -TEST_XML_FLAGS(xpath_paths_nodetest_type, "pcdata", parse_default | parse_pi | parse_comments) -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - // check on empty nodes - CHECK_XPATH_NODESET(c, STR("node()")); - CHECK_XPATH_NODESET(c, STR("text()")); - CHECK_XPATH_NODESET(c, STR("comment()")); - CHECK_XPATH_NODESET(c, STR("processing-instruction()")); - CHECK_XPATH_NODESET(c, STR("processing-instruction('foobar')")); - - // child axis - CHECK_XPATH_NODESET(n, STR("node()")) % 4 % 5 % 6 % 7 % 8 % 9; - CHECK_XPATH_NODESET(n, STR("text()")) % 4 % 9; - CHECK_XPATH_NODESET(n, STR("comment()")) % 8; - CHECK_XPATH_NODESET(n, STR("processing-instruction()")) % 6 % 7; - CHECK_XPATH_NODESET(n, STR("processing-instruction('pi2')")) % 7; - - // attribute axis - CHECK_XPATH_NODESET(n, STR("@node()")) % 3; - CHECK_XPATH_NODESET(n, STR("@text()")); - CHECK_XPATH_NODESET(n, STR("@comment()")); - CHECK_XPATH_NODESET(n, STR("@processing-instruction()")); - CHECK_XPATH_NODESET(n, STR("@processing-instruction('pi2')")); - - // incorrect 'argument' number - CHECK_XPATH_FAIL(STR("node('')")); - CHECK_XPATH_FAIL(STR("text('')")); - CHECK_XPATH_FAIL(STR("comment('')")); - CHECK_XPATH_FAIL(STR("processing-instruction(1)")); - CHECK_XPATH_FAIL(STR("processing-instruction('', '')")); - CHECK_XPATH_FAIL(STR("processing-instruction(concat('a', 'b'))")); -} - -TEST_XML_FLAGS(xpath_paths_nodetest_principal, "pcdata", parse_default | parse_pi | parse_comments) -{ - // node() test is true for any node type - CHECK_XPATH_NODESET(doc, STR("//node()")) % 2 % 4 % 5 % 6 % 7 % 8 % 9 % 10; - CHECK_XPATH_NODESET(doc, STR("//attribute::node()")) % 3 % 11; - CHECK_XPATH_NODESET(doc, STR("//attribute::node()/ancestor-or-self::node()")) % 1 % 2 % 3 % 10 % 11; - - // name test is true only for node with principal node type (depends on axis) - CHECK_XPATH_NODESET(doc, 
STR("node/child::child")) % 5; - CHECK_XPATH_NODESET(doc, STR("node/attribute::attr")) % 3; - CHECK_XPATH_NODESET(doc, STR("node/child::pi1")); - CHECK_XPATH_NODESET(doc, STR("node/child::attr")); - CHECK_XPATH_NODESET(doc, STR("node/child::child/self::child")) % 5; - CHECK_XPATH_NODESET(doc, STR("node/attribute::attr/self::attr")); // attribute is not of element type - CHECK_XPATH_NODESET(doc, STR("node/child::child/ancestor-or-self::child")) % 5; - CHECK_XPATH_NODESET(doc, STR("node/attribute::attr/ancestor-or-self::attr")); // attribute is not of element type - CHECK_XPATH_NODESET(doc, STR("node/child::child/descendant-or-self::child")) % 5; - CHECK_XPATH_NODESET(doc, STR("node/attribute::attr/descendant-or-self::attr")); // attribute is not of element type - - // any name test is true only for node with principal node type (depends on axis) - CHECK_XPATH_NODESET(doc, STR("node/child::*")) % 5; - CHECK_XPATH_NODESET(doc, STR("node/attribute::*")) % 3; - CHECK_XPATH_NODESET(doc, STR("node/child::*/self::*")) % 5; - CHECK_XPATH_NODESET(doc, STR("node/attribute::*/self::*")); // attribute is not of element type - CHECK_XPATH_NODESET(doc, STR("node/child::*/ancestor-or-self::*")) % 5 % 2; - CHECK_XPATH_NODESET(doc, STR("node/attribute::*/ancestor-or-self::*")) % 2; // attribute is not of element type - CHECK_XPATH_NODESET(doc, STR("node/child::*/descendant-or-self::*")) % 5; - CHECK_XPATH_NODESET(doc, STR("node/attribute::*/descendant-or-self::*")); // attribute is not of element type - - // namespace test is true only for node with principal node type (depends on axis) - CHECK_XPATH_NODESET(doc, STR("child::abra:*")) % 10; - CHECK_XPATH_NODESET(doc, STR("child::abra:*/attribute::abra:*")) % 11; - CHECK_XPATH_NODESET(doc, STR("child::abra:*/self::abra:*")) % 10; - CHECK_XPATH_NODESET(doc, STR("child::abra:*/attribute::abra:*/self::abra:*")); // attribute is not of element type - CHECK_XPATH_NODESET(doc, STR("child::abra:*/ancestor-or-self::abra:*")) % 10; - 
CHECK_XPATH_NODESET(doc, STR("child::abra:*/attribute::abra:*/ancestor-or-self::abra:*")) % 10; // attribute is not of element type - CHECK_XPATH_NODESET(doc, STR("child::abra:*/descendant-or-self::abra:*")) % 10; - CHECK_XPATH_NODESET(doc, STR("child::abra:*/attribute::abra:*/descendant-or-self::abra:*")); // attribute is not of element type -} - -TEST_XML(xpath_paths_absolute, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("/foo")); - CHECK_XPATH_NODESET(n, STR("/foo")); - CHECK_XPATH_NODESET(n, STR("/node/foo")) % 3; - CHECK_XPATH_NODESET(n.child(STR("foo")), STR("/node/foo")) % 3; - - CHECK_XPATH_NODESET(c, STR("/")); - CHECK_XPATH_NODESET(n, STR("/")) % 1; - CHECK_XPATH_NODESET(n.child(STR("foo")), STR("/")) % 1; -} - -TEST_XML(xpath_paths_step_abbrev, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR(".")); - CHECK_XPATH_NODESET(c, STR("..")); - - CHECK_XPATH_NODESET(n, STR(".")) % 2; - CHECK_XPATH_NODESET(n, STR("..")) % 1; - CHECK_XPATH_NODESET(n, STR("../node")) % 2; - CHECK_XPATH_NODESET(n.child(STR("foo")), STR("..")) % 2; - - CHECK_XPATH_FAIL(STR(".node")); - CHECK_XPATH_FAIL(STR("..node")); -} - -TEST_XML(xpath_paths_relative_abbrev, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("foo//bar")); - - CHECK_XPATH_NODESET(n, STR("foo/foo")) % 4 % 5; - CHECK_XPATH_NODESET(n, STR("foo//foo")) % 4 % 5; - CHECK_XPATH_NODESET(n, STR(".//foo")) % 3 % 4 % 5; -} - -TEST_XML(xpath_paths_absolute_abbrev, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("//bar")); - - CHECK_XPATH_NODESET(n, STR("//foo")) % 3 % 4 % 5; - CHECK_XPATH_NODESET(n.child(STR("foo")), STR("//foo")) % 3 % 4 % 5; - CHECK_XPATH_NODESET(doc, STR("//foo")) % 3 % 4 % 5; -} - -TEST_XML(xpath_paths_predicate_boolean, "") -{ - xml_node n = doc.child(STR("node")).child(STR("chapter")).next_sibling().next_sibling(); - - 
CHECK_XPATH_NODESET(n, STR("following-sibling::chapter[position()=1]")) % 6; - CHECK_XPATH_NODESET(n, STR("following-sibling::chapter[position()=2]")) % 7; - CHECK_XPATH_NODESET(n, STR("preceding-sibling::chapter[position()=1]")) % 4; - CHECK_XPATH_NODESET(n, STR("preceding-sibling::chapter[position()=2]")) % 3; -} - -TEST_XML(xpath_paths_predicate_number, "") -{ - xml_node n = doc.child(STR("node")).child(STR("chapter")).next_sibling().next_sibling(); - - CHECK_XPATH_NODESET(n, STR("following-sibling::chapter[1]")) % 6; - CHECK_XPATH_NODESET(n, STR("following-sibling::chapter[2]")) % 7; - CHECK_XPATH_NODESET(n, STR("preceding-sibling::chapter[1]")) % 4; - CHECK_XPATH_NODESET(n, STR("preceding-sibling::chapter[2]")) % 3; -} - -TEST_XML(xpath_paths_predicate_several, "") -{ - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(n, STR("employee")) % 3 % 4 % 6 % 8 % 11; - CHECK_XPATH_NODESET(n, STR("employee[@secretary]")) % 4 % 8 % 11; - CHECK_XPATH_NODESET(n, STR("employee[@assistant]")) % 6 % 8 % 11; - CHECK_XPATH_NODESET(n, STR("employee[@secretary][@assistant]")) % 8 % 11; - CHECK_XPATH_NODESET(n, STR("employee[@assistant][@secretary]")) % 8 % 11; - CHECK_XPATH_NODESET(n, STR("employee[@secretary and @assistant]")) % 8 % 11; -} - -TEST_XML(xpath_paths_predicate_filter_boolean, "") -{ - xml_node n = doc.child(STR("node")).child(STR("chapter")).next_sibling().next_sibling(); - - CHECK_XPATH_NODESET(n, STR("(following-sibling::chapter)[position()=1]")) % 6; - CHECK_XPATH_NODESET(n, STR("(following-sibling::chapter)[position()=2]")) % 7; - CHECK_XPATH_NODESET(n, STR("(preceding-sibling::chapter)[position()=1]")) % 3; - CHECK_XPATH_NODESET(n, STR("(preceding-sibling::chapter)[position()=2]")) % 4; -} - -TEST_XML(xpath_paths_predicate_filter_number, "") -{ - xml_node n = doc.child(STR("node")).child(STR("chapter")).next_sibling().next_sibling(); - - CHECK_XPATH_NODESET(n, STR("(following-sibling::chapter)[1]")) % 6; - CHECK_XPATH_NODESET(n, 
STR("(following-sibling::chapter)[2]")) % 7; - CHECK_XPATH_NODESET(n, STR("(preceding-sibling::chapter)[1]")) % 3; - CHECK_XPATH_NODESET(n, STR("(preceding-sibling::chapter)[2]")) % 4; -} - -TEST_XML(xpath_paths_predicate_filter_posinv, "") -{ - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(n, STR("employee")) % 3 % 4 % 6 % 8 % 11; - CHECK_XPATH_NODESET(n, STR("(employee[@secretary])[@assistant]")) % 8 % 11; - CHECK_XPATH_NODESET(n, STR("((employee)[@assistant])[@secretary]")) % 8 % 11; -} - -TEST_XML(xpath_paths_step_compose, "") -{ - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(n, STR("(.)/foo")) % 3 % 6; - CHECK_XPATH_NODESET(n, STR("(.)//foo")) % 3 % 4 % 5 % 6; - CHECK_XPATH_NODESET(n, STR("(./..)//*")) % 2 % 3 % 4 % 5 % 6; - - CHECK_XPATH_FAIL(STR("(1)/foo")); - CHECK_XPATH_FAIL(STR("(1)//foo")); -} - -TEST_XML(xpath_paths_descendant_double_slash_w3c, "") -{ - CHECK_XPATH_NODESET(doc, STR("//para")) % 3 % 4 % 5 % 6 % 7 % 8; - CHECK_XPATH_NODESET(doc, STR("/descendant::para")) % 3 % 4 % 5 % 6 % 7 % 8; - CHECK_XPATH_NODESET(doc, STR("//para[1]")) % 3 % 4 % 7; - CHECK_XPATH_NODESET(doc, STR("/descendant::para[1]")) % 3; -} - -TEST_XML(xpath_paths_needs_sorting, "") -{ - CHECK_XPATH_NODESET(doc, STR("(node/child/subchild)[2]")) % 7; -} - -#endif +#ifndef PUGIXML_NO_XPATH + +#include "common.hpp" + +TEST_XML(xpath_paths_axes_child, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("child:: node()")); + + CHECK_XPATH_NODESET(n, STR("child:: node()")) % 4 % 7 % 8; // child, another, last + CHECK_XPATH_NODESET(n, STR("another/child:: node()")); + + CHECK_XPATH_NODESET(n, STR("@attr/child::node()")); +} + +TEST_XML(xpath_paths_axes_descendant, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("descendant:: node()")); + + CHECK_XPATH_NODESET(n, STR("descendant:: node()")) % 4 % 6 % 7 % 8 % 9; // child, subchild, another, subchild, last + 
CHECK_XPATH_NODESET(doc, STR("descendant:: node()")) % 2 % 4 % 6 % 7 % 8 % 9; // node, child, subchild, another, subchild, last + CHECK_XPATH_NODESET(n, STR("another/descendant:: node()")) % 8; // subchild + CHECK_XPATH_NODESET(n, STR("last/descendant:: node()")); + + CHECK_XPATH_NODESET(n, STR("@attr/descendant::node()")); +} + +TEST_XML(xpath_paths_axes_parent, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("parent:: node()")); + + CHECK_XPATH_NODESET(n.child(STR("child")), STR("parent:: node()")) % 2; // node + CHECK_XPATH_NODESET(n, STR("child/subchild/parent:: node()")) % 4; // child + CHECK_XPATH_NODESET(n, STR("@attr/parent:: node()")) % 2; // node + CHECK_XPATH_NODESET(n, STR("parent:: node()")) % 1; // root + CHECK_XPATH_NODESET(doc, STR("parent:: node()")); +} + +TEST_XML(xpath_paths_axes_ancestor, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("ancestor:: node()")); + + CHECK_XPATH_NODESET(n.child(STR("child")), STR("ancestor:: node()")) % 2 % 1; // node, root + CHECK_XPATH_NODESET(n, STR("child/subchild/ancestor:: node()")) % 4 % 2 % 1; // child, node, root + CHECK_XPATH_NODESET(n, STR("child/@attr/ancestor:: node()")) % 4 % 2 % 1; // child, node, root + CHECK_XPATH_NODESET(n, STR("ancestor:: node()")) % 1; // root + CHECK_XPATH_NODESET(doc, STR("ancestor:: node()")); +} + +TEST_XML(xpath_paths_axes_following_sibling, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("following-sibling:: node()")); + + CHECK_XPATH_NODESET(n.child(STR("child")), STR("following-sibling:: node()")) % 8 % 10; // another, last + CHECK_XPATH_NODESET(n.child(STR("last")), STR("following-sibling:: node()")); + CHECK_XPATH_NODESET(n, STR("@attr1/following-sibling:: node()")); // attributes are not siblings +} + +TEST_XML(xpath_paths_axes_preceding_sibling, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, 
STR("preceding-sibling:: node()")); + + CHECK_XPATH_NODESET(n.child(STR("child")), STR("preceding-sibling:: node()")); + CHECK_XPATH_NODESET(n.child(STR("last")), STR("preceding-sibling:: node()")) % 8 % 5; // another, child + CHECK_XPATH_NODESET(n, STR("@attr2/following-sibling:: node()")); // attributes are not siblings +} + +TEST_XML(xpath_paths_axes_following, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("following:: node()")); + + CHECK_XPATH_NODESET(n, STR("following:: node()")); // no descendants + CHECK_XPATH_NODESET(n.child(STR("child")), STR("following:: node()")) % 8 % 9 % 10 % 11; // another, subchild, almost, last + CHECK_XPATH_NODESET(n.child(STR("child")).child(STR("subchild")), STR("following:: node()")) % 8 % 9 % 10 % 11; // another, subchild, almost, last + CHECK_XPATH_NODESET(n.child(STR("last")), STR("following:: node()")); + + CHECK_XPATH_NODESET(n, STR("@attr1/following::node()")) % 5 % 7 % 8 % 9 % 10 % 11; // child, subchild, another, subchild, almost, last - because @/following + CHECK_XPATH_NODESET(n, STR("child/@attr/following::node()")) % 7 % 8 % 9 % 10 % 11; // subchild, another, subchild, almost, last +} + +TEST_XML(xpath_paths_axes_preceding, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("preceding:: node()")); + + CHECK_XPATH_NODESET(n.child(STR("child")), STR("preceding:: node()")); // no ancestors + CHECK_XPATH_NODESET(n.child(STR("last")), STR("preceding:: node()")) % 11 % 9 % 8 % 7 % 5; // almost, subchild, another, subchild, child + CHECK_XPATH_NODESET(n.child(STR("another")).child(STR("subchild")), STR("preceding:: node()")) % 7 % 5; // subchild, child + CHECK_XPATH_NODESET(n, STR("preceding:: node()")); + + CHECK_XPATH_NODESET(n, STR("child/@attr/preceding::node()")); // no ancestors + CHECK_XPATH_NODESET(n, STR("//subchild[@id]/@id/preceding::node()")) % 7 % 5; // subchild, child +} + +TEST_XML(xpath_paths_axes_attribute, "") +{ + 
xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("attribute:: node()")); + + CHECK_XPATH_NODESET(n.child(STR("child")), STR("attribute:: node()")) % 6; // child/@attr + CHECK_XPATH_NODESET(n.child(STR("last")), STR("attribute:: node()")); + CHECK_XPATH_NODESET(n, STR("attribute:: node()")) % 3 % 4; // node/@attr1 node/@attr2 + CHECK_XPATH_NODESET(doc, STR("descendant-or-self:: node()/attribute:: node()")) % 3 % 4 % 6; // all attributes + CHECK_XPATH_NODESET(n.child(STR("another")), STR("attribute:: node()")); // namespace nodes are not attributes + + CHECK_XPATH_NODESET(n, STR("@attr1/attribute::node()")); +} + +TEST_XML(xpath_paths_axes_namespace, "") +{ + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(n, STR("namespace:: node()")); // namespace nodes are not supported + CHECK_XPATH_NODESET(n, STR("@attr/attribute::node()")); +} + +TEST_XML(xpath_paths_axes_self, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("self:: node()")); + + CHECK_XPATH_NODESET(n.child(STR("child")), STR("self:: node()")) % 4; // child + CHECK_XPATH_NODESET(n, STR("self:: node()")) % 2; // node + CHECK_XPATH_NODESET(n, STR("child/self:: node()")) % 4; // child + CHECK_XPATH_NODESET(n, STR("child/@attr/self:: node()")) % 5; // @attr + CHECK_XPATH_NODESET(doc, STR("self:: node()")) % 1; // root +} + +TEST_XML(xpath_paths_axes_descendant_or_self, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("descendant-or-self:: node()")); + + CHECK_XPATH_NODESET(n, STR("descendant-or-self:: node()")) % 2 % 4 % 6 % 7 % 8 % 9; // node, child, subchild, another, subchild, last + CHECK_XPATH_NODESET(doc, STR("descendant-or-self:: node()")) % 1 % 2 % 4 % 6 % 7 % 8 % 9; // root, node, child, subchild, another, subchild, last + CHECK_XPATH_NODESET(n, STR("another/descendant-or-self:: node()")) % 7 % 8; // another, subchild + CHECK_XPATH_NODESET(n, STR("last/descendant-or-self:: 
node()")) % 9; // last + + CHECK_XPATH_NODESET(n, STR("child/@attr/descendant-or-self::node()")) % 5; // @attr +} + +TEST_XML(xpath_paths_axes_ancestor_or_self, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("ancestor-or-self:: node()")); + + CHECK_XPATH_NODESET(n.child(STR("child")), STR("ancestor-or-self:: node()")) % 4 % 2 % 1; // child, node, root + CHECK_XPATH_NODESET(n, STR("child/subchild/ancestor-or-self:: node()")) % 6 % 4 % 2 % 1; // subchild, child, node, root + CHECK_XPATH_NODESET(n, STR("child/@attr/ancestor-or-self:: node()")) % 5 % 4 % 2 % 1; // @attr, child, node, root + CHECK_XPATH_NODESET(n, STR("ancestor-or-self:: node()")) % 2 % 1; // root, node + CHECK_XPATH_NODESET(doc, STR("ancestor-or-self:: node()")) % 1; // root + CHECK_XPATH_NODESET(n, STR("ancestor-or-self:: node()")) % 2 % 1; // root, node + CHECK_XPATH_NODESET(n, STR("last/ancestor-or-self::node()")) % 9 % 2 % 1; // root, node, last +} + +TEST_XML(xpath_paths_axes_abbrev, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + // @ axis + CHECK_XPATH_NODESET(c, STR("@attr")); + CHECK_XPATH_NODESET(n, STR("@attr")) % 3; + + // no axis - child implied + CHECK_XPATH_NODESET(c, STR("foo")); + CHECK_XPATH_NODESET(n, STR("foo")) % 4; + CHECK_XPATH_NODESET(doc, STR("node()")) % 2; + + // @ axis should disable all other axis specifiers + CHECK_XPATH_FAIL(STR("@child::foo")); + CHECK_XPATH_FAIL(STR("@attribute::foo")); +} + +TEST_XML(xpath_paths_nodetest_all, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("*")); + CHECK_XPATH_NODESET(c, STR("child::*")); + + CHECK_XPATH_NODESET(n, STR("*")) % 5 % 6 % 7 % 8; + CHECK_XPATH_NODESET(n, STR("child::*")) % 5 % 6 % 7 % 8; + CHECK_XPATH_NODESET(n, STR("attribute::*")) % 3 % 4; +} + +TEST_XML_FLAGS(xpath_paths_nodetest_name, "", parse_default | parse_pi) +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("c1")); + 
CHECK_XPATH_NODESET(c, STR("child::c1")); + + CHECK_XPATH_NODESET(n, STR("c1")) % 5; + CHECK_XPATH_NODESET(n, STR("x:c2")) % 6; + + CHECK_XPATH_NODESET(n, STR("child::c1")) % 5; + CHECK_XPATH_NODESET(n, STR("child::x:c2")) % 6; + + CHECK_XPATH_NODESET(n, STR("attribute::a1")) % 3; + CHECK_XPATH_NODESET(n, STR("attribute::x:a2")) % 4; + CHECK_XPATH_NODESET(n, STR("@x:a2")) % 4; +} + +TEST_XML(xpath_paths_nodetest_all_in_namespace, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("x:*")); + CHECK_XPATH_NODESET(c, STR("child::x:*")); + + CHECK_XPATH_NODESET(n, STR("x:*")) % 6 % 8; + CHECK_XPATH_NODESET(n, STR("child::x:*")) % 6 % 8; + + CHECK_XPATH_NODESET(n, STR("attribute::x:*")) % 4; + CHECK_XPATH_NODESET(n, STR("@x:*")) % 4; + + CHECK_XPATH_FAIL(STR(":*")); + CHECK_XPATH_FAIL(STR("@:*")); +} + +TEST_XML_FLAGS(xpath_paths_nodetest_type, "pcdata", parse_default | parse_pi | parse_comments) +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + // check on empty nodes + CHECK_XPATH_NODESET(c, STR("node()")); + CHECK_XPATH_NODESET(c, STR("text()")); + CHECK_XPATH_NODESET(c, STR("comment()")); + CHECK_XPATH_NODESET(c, STR("processing-instruction()")); + CHECK_XPATH_NODESET(c, STR("processing-instruction('foobar')")); + + // child axis + CHECK_XPATH_NODESET(n, STR("node()")) % 4 % 5 % 6 % 7 % 8 % 9; + CHECK_XPATH_NODESET(n, STR("text()")) % 4 % 9; + CHECK_XPATH_NODESET(n, STR("comment()")) % 8; + CHECK_XPATH_NODESET(n, STR("processing-instruction()")) % 6 % 7; + CHECK_XPATH_NODESET(n, STR("processing-instruction('pi2')")) % 7; + + // attribute axis + CHECK_XPATH_NODESET(n, STR("@node()")) % 3; + CHECK_XPATH_NODESET(n, STR("@text()")); + CHECK_XPATH_NODESET(n, STR("@comment()")); + CHECK_XPATH_NODESET(n, STR("@processing-instruction()")); + CHECK_XPATH_NODESET(n, STR("@processing-instruction('pi2')")); + + // incorrect 'argument' number + CHECK_XPATH_FAIL(STR("node('')")); + CHECK_XPATH_FAIL(STR("text('')")); + 
CHECK_XPATH_FAIL(STR("comment('')")); + CHECK_XPATH_FAIL(STR("processing-instruction(1)")); + CHECK_XPATH_FAIL(STR("processing-instruction('', '')")); + CHECK_XPATH_FAIL(STR("processing-instruction(concat('a', 'b'))")); +} + +TEST_XML_FLAGS(xpath_paths_nodetest_principal, "pcdata", parse_default | parse_pi | parse_comments) +{ + // node() test is true for any node type + CHECK_XPATH_NODESET(doc, STR("//node()")) % 2 % 4 % 5 % 6 % 7 % 8 % 9 % 10; + CHECK_XPATH_NODESET(doc, STR("//attribute::node()")) % 3 % 11; + CHECK_XPATH_NODESET(doc, STR("//attribute::node()/ancestor-or-self::node()")) % 1 % 2 % 3 % 10 % 11; + + // name test is true only for node with principal node type (depends on axis) + CHECK_XPATH_NODESET(doc, STR("node/child::child")) % 5; + CHECK_XPATH_NODESET(doc, STR("node/attribute::attr")) % 3; + CHECK_XPATH_NODESET(doc, STR("node/child::pi1")); + CHECK_XPATH_NODESET(doc, STR("node/child::attr")); + CHECK_XPATH_NODESET(doc, STR("node/child::child/self::child")) % 5; + CHECK_XPATH_NODESET(doc, STR("node/attribute::attr/self::attr")); // attribute is not of element type + CHECK_XPATH_NODESET(doc, STR("node/child::child/ancestor-or-self::child")) % 5; + CHECK_XPATH_NODESET(doc, STR("node/attribute::attr/ancestor-or-self::attr")); // attribute is not of element type + CHECK_XPATH_NODESET(doc, STR("node/child::child/descendant-or-self::child")) % 5; + CHECK_XPATH_NODESET(doc, STR("node/attribute::attr/descendant-or-self::attr")); // attribute is not of element type + + // any name test is true only for node with principal node type (depends on axis) + CHECK_XPATH_NODESET(doc, STR("node/child::*")) % 5; + CHECK_XPATH_NODESET(doc, STR("node/attribute::*")) % 3; + CHECK_XPATH_NODESET(doc, STR("node/child::*/self::*")) % 5; + CHECK_XPATH_NODESET(doc, STR("node/attribute::*/self::*")); // attribute is not of element type + CHECK_XPATH_NODESET(doc, STR("node/child::*/ancestor-or-self::*")) % 5 % 2; + CHECK_XPATH_NODESET(doc, 
STR("node/attribute::*/ancestor-or-self::*")) % 2; // attribute is not of element type + CHECK_XPATH_NODESET(doc, STR("node/child::*/descendant-or-self::*")) % 5; + CHECK_XPATH_NODESET(doc, STR("node/attribute::*/descendant-or-self::*")); // attribute is not of element type + + // namespace test is true only for node with principal node type (depends on axis) + CHECK_XPATH_NODESET(doc, STR("child::abra:*")) % 10; + CHECK_XPATH_NODESET(doc, STR("child::abra:*/attribute::abra:*")) % 11; + CHECK_XPATH_NODESET(doc, STR("child::abra:*/self::abra:*")) % 10; + CHECK_XPATH_NODESET(doc, STR("child::abra:*/attribute::abra:*/self::abra:*")); // attribute is not of element type + CHECK_XPATH_NODESET(doc, STR("child::abra:*/ancestor-or-self::abra:*")) % 10; + CHECK_XPATH_NODESET(doc, STR("child::abra:*/attribute::abra:*/ancestor-or-self::abra:*")) % 10; // attribute is not of element type + CHECK_XPATH_NODESET(doc, STR("child::abra:*/descendant-or-self::abra:*")) % 10; + CHECK_XPATH_NODESET(doc, STR("child::abra:*/attribute::abra:*/descendant-or-self::abra:*")); // attribute is not of element type +} + +TEST_XML(xpath_paths_absolute, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("/foo")); + CHECK_XPATH_NODESET(n, STR("/foo")); + CHECK_XPATH_NODESET(n, STR("/node/foo")) % 3; + CHECK_XPATH_NODESET(n.child(STR("foo")), STR("/node/foo")) % 3; + + CHECK_XPATH_NODESET(c, STR("/")); + CHECK_XPATH_NODESET(n, STR("/")) % 1; + CHECK_XPATH_NODESET(n.child(STR("foo")), STR("/")) % 1; +} + +TEST_XML(xpath_paths_step_abbrev, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR(".")); + CHECK_XPATH_NODESET(c, STR("..")); + + CHECK_XPATH_NODESET(n, STR(".")) % 2; + CHECK_XPATH_NODESET(n, STR("..")) % 1; + CHECK_XPATH_NODESET(n, STR("../node")) % 2; + CHECK_XPATH_NODESET(n.child(STR("foo")), STR("..")) % 2; + + CHECK_XPATH_FAIL(STR(".node")); + CHECK_XPATH_FAIL(STR("..node")); +} + 
+TEST_XML(xpath_paths_relative_abbrev, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("foo//bar")); + + CHECK_XPATH_NODESET(n, STR("foo/foo")) % 4 % 5; + CHECK_XPATH_NODESET(n, STR("foo//foo")) % 4 % 5; + CHECK_XPATH_NODESET(n, STR(".//foo")) % 3 % 4 % 5; +} + +TEST_XML(xpath_paths_absolute_abbrev, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("//bar")); + + CHECK_XPATH_NODESET(n, STR("//foo")) % 3 % 4 % 5; + CHECK_XPATH_NODESET(n.child(STR("foo")), STR("//foo")) % 3 % 4 % 5; + CHECK_XPATH_NODESET(doc, STR("//foo")) % 3 % 4 % 5; +} + +TEST_XML(xpath_paths_predicate_boolean, "") +{ + xml_node n = doc.child(STR("node")).child(STR("chapter")).next_sibling().next_sibling(); + + CHECK_XPATH_NODESET(n, STR("following-sibling::chapter[position()=1]")) % 6; + CHECK_XPATH_NODESET(n, STR("following-sibling::chapter[position()=2]")) % 7; + CHECK_XPATH_NODESET(n, STR("preceding-sibling::chapter[position()=1]")) % 4; + CHECK_XPATH_NODESET(n, STR("preceding-sibling::chapter[position()=2]")) % 3; +} + +TEST_XML(xpath_paths_predicate_number, "") +{ + xml_node n = doc.child(STR("node")).child(STR("chapter")).next_sibling().next_sibling(); + + CHECK_XPATH_NODESET(n, STR("following-sibling::chapter[1]")) % 6; + CHECK_XPATH_NODESET(n, STR("following-sibling::chapter[2]")) % 7; + CHECK_XPATH_NODESET(n, STR("preceding-sibling::chapter[1]")) % 4; + CHECK_XPATH_NODESET(n, STR("preceding-sibling::chapter[2]")) % 3; +} + +TEST_XML(xpath_paths_predicate_several, "") +{ + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(n, STR("employee")) % 3 % 4 % 6 % 8 % 11; + CHECK_XPATH_NODESET(n, STR("employee[@secretary]")) % 4 % 8 % 11; + CHECK_XPATH_NODESET(n, STR("employee[@assistant]")) % 6 % 8 % 11; + CHECK_XPATH_NODESET(n, STR("employee[@secretary][@assistant]")) % 8 % 11; + CHECK_XPATH_NODESET(n, STR("employee[@assistant][@secretary]")) % 8 % 11; + CHECK_XPATH_NODESET(n, STR("employee[@secretary 
and @assistant]")) % 8 % 11; +} + +TEST_XML(xpath_paths_predicate_filter_boolean, "") +{ + xml_node n = doc.child(STR("node")).child(STR("chapter")).next_sibling().next_sibling(); + + CHECK_XPATH_NODESET(n, STR("(following-sibling::chapter)[position()=1]")) % 6; + CHECK_XPATH_NODESET(n, STR("(following-sibling::chapter)[position()=2]")) % 7; + CHECK_XPATH_NODESET(n, STR("(preceding-sibling::chapter)[position()=1]")) % 3; + CHECK_XPATH_NODESET(n, STR("(preceding-sibling::chapter)[position()=2]")) % 4; +} + +TEST_XML(xpath_paths_predicate_filter_number, "") +{ + xml_node n = doc.child(STR("node")).child(STR("chapter")).next_sibling().next_sibling(); + + CHECK_XPATH_NODESET(n, STR("(following-sibling::chapter)[1]")) % 6; + CHECK_XPATH_NODESET(n, STR("(following-sibling::chapter)[2]")) % 7; + CHECK_XPATH_NODESET(n, STR("(preceding-sibling::chapter)[1]")) % 3; + CHECK_XPATH_NODESET(n, STR("(preceding-sibling::chapter)[2]")) % 4; +} + +TEST_XML(xpath_paths_predicate_filter_posinv, "") +{ + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(n, STR("employee")) % 3 % 4 % 6 % 8 % 11; + CHECK_XPATH_NODESET(n, STR("(employee[@secretary])[@assistant]")) % 8 % 11; + CHECK_XPATH_NODESET(n, STR("((employee)[@assistant])[@secretary]")) % 8 % 11; +} + +TEST_XML(xpath_paths_step_compose, "") +{ + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(n, STR("(.)/foo")) % 3 % 6; + CHECK_XPATH_NODESET(n, STR("(.)//foo")) % 3 % 4 % 5 % 6; + CHECK_XPATH_NODESET(n, STR("(./..)//*")) % 2 % 3 % 4 % 5 % 6; + + CHECK_XPATH_FAIL(STR("(1)/foo")); + CHECK_XPATH_FAIL(STR("(1)//foo")); +} + +TEST_XML(xpath_paths_descendant_double_slash_w3c, "") +{ + CHECK_XPATH_NODESET(doc, STR("//para")) % 3 % 4 % 5 % 6 % 7 % 8; + CHECK_XPATH_NODESET(doc, STR("/descendant::para")) % 3 % 4 % 5 % 6 % 7 % 8; + CHECK_XPATH_NODESET(doc, STR("//para[1]")) % 3 % 4 % 7; + CHECK_XPATH_NODESET(doc, STR("/descendant::para[1]")) % 3; +} + +TEST_XML(xpath_paths_needs_sorting, "") +{ + CHECK_XPATH_NODESET(doc, 
STR("(node/child/subchild)[2]")) % 7; +} + +#endif diff --git a/tests/test_xpath_paths_abbrev_w3c.cpp b/tests/test_xpath_paths_abbrev_w3c.cpp index ebd13aa..af65752 100644 --- a/tests/test_xpath_paths_abbrev_w3c.cpp +++ b/tests/test_xpath_paths_abbrev_w3c.cpp @@ -1,217 +1,217 @@ -#ifndef PUGIXML_NO_XPATH - -#include "common.hpp" - -TEST_XML(xpath_paths_abbrev_w3c_1, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("para")); - CHECK_XPATH_NODESET(n, STR("para")) % 3 % 5; -} - -TEST_XML(xpath_paths_abbrev_w3c_2, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("*")); - CHECK_XPATH_NODESET(n, STR("*")) % 3 % 4 % 5; -} - -TEST_XML(xpath_paths_abbrev_w3c_3, "pcdata") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("text()")); - CHECK_XPATH_NODESET(n, STR("text()")) % 3 % 5; -} - -TEST_XML(xpath_paths_abbrev_w3c_4, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("@name")); - CHECK_XPATH_NODESET(n, STR("@name")) % 3; -} - -TEST_XML(xpath_paths_abbrev_w3c_5, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("@*")); - CHECK_XPATH_NODESET(n, STR("@*")) % 3 % 4; -} - -TEST_XML(xpath_paths_abbrev_w3c_6, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("para[1]")); - CHECK_XPATH_NODESET(n, STR("para[1]")) % 3; -} - -TEST_XML(xpath_paths_abbrev_w3c_7, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("para[last()]")); - CHECK_XPATH_NODESET(n, STR("para[last()]")) % 6; -} - -TEST_XML(xpath_paths_abbrev_w3c_8, "") -{ - xml_node c; - - CHECK_XPATH_NODESET(c, STR("*/para")); - CHECK_XPATH_NODESET(doc, STR("*/para")) % 3 % 9; -} - -TEST_XML(xpath_paths_abbrev_w3c_9, "
") -{ - xml_node c; - xml_node n = doc.child(STR("doc")).child(STR("chapter")); - - CHECK_XPATH_NODESET(c, STR("/doc/chapter[5]/section[2]")); - CHECK_XPATH_NODESET(n, STR("/doc/chapter[5]/section[2]")) % 9; - CHECK_XPATH_NODESET(doc, STR("/doc/chapter[5]/section[2]")) % 9; -} - -TEST_XML(xpath_paths_abbrev_w3c_10, "") -{ - xml_node c; - - CHECK_XPATH_NODESET(c, STR("chapter//para")); - CHECK_XPATH_NODESET(doc, STR("chapter//para")) % 3 % 4 % 5 % 7 % 9; -} - -TEST_XML(xpath_paths_abbrev_w3c_11, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("//para")); - CHECK_XPATH_NODESET(n, STR("//para")) % 3 % 4 % 5 % 7 % 9; - CHECK_XPATH_NODESET(n.child(STR("para")), STR("//para")) % 3 % 4 % 5 % 7 % 9; -} - -TEST_XML(xpath_paths_abbrev_w3c_12, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("//olist/item")); - CHECK_XPATH_NODESET(n, STR("//olist/item")) % 4 % 8 % 9; - CHECK_XPATH_NODESET(n.child(STR("olist")), STR("//olist/item")) % 4 % 8 % 9; -} - -TEST_XML(xpath_paths_abbrev_w3c_13, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR(".")); - CHECK_XPATH_NODESET(n, STR(".")) % 2; - CHECK_XPATH_NODESET(n.child(STR("child")), STR(".")) % 3; -} - -TEST_XML(xpath_paths_abbrev_w3c_14, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR(".//para")); - CHECK_XPATH_NODESET(n, STR(".//para")) % 3 % 4 % 5 % 7 % 9; - CHECK_XPATH_NODESET(n.child(STR("para")), STR(".//para")) % 4 % 5 % 7; -} - -TEST_XML(xpath_paths_abbrev_w3c_15, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("..")); - CHECK_XPATH_NODESET(n, STR("..")) % 1; - CHECK_XPATH_NODESET(n.child(STR("child")), STR("..")) % 2; -} - -TEST_XML(xpath_paths_abbrev_w3c_16, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("../@lang")); - CHECK_XPATH_NODESET(n, STR("../@lang")); - 
CHECK_XPATH_NODESET(n.child(STR("child")), STR("../@lang")) % 3; -} - -TEST_XML(xpath_paths_abbrev_w3c_17, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("para[@type=\"warning\"]")); - CHECK_XPATH_NODESET(n, STR("para[@type=\"warning\"]")) % 4 % 6 % 11 % 13 % 15; -} - -TEST_XML(xpath_paths_abbrev_w3c_18, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("para[@type=\"warning\"][5]")); - CHECK_XPATH_NODESET(n, STR("para[@type=\"warning\"][5]")) % 15; -} - -TEST_XML(xpath_paths_abbrev_w3c_19a, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("para[5][@type=\"warning\"]")); - CHECK_XPATH_NODESET(n, STR("para[5][@type=\"warning\"]")); -} - -TEST_XML(xpath_paths_abbrev_w3c_19b, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("para[5][@type=\"warning\"]")); - CHECK_XPATH_NODESET(n, STR("para[5][@type=\"warning\"]")) % 9; -} - -TEST_XML(xpath_paths_abbrev_w3c_20, "fooIntroductionintroductionIntroductionfoo") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("chapter[title=\"Introduction\"]")); - CHECK_XPATH_NODESET(n, STR("chapter[title=\"Introduction\"]")) % 6 % 13; -} - -TEST_XML(xpath_paths_abbrev_w3c_21, "fooIntroductionintroductionIntroductionfoo") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("chapter[title]")); - CHECK_XPATH_NODESET(n, STR("chapter[title]")) % 3 % 6 % 9 % 13; -} - -TEST_XML(xpath_paths_abbrev_w3c_22, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("employee[@secretary and @assistant]")); - CHECK_XPATH_NODESET(n, STR("employee[@secretary and @assistant]")) % 8 % 11; -} - -#endif +#ifndef PUGIXML_NO_XPATH + +#include "common.hpp" + +TEST_XML(xpath_paths_abbrev_w3c_1, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, 
STR("para")); + CHECK_XPATH_NODESET(n, STR("para")) % 3 % 5; +} + +TEST_XML(xpath_paths_abbrev_w3c_2, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("*")); + CHECK_XPATH_NODESET(n, STR("*")) % 3 % 4 % 5; +} + +TEST_XML(xpath_paths_abbrev_w3c_3, "pcdata") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("text()")); + CHECK_XPATH_NODESET(n, STR("text()")) % 3 % 5; +} + +TEST_XML(xpath_paths_abbrev_w3c_4, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("@name")); + CHECK_XPATH_NODESET(n, STR("@name")) % 3; +} + +TEST_XML(xpath_paths_abbrev_w3c_5, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("@*")); + CHECK_XPATH_NODESET(n, STR("@*")) % 3 % 4; +} + +TEST_XML(xpath_paths_abbrev_w3c_6, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("para[1]")); + CHECK_XPATH_NODESET(n, STR("para[1]")) % 3; +} + +TEST_XML(xpath_paths_abbrev_w3c_7, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("para[last()]")); + CHECK_XPATH_NODESET(n, STR("para[last()]")) % 6; +} + +TEST_XML(xpath_paths_abbrev_w3c_8, "") +{ + xml_node c; + + CHECK_XPATH_NODESET(c, STR("*/para")); + CHECK_XPATH_NODESET(doc, STR("*/para")) % 3 % 9; +} + +TEST_XML(xpath_paths_abbrev_w3c_9, "
") +{ + xml_node c; + xml_node n = doc.child(STR("doc")).child(STR("chapter")); + + CHECK_XPATH_NODESET(c, STR("/doc/chapter[5]/section[2]")); + CHECK_XPATH_NODESET(n, STR("/doc/chapter[5]/section[2]")) % 9; + CHECK_XPATH_NODESET(doc, STR("/doc/chapter[5]/section[2]")) % 9; +} + +TEST_XML(xpath_paths_abbrev_w3c_10, "") +{ + xml_node c; + + CHECK_XPATH_NODESET(c, STR("chapter//para")); + CHECK_XPATH_NODESET(doc, STR("chapter//para")) % 3 % 4 % 5 % 7 % 9; +} + +TEST_XML(xpath_paths_abbrev_w3c_11, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("//para")); + CHECK_XPATH_NODESET(n, STR("//para")) % 3 % 4 % 5 % 7 % 9; + CHECK_XPATH_NODESET(n.child(STR("para")), STR("//para")) % 3 % 4 % 5 % 7 % 9; +} + +TEST_XML(xpath_paths_abbrev_w3c_12, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("//olist/item")); + CHECK_XPATH_NODESET(n, STR("//olist/item")) % 4 % 8 % 9; + CHECK_XPATH_NODESET(n.child(STR("olist")), STR("//olist/item")) % 4 % 8 % 9; +} + +TEST_XML(xpath_paths_abbrev_w3c_13, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR(".")); + CHECK_XPATH_NODESET(n, STR(".")) % 2; + CHECK_XPATH_NODESET(n.child(STR("child")), STR(".")) % 3; +} + +TEST_XML(xpath_paths_abbrev_w3c_14, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR(".//para")); + CHECK_XPATH_NODESET(n, STR(".//para")) % 3 % 4 % 5 % 7 % 9; + CHECK_XPATH_NODESET(n.child(STR("para")), STR(".//para")) % 4 % 5 % 7; +} + +TEST_XML(xpath_paths_abbrev_w3c_15, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("..")); + CHECK_XPATH_NODESET(n, STR("..")) % 1; + CHECK_XPATH_NODESET(n.child(STR("child")), STR("..")) % 2; +} + +TEST_XML(xpath_paths_abbrev_w3c_16, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("../@lang")); + CHECK_XPATH_NODESET(n, STR("../@lang")); + 
CHECK_XPATH_NODESET(n.child(STR("child")), STR("../@lang")) % 3; +} + +TEST_XML(xpath_paths_abbrev_w3c_17, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("para[@type=\"warning\"]")); + CHECK_XPATH_NODESET(n, STR("para[@type=\"warning\"]")) % 4 % 6 % 11 % 13 % 15; +} + +TEST_XML(xpath_paths_abbrev_w3c_18, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("para[@type=\"warning\"][5]")); + CHECK_XPATH_NODESET(n, STR("para[@type=\"warning\"][5]")) % 15; +} + +TEST_XML(xpath_paths_abbrev_w3c_19a, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("para[5][@type=\"warning\"]")); + CHECK_XPATH_NODESET(n, STR("para[5][@type=\"warning\"]")); +} + +TEST_XML(xpath_paths_abbrev_w3c_19b, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("para[5][@type=\"warning\"]")); + CHECK_XPATH_NODESET(n, STR("para[5][@type=\"warning\"]")) % 9; +} + +TEST_XML(xpath_paths_abbrev_w3c_20, "fooIntroductionintroductionIntroductionfoo") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("chapter[title=\"Introduction\"]")); + CHECK_XPATH_NODESET(n, STR("chapter[title=\"Introduction\"]")) % 6 % 13; +} + +TEST_XML(xpath_paths_abbrev_w3c_21, "fooIntroductionintroductionIntroductionfoo") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("chapter[title]")); + CHECK_XPATH_NODESET(n, STR("chapter[title]")) % 3 % 6 % 9 % 13; +} + +TEST_XML(xpath_paths_abbrev_w3c_22, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("employee[@secretary and @assistant]")); + CHECK_XPATH_NODESET(n, STR("employee[@secretary and @assistant]")) % 8 % 11; +} + +#endif diff --git a/tests/test_xpath_paths_w3c.cpp b/tests/test_xpath_paths_w3c.cpp index d3f1554..2005bc5 100644 --- a/tests/test_xpath_paths_w3c.cpp +++ b/tests/test_xpath_paths_w3c.cpp @@ 
-1,310 +1,310 @@ -#ifndef PUGIXML_NO_XPATH - -#include "common.hpp" - -TEST_XML(xpath_paths_w3c_1, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("child::para")); - CHECK_XPATH_NODESET(n, STR("child::para")) % 3 % 5; -} - -TEST_XML(xpath_paths_w3c_2, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("child::*")); - CHECK_XPATH_NODESET(n, STR("child::*")) % 3 % 4 % 5; -} - -TEST_XML(xpath_paths_w3c_3, "pcdata") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("child::text()")); - CHECK_XPATH_NODESET(n, STR("child::text()")) % 3 % 5; -} - -TEST_XML(xpath_paths_w3c_4, "pcdata") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("child::node()")); - CHECK_XPATH_NODESET(n, STR("child::node()")) % 3 % 4 % 5; -} - -TEST_XML(xpath_paths_w3c_5, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("attribute::name")); - CHECK_XPATH_NODESET(n, STR("attribute::name")) % 3; -} - -TEST_XML(xpath_paths_w3c_6, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("attribute::*")); - CHECK_XPATH_NODESET(n, STR("attribute::*")) % 3 % 4; -} - -TEST_XML(xpath_paths_w3c_7, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("descendant::para")); - CHECK_XPATH_NODESET(n, STR("descendant::para")) % 3 % 4 % 5 % 7 % 9; - CHECK_XPATH_NODESET(n.child(STR("para")), STR("descendant::para")) % 4 % 5 % 7; -} - -TEST_XML(xpath_paths_w3c_8, "
") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("ancestor::div")); - CHECK_XPATH_NODESET(n.child(STR("div")).child(STR("font")).child(STR("div")).child(STR("div")), STR("ancestor::div")) % 5 % 3; -} - -TEST_XML(xpath_paths_w3c_9, "
") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("ancestor-or-self::div")); - CHECK_XPATH_NODESET(n.child(STR("div")).child(STR("font")).child(STR("div")).child(STR("div")), STR("ancestor-or-self::div")) % 6 % 5 % 3; -} - -TEST_XML(xpath_paths_w3c_10, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("descendant-or-self::para")); - CHECK_XPATH_NODESET(n, STR("descendant-or-self::para")) % 3 % 4 % 5 % 7 % 9; - CHECK_XPATH_NODESET(n.child(STR("para")), STR("descendant-or-self::para")) % 3 % 4 % 5 % 7; -} - -TEST_XML(xpath_paths_w3c_11, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("self::para")); - CHECK_XPATH_NODESET(n, STR("self::para")); - CHECK_XPATH_NODESET(n.child(STR("para")), STR("self::para")) % 3; -} - -TEST_XML(xpath_paths_w3c_12, "") -{ - xml_node c; - - CHECK_XPATH_NODESET(c, STR("child::chapter/descendant::para")); - CHECK_XPATH_NODESET(doc, STR("child::chapter/descendant::para")) % 3 % 4 % 5 % 7 % 9; -} - -TEST_XML(xpath_paths_w3c_13, "") -{ - xml_node c; - - CHECK_XPATH_NODESET(c, STR("child::*/child::para")); - CHECK_XPATH_NODESET(doc, STR("child::*/child::para")) % 3 % 9; -} - -TEST_XML(xpath_paths_w3c_14, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("/")); - - CHECK_XPATH_NODESET(doc, STR("/")) % 1; - CHECK_XPATH_NODESET(n, STR("/")) % 1; - CHECK_XPATH_NODESET(n.child(STR("para")), STR("/")) % 1; -} - -TEST_XML(xpath_paths_w3c_15, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("/descendant::para")); - CHECK_XPATH_NODESET(n, STR("/descendant::para")) % 3 % 4 % 5 % 7 % 9; - CHECK_XPATH_NODESET(n.child(STR("para")), STR("/descendant::para")) % 3 % 4 % 5 % 7 % 9; -} - -TEST_XML(xpath_paths_w3c_16, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("/descendant::olist/child::item")); - 
CHECK_XPATH_NODESET(n, STR("/descendant::olist/child::item")) % 4 % 8 % 9; - CHECK_XPATH_NODESET(n.child(STR("olist")), STR("/descendant::olist/child::item")) % 4 % 8 % 9; -} - -TEST_XML(xpath_paths_w3c_17, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("child::para[position()=1]")); - CHECK_XPATH_NODESET(n, STR("child::para[position()=1]")) % 3; -} - -TEST_XML(xpath_paths_w3c_18, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("child::para[position()=last()]")); - CHECK_XPATH_NODESET(n, STR("child::para[position()=last()]")) % 6; -} - -TEST_XML(xpath_paths_w3c_19, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("child::para[position()=last()-1]")); - CHECK_XPATH_NODESET(n, STR("child::para[position()=last()-1]")) % 5; -} - -TEST_XML(xpath_paths_w3c_20, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("child::para[position()>1]")); - CHECK_XPATH_NODESET(n, STR("child::para[position()>1]")) % 4 % 5 % 6; -} - -TEST_XML(xpath_paths_w3c_21, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")).child(STR("chapter")).next_sibling().next_sibling(); - - CHECK_XPATH_NODESET(c, STR("following-sibling::chapter[position()=1]")); - CHECK_XPATH_NODESET(n, STR("following-sibling::chapter[position()=1]")) % 6; -} - -TEST_XML(xpath_paths_w3c_22, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")).child(STR("chapter")).next_sibling().next_sibling(); - - CHECK_XPATH_NODESET(c, STR("preceding-sibling::chapter[position()=1]")); - CHECK_XPATH_NODESET(n, STR("preceding-sibling::chapter[position()=1]")) % 4; -} - -TEST_XML(xpath_paths_w3c_23, "
") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("/descendant::figure[position()=4]")); - CHECK_XPATH_NODESET(n, STR("/descendant::figure[position()=4]")) % 7; - CHECK_XPATH_NODESET(n.child(STR("figure")), STR("/descendant::figure[position()=4]")) % 7; -} - -TEST_XML(xpath_paths_w3c_24, "
") -{ - xml_node c; - xml_node n = doc.child(STR("doc")).child(STR("chapter")); - - CHECK_XPATH_NODESET(c, STR("/child::doc/child::chapter[position()=5]/child::section[position()=2]")); - CHECK_XPATH_NODESET(n, STR("/child::doc/child::chapter[position()=5]/child::section[position()=2]")) % 9; - CHECK_XPATH_NODESET(doc, STR("/child::doc/child::chapter[position()=5]/child::section[position()=2]")) % 9; -} - -TEST_XML(xpath_paths_w3c_25, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("child::para[attribute::type=\"warning\"]")); - CHECK_XPATH_NODESET(n, STR("child::para[attribute::type=\"warning\"]")) % 4 % 6 % 11 % 13 % 15; -} - -TEST_XML(xpath_paths_w3c_26, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("child::para[attribute::type=\"warning\"][position()=5]")); - CHECK_XPATH_NODESET(n, STR("child::para[attribute::type=\"warning\"][position()=5]")) % 15; -} - -TEST_XML(xpath_paths_w3c_27a, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("child::para[position()=5][attribute::type=\"warning\"]")); - CHECK_XPATH_NODESET(n, STR("child::para[position()=5][attribute::type=\"warning\"]")); -} - -TEST_XML(xpath_paths_w3c_27b, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("child::para[position()=5][attribute::type=\"warning\"]")); - CHECK_XPATH_NODESET(n, STR("child::para[position()=5][attribute::type=\"warning\"]")) % 9; -} - -TEST_XML(xpath_paths_w3c_28, "fooIntroductionintroductionIntroductionfoo") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("child::chapter[child::title='Introduction']")); - CHECK_XPATH_NODESET(n, STR("child::chapter[child::title='Introduction']")) % 6 % 13; -} - -TEST_XML(xpath_paths_w3c_29, "fooIntroductionintroductionIntroductionfoo") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, 
STR("child::chapter[child::title]")); - CHECK_XPATH_NODESET(n, STR("child::chapter[child::title]")) % 3 % 6 % 9 % 13; -} - -TEST_XML(xpath_paths_w3c_30, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("child::*[self::chapter or self::appendix]")); - CHECK_XPATH_NODESET(n, STR("child::*[self::chapter or self::appendix]")) % 4 % 5 % 7; -} - -TEST_XML(xpath_paths_w3c_31a, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("child::*[self::chapter or self::appendix][position()=last()]")); - CHECK_XPATH_NODESET(n, STR("child::*[self::chapter or self::appendix][position()=last()]")) % 7; -} - -TEST_XML(xpath_paths_w3c_31b, "") -{ - xml_node c; - xml_node n = doc.child(STR("node")); - - CHECK_XPATH_NODESET(c, STR("child::*[self::chapter or self::appendix][position()=last()]")); - CHECK_XPATH_NODESET(n, STR("child::*[self::chapter or self::appendix][position()=last()]")) % 8; -} - -#endif +#ifndef PUGIXML_NO_XPATH + +#include "common.hpp" + +TEST_XML(xpath_paths_w3c_1, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("child::para")); + CHECK_XPATH_NODESET(n, STR("child::para")) % 3 % 5; +} + +TEST_XML(xpath_paths_w3c_2, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("child::*")); + CHECK_XPATH_NODESET(n, STR("child::*")) % 3 % 4 % 5; +} + +TEST_XML(xpath_paths_w3c_3, "pcdata") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("child::text()")); + CHECK_XPATH_NODESET(n, STR("child::text()")) % 3 % 5; +} + +TEST_XML(xpath_paths_w3c_4, "pcdata") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("child::node()")); + CHECK_XPATH_NODESET(n, STR("child::node()")) % 3 % 4 % 5; +} + +TEST_XML(xpath_paths_w3c_5, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("attribute::name")); + 
CHECK_XPATH_NODESET(n, STR("attribute::name")) % 3; +} + +TEST_XML(xpath_paths_w3c_6, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("attribute::*")); + CHECK_XPATH_NODESET(n, STR("attribute::*")) % 3 % 4; +} + +TEST_XML(xpath_paths_w3c_7, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("descendant::para")); + CHECK_XPATH_NODESET(n, STR("descendant::para")) % 3 % 4 % 5 % 7 % 9; + CHECK_XPATH_NODESET(n.child(STR("para")), STR("descendant::para")) % 4 % 5 % 7; +} + +TEST_XML(xpath_paths_w3c_8, "
") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("ancestor::div")); + CHECK_XPATH_NODESET(n.child(STR("div")).child(STR("font")).child(STR("div")).child(STR("div")), STR("ancestor::div")) % 5 % 3; +} + +TEST_XML(xpath_paths_w3c_9, "
") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("ancestor-or-self::div")); + CHECK_XPATH_NODESET(n.child(STR("div")).child(STR("font")).child(STR("div")).child(STR("div")), STR("ancestor-or-self::div")) % 6 % 5 % 3; +} + +TEST_XML(xpath_paths_w3c_10, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("descendant-or-self::para")); + CHECK_XPATH_NODESET(n, STR("descendant-or-self::para")) % 3 % 4 % 5 % 7 % 9; + CHECK_XPATH_NODESET(n.child(STR("para")), STR("descendant-or-self::para")) % 3 % 4 % 5 % 7; +} + +TEST_XML(xpath_paths_w3c_11, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("self::para")); + CHECK_XPATH_NODESET(n, STR("self::para")); + CHECK_XPATH_NODESET(n.child(STR("para")), STR("self::para")) % 3; +} + +TEST_XML(xpath_paths_w3c_12, "") +{ + xml_node c; + + CHECK_XPATH_NODESET(c, STR("child::chapter/descendant::para")); + CHECK_XPATH_NODESET(doc, STR("child::chapter/descendant::para")) % 3 % 4 % 5 % 7 % 9; +} + +TEST_XML(xpath_paths_w3c_13, "") +{ + xml_node c; + + CHECK_XPATH_NODESET(c, STR("child::*/child::para")); + CHECK_XPATH_NODESET(doc, STR("child::*/child::para")) % 3 % 9; +} + +TEST_XML(xpath_paths_w3c_14, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("/")); + + CHECK_XPATH_NODESET(doc, STR("/")) % 1; + CHECK_XPATH_NODESET(n, STR("/")) % 1; + CHECK_XPATH_NODESET(n.child(STR("para")), STR("/")) % 1; +} + +TEST_XML(xpath_paths_w3c_15, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("/descendant::para")); + CHECK_XPATH_NODESET(n, STR("/descendant::para")) % 3 % 4 % 5 % 7 % 9; + CHECK_XPATH_NODESET(n.child(STR("para")), STR("/descendant::para")) % 3 % 4 % 5 % 7 % 9; +} + +TEST_XML(xpath_paths_w3c_16, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("/descendant::olist/child::item")); + 
CHECK_XPATH_NODESET(n, STR("/descendant::olist/child::item")) % 4 % 8 % 9; + CHECK_XPATH_NODESET(n.child(STR("olist")), STR("/descendant::olist/child::item")) % 4 % 8 % 9; +} + +TEST_XML(xpath_paths_w3c_17, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("child::para[position()=1]")); + CHECK_XPATH_NODESET(n, STR("child::para[position()=1]")) % 3; +} + +TEST_XML(xpath_paths_w3c_18, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("child::para[position()=last()]")); + CHECK_XPATH_NODESET(n, STR("child::para[position()=last()]")) % 6; +} + +TEST_XML(xpath_paths_w3c_19, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("child::para[position()=last()-1]")); + CHECK_XPATH_NODESET(n, STR("child::para[position()=last()-1]")) % 5; +} + +TEST_XML(xpath_paths_w3c_20, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("child::para[position()>1]")); + CHECK_XPATH_NODESET(n, STR("child::para[position()>1]")) % 4 % 5 % 6; +} + +TEST_XML(xpath_paths_w3c_21, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")).child(STR("chapter")).next_sibling().next_sibling(); + + CHECK_XPATH_NODESET(c, STR("following-sibling::chapter[position()=1]")); + CHECK_XPATH_NODESET(n, STR("following-sibling::chapter[position()=1]")) % 6; +} + +TEST_XML(xpath_paths_w3c_22, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")).child(STR("chapter")).next_sibling().next_sibling(); + + CHECK_XPATH_NODESET(c, STR("preceding-sibling::chapter[position()=1]")); + CHECK_XPATH_NODESET(n, STR("preceding-sibling::chapter[position()=1]")) % 4; +} + +TEST_XML(xpath_paths_w3c_23, "
") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("/descendant::figure[position()=4]")); + CHECK_XPATH_NODESET(n, STR("/descendant::figure[position()=4]")) % 7; + CHECK_XPATH_NODESET(n.child(STR("figure")), STR("/descendant::figure[position()=4]")) % 7; +} + +TEST_XML(xpath_paths_w3c_24, "
") +{ + xml_node c; + xml_node n = doc.child(STR("doc")).child(STR("chapter")); + + CHECK_XPATH_NODESET(c, STR("/child::doc/child::chapter[position()=5]/child::section[position()=2]")); + CHECK_XPATH_NODESET(n, STR("/child::doc/child::chapter[position()=5]/child::section[position()=2]")) % 9; + CHECK_XPATH_NODESET(doc, STR("/child::doc/child::chapter[position()=5]/child::section[position()=2]")) % 9; +} + +TEST_XML(xpath_paths_w3c_25, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("child::para[attribute::type=\"warning\"]")); + CHECK_XPATH_NODESET(n, STR("child::para[attribute::type=\"warning\"]")) % 4 % 6 % 11 % 13 % 15; +} + +TEST_XML(xpath_paths_w3c_26, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("child::para[attribute::type=\"warning\"][position()=5]")); + CHECK_XPATH_NODESET(n, STR("child::para[attribute::type=\"warning\"][position()=5]")) % 15; +} + +TEST_XML(xpath_paths_w3c_27a, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("child::para[position()=5][attribute::type=\"warning\"]")); + CHECK_XPATH_NODESET(n, STR("child::para[position()=5][attribute::type=\"warning\"]")); +} + +TEST_XML(xpath_paths_w3c_27b, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("child::para[position()=5][attribute::type=\"warning\"]")); + CHECK_XPATH_NODESET(n, STR("child::para[position()=5][attribute::type=\"warning\"]")) % 9; +} + +TEST_XML(xpath_paths_w3c_28, "fooIntroductionintroductionIntroductionfoo") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("child::chapter[child::title='Introduction']")); + CHECK_XPATH_NODESET(n, STR("child::chapter[child::title='Introduction']")) % 6 % 13; +} + +TEST_XML(xpath_paths_w3c_29, "fooIntroductionintroductionIntroductionfoo") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, 
STR("child::chapter[child::title]")); + CHECK_XPATH_NODESET(n, STR("child::chapter[child::title]")) % 3 % 6 % 9 % 13; +} + +TEST_XML(xpath_paths_w3c_30, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("child::*[self::chapter or self::appendix]")); + CHECK_XPATH_NODESET(n, STR("child::*[self::chapter or self::appendix]")) % 4 % 5 % 7; +} + +TEST_XML(xpath_paths_w3c_31a, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("child::*[self::chapter or self::appendix][position()=last()]")); + CHECK_XPATH_NODESET(n, STR("child::*[self::chapter or self::appendix][position()=last()]")) % 7; +} + +TEST_XML(xpath_paths_w3c_31b, "") +{ + xml_node c; + xml_node n = doc.child(STR("node")); + + CHECK_XPATH_NODESET(c, STR("child::*[self::chapter or self::appendix][position()=last()]")); + CHECK_XPATH_NODESET(n, STR("child::*[self::chapter or self::appendix][position()=last()]")) % 8; +} + +#endif diff --git a/tests/test_xpath_xalan_1.cpp b/tests/test_xpath_xalan_1.cpp index 7be711f..b862b1e 100644 --- a/tests/test_xpath_xalan_1.cpp +++ b/tests/test_xpath_xalan_1.cpp @@ -1,407 +1,407 @@ -#ifndef PUGIXML_NO_XPATH - -#include "common.hpp" - -TEST(xpath_xalan_boolean_1) -{ - xml_node c; - - CHECK_XPATH_BOOLEAN(c, STR("true()"), true); - CHECK_XPATH_BOOLEAN(c, STR("true() and true()"), true); - CHECK_XPATH_BOOLEAN(c, STR("true() or true()"), true); - CHECK_XPATH_BOOLEAN(c, STR("not(true())"), false); - CHECK_XPATH_BOOLEAN(c, STR("boolean('')"), false); - CHECK_XPATH_BOOLEAN(c, STR("1>2"), false); - CHECK_XPATH_BOOLEAN(c, STR("1>=2"), false); - CHECK_XPATH_BOOLEAN(c, STR("false()"), false); - CHECK_XPATH_BOOLEAN(c, STR("1=1"), true); - CHECK_XPATH_BOOLEAN(c, STR("1=2"), false); - CHECK_XPATH_BOOLEAN(c, STR("1 = 1.00"), true); - CHECK_XPATH_BOOLEAN(c, STR("0 = -0"), true); - CHECK_XPATH_BOOLEAN(c, STR("1 = '001'"), true); - CHECK_XPATH_BOOLEAN(c, STR("true()='0'"), true); - CHECK_XPATH_BOOLEAN(c, 
STR("false()=''"), true); - CHECK_XPATH_BOOLEAN(c, STR("true()=2"), true); - CHECK_XPATH_BOOLEAN(c, STR("false()=0"), true); - CHECK_XPATH_BOOLEAN(c, STR("false() and false()"), false); - CHECK_XPATH_BOOLEAN(c, STR("'foo' and 'fop'"), true); - CHECK_XPATH_BOOLEAN(c, STR("true() and false()"), false); - CHECK_XPATH_BOOLEAN(c, STR("false() and true()"), false); - CHECK_XPATH_BOOLEAN(c, STR("'1' and '0'"), true); - CHECK_XPATH_BOOLEAN(c, STR("true() or false()"), true); - CHECK_XPATH_BOOLEAN(c, STR("false() or true()"), true); - CHECK_XPATH_BOOLEAN(c, STR("false() or false()"), false); - CHECK_XPATH_BOOLEAN(c, STR("0 or ''"), false); - CHECK_XPATH_BOOLEAN(c, STR("not(false())"), true); - CHECK_XPATH_BOOLEAN(c, STR("not(false() = false())"), false); - CHECK_XPATH_BOOLEAN(c, STR("not(true() = false())"), true); - CHECK_XPATH_BOOLEAN(c, STR("not('')"), true); - CHECK_XPATH_BOOLEAN(c, STR("not('0')"), false); - CHECK_XPATH_BOOLEAN(c, STR("boolean('0')"), true); - CHECK_XPATH_BOOLEAN(c, STR("boolean(0)"), false); - CHECK_XPATH_BOOLEAN(c, STR("boolean(-0)"), false); - CHECK_XPATH_BOOLEAN(c, STR("boolean(1)"), true); - CHECK_XPATH_BOOLEAN(c, STR("boolean(1 div 0)"), true); - CHECK_XPATH_BOOLEAN(c, STR("boolean(0 div 0)"), false); -} - -TEST_XML(xpath_xalan_boolean_2, "") -{ - CHECK_XPATH_BOOLEAN(doc, STR("boolean(doc)"), true); - CHECK_XPATH_BOOLEAN(doc, STR("boolean(foo)"), false); -} - -TEST(xpath_xalan_boolean_3) -{ - xml_node c; - - CHECK_XPATH_BOOLEAN(c, STR("1>1"), false); - CHECK_XPATH_BOOLEAN(c, STR("2>1"), true); - CHECK_XPATH_BOOLEAN(c, STR("1<2"), true); - CHECK_XPATH_BOOLEAN(c, STR("1<1"), false); - CHECK_XPATH_BOOLEAN(c, STR("2<1"), false); - CHECK_XPATH_BOOLEAN(c, STR("'2'>'1'"), true); - CHECK_XPATH_BOOLEAN(c, STR("0 > -0"), false); - CHECK_XPATH_BOOLEAN(c, STR("2>=2"), true); - CHECK_XPATH_BOOLEAN(c, STR("2>=1"), true); - CHECK_XPATH_BOOLEAN(c, STR("1<=2"), true); - CHECK_XPATH_BOOLEAN(c, STR("1<=1"), true); - CHECK_XPATH_BOOLEAN(c, STR("2<=1"), false); - 
CHECK_XPATH_BOOLEAN(c, STR("false() and 1 div 0"), false); - CHECK_XPATH_BOOLEAN(c, STR("true() or 1 div 0"), true); - CHECK_XPATH_BOOLEAN(c, STR("1!=1"), false); - CHECK_XPATH_BOOLEAN(c, STR("1!=2"), true); - CHECK_XPATH_BOOLEAN(c, STR("1!=1.00"), false); - CHECK_XPATH_BOOLEAN(c, STR("false()!=true()"), true); - CHECK_XPATH_BOOLEAN(c, STR("true()!=false()"), true); - CHECK_XPATH_BOOLEAN(c, STR("false()!=false()"), false); - CHECK_XPATH_BOOLEAN(c, STR("'ace' != 'ace'"), false); - CHECK_XPATH_BOOLEAN(c, STR("'ace' != 'abc'"), true); - CHECK_XPATH_BOOLEAN(c, STR("'H' != ' H'"), true); - CHECK_XPATH_BOOLEAN(c, STR("'H' != 'H '"), true); - CHECK_XPATH_BOOLEAN(c, STR("1.9999999 < 2.0"), true); - CHECK_XPATH_BOOLEAN(c, STR("2.0000001 < 2.0"), false); - CHECK_XPATH_BOOLEAN(c, STR("1.9999999 < 2"), true); - CHECK_XPATH_BOOLEAN(c, STR("2 < 2.0"), false); - CHECK_XPATH_BOOLEAN(c, STR("'001' = 1"), true); - CHECK_XPATH_BOOLEAN(c, STR("0=false()"), true); - CHECK_XPATH_BOOLEAN(c, STR("'0'=true()"), true); -} - -TEST_XML(xpath_xalan_boolean_4, "foobarfoobarfoo") -{ - CHECK_XPATH_BOOLEAN(doc, STR("avj/*='foo'"), true); - CHECK_XPATH_BOOLEAN(doc, STR("not(avj/*='foo')"), false); - CHECK_XPATH_BOOLEAN(doc, STR("avj/*!='foo'"), true); - CHECK_XPATH_BOOLEAN(doc, STR("not(avj/*!='foo')"), false); - - CHECK_XPATH_BOOLEAN(doc, STR("avj/k='foo'"), false); - CHECK_XPATH_BOOLEAN(doc, STR("not(avj/k='foo')"), true); - CHECK_XPATH_BOOLEAN(doc, STR("avj/k!='foo'"), false); - CHECK_XPATH_BOOLEAN(doc, STR("not(avj/k!='foo')"), true); -} - -TEST_XML(xpath_xalan_boolean_5, "firstsecondthirdfourth") -{ - xml_node c = doc.child(STR("doc")); - - CHECK_XPATH_BOOLEAN(c, STR("j[@l='12'] = j[@w='33']"), true); - CHECK_XPATH_BOOLEAN(c, STR("j[@l='12'] = j[@l='17']"), false); - CHECK_XPATH_BOOLEAN(c, STR("j[@l='12'] = j[.='first' or @w='45']"), true); - - CHECK_XPATH_BOOLEAN(c, STR("j[@l='12'] != j[@w='33']"), true); - CHECK_XPATH_BOOLEAN(c, STR("j[@l='12'] != j[@l='17']"), true); - 
CHECK_XPATH_BOOLEAN(c, STR("j[@l='12'] != j[.='first' or @w='45']"), true); - CHECK_XPATH_BOOLEAN(c, STR("j[@l='16'] != j[@w='78']"), false); -} - -TEST_XML(xpath_xalan_boolean_6, "12345678") -{ - xml_node c = doc.child(STR("doc")); - - CHECK_XPATH_BOOLEAN(c, STR("avj/good/*=34"), true); - CHECK_XPATH_BOOLEAN(c, STR("not(avj/good/*=34)"), false); - CHECK_XPATH_BOOLEAN(c, STR("avj/good/*!=34"), true); - CHECK_XPATH_BOOLEAN(c, STR("not(avj/good/*!=34)"), false); - - CHECK_XPATH_BOOLEAN(c, STR("34=avj/good/*"), true); - CHECK_XPATH_BOOLEAN(c, STR("not(34=avj/good/*)"), false); - CHECK_XPATH_BOOLEAN(c, STR("34!=avj/good/*"), true); - CHECK_XPATH_BOOLEAN(c, STR("not(34!=avj/good/*)"), false); -} - -TEST_XML(xpath_xalan_boolean_7, "truefalse?10") -{ - xml_node c = doc.child(STR("doc")); - - CHECK_XPATH_BOOLEAN(c, STR("avj/bool/*=true()"), true); - CHECK_XPATH_BOOLEAN(c, STR("not(avj/bool/*=true())"), false); - CHECK_XPATH_BOOLEAN(c, STR("avj/bool/*!=true()"), false); - CHECK_XPATH_BOOLEAN(c, STR("not(avj/bool/*!=true())"), true); - - CHECK_XPATH_BOOLEAN(c, STR("true()=avj/bool/*"), true); - CHECK_XPATH_BOOLEAN(c, STR("not(true()=avj/bool/*)"), false); - CHECK_XPATH_BOOLEAN(c, STR("true()!=avj/bool/*"), false); - CHECK_XPATH_BOOLEAN(c, STR("not(true()!=avj/bool/*)"), true); - - CHECK_XPATH_BOOLEAN(c, STR("avj/none/*=true()"), false); - CHECK_XPATH_BOOLEAN(c, STR("not(avj/none/*=true())"), true); - CHECK_XPATH_BOOLEAN(c, STR("avj/none/*!=true()"), true); - CHECK_XPATH_BOOLEAN(c, STR("not(avj/none/*!=true())"), false); - - CHECK_XPATH_BOOLEAN(c, STR("true()=avj/none/*"), false); - CHECK_XPATH_BOOLEAN(c, STR("not(true()=avj/none/*)"), true); - CHECK_XPATH_BOOLEAN(c, STR("true()!=avj/none/*"), true); - CHECK_XPATH_BOOLEAN(c, STR("not(true()!=avj/none/*)"), false); -} - -TEST_XML(xpath_xalan_conditional, "b") -{ - xml_node c; - - CHECK_XPATH_BOOLEAN(c, STR("(round(3.7) > 3)"), true); - CHECK_XPATH_BOOLEAN(c, STR("2 > 1"), true); - CHECK_XPATH_BOOLEAN(c, STR("9 mod 3 = 0"), 
true); - CHECK_XPATH_BOOLEAN(c, STR("'a'='a'"), true); - CHECK_XPATH_BOOLEAN(c, STR("2+2=4"), true); - - xml_node b = doc.child(STR("letters")).first_child(); - - CHECK_XPATH_BOOLEAN(b, STR(".='b'"), true); - CHECK_XPATH_BOOLEAN(b, STR("name(..)='letters'"), true); -} - -TEST_XML(xpath_xalan_math_1, "3") -{ - xml_node c; - - CHECK_XPATH_NUMBER(c, STR("number('1')"), 1); - CHECK_XPATH_NUMBER(c, STR("floor(0.0)"), 0); - CHECK_XPATH_NUMBER(c, STR("ceiling(0.0)"), 0); - CHECK_XPATH_NUMBER(c, STR("round(0.0)"), 0); - CHECK_XPATH_NUMBER(c, STR("2*3"), 6); - CHECK_XPATH_NUMBER(c, STR("3+6"), 9); - CHECK_XPATH_NUMBER(c, STR("3-1"), 2); - CHECK_XPATH_NUMBER_NAN(doc, STR("a-1")); // a-1 is a name test, not arithmetic expression - CHECK_XPATH_NUMBER(doc, STR("a -1"), 2); - CHECK_XPATH_NUMBER(c, STR("6 div 2"), 3); - CHECK_XPATH_NUMBER(c, STR("5 mod 2"), 1); - CHECK_XPATH_NUMBER_NAN(c, STR("number(n)")); - CHECK_XPATH_NUMBER(c, STR("number(2)"), 2); - CHECK_XPATH_NUMBER(c, STR("number('3')"), 3); - CHECK_XPATH_NUMBER_NAN(c, STR("number('')")); - CHECK_XPATH_NUMBER_NAN(c, STR("number('abc')")); - CHECK_XPATH_BOOLEAN(c, STR("number(string(1.0))=1"), true); - CHECK_XPATH_BOOLEAN(c, STR("number(true())=1"), true); - CHECK_XPATH_BOOLEAN(c, STR("number(false())=0"), true); - -#ifndef MSVC6_NAN_BUG - CHECK_XPATH_BOOLEAN(c, STR("number('xxx')=number('xxx')"), false); - CHECK_XPATH_BOOLEAN(c, STR("number('xxx')=0"), false); -#endif - - CHECK_XPATH_NUMBER(doc, STR("floor(a)"), 3); - CHECK_XPATH_NUMBER(c, STR("floor(1.9)"), 1); - CHECK_XPATH_NUMBER(c, STR("floor(2.999999)"), 2); - CHECK_XPATH_NUMBER(c, STR("floor(-1.5)"), -2); - CHECK_XPATH_BOOLEAN(c, STR("floor(1)=1"), true); - CHECK_XPATH_BOOLEAN(c, STR("floor(1.9)=1"), true); - CHECK_XPATH_BOOLEAN(c, STR("floor(-1.5)=-2"), true); - CHECK_XPATH_NUMBER(doc, STR("ceiling(a)"), 3); - CHECK_XPATH_NUMBER(c, STR("ceiling(1.54)"), 2); - CHECK_XPATH_NUMBER(c, STR("ceiling(2.999999)"), 3); - CHECK_XPATH_NUMBER(c, STR("ceiling(3.000001)"), 4); - 
CHECK_XPATH_BOOLEAN(c, STR("ceiling(1)=1"), true); - CHECK_XPATH_BOOLEAN(c, STR("ceiling(1.1)=2"), true); - CHECK_XPATH_BOOLEAN(c, STR("ceiling(-1.5)=-1"), true); -} - -TEST_XML(xpath_xalan_math_2, "3") -{ - xml_node c; - - CHECK_XPATH_NUMBER(doc, STR("round(a)"), 3); - CHECK_XPATH_NUMBER(c, STR("round(1.24)"), 1); - CHECK_XPATH_NUMBER(c, STR("round(2.999999)"), 3); - CHECK_XPATH_NUMBER(c, STR("round(3.000001)"), 3); - CHECK_XPATH_NUMBER(c, STR("round(1.1)"), 1); - CHECK_XPATH_NUMBER(c, STR("round(-1.1)"), -1); - CHECK_XPATH_NUMBER(c, STR("round(1.9)"), 2); - CHECK_XPATH_NUMBER(c, STR("round(-1.9)"), -2); - CHECK_XPATH_NUMBER(c, STR("round(1.5)"), 2); - CHECK_XPATH_NUMBER(c, STR("round(-1.5)"), -1); - CHECK_XPATH_NUMBER(c, STR("round(1.4999999)"), 1); - CHECK_XPATH_NUMBER(c, STR("round(-1.4999999)"), -1); - CHECK_XPATH_NUMBER(c, STR("round(1.5000001)"), 2); - CHECK_XPATH_NUMBER(c, STR("round(-1.5000001)"), -2); -} - -TEST_XML(xpath_xalan_math_3, "24517-58-37") -{ - CHECK_XPATH_NUMBER(doc, STR("sum(doc/x)"), 0); - CHECK_XPATH_NUMBER_NAN(doc, STR("sum(doc/n)")); - CHECK_XPATH_NUMBER(doc, STR("sum(doc/n[text()])"), 11); - CHECK_XPATH_NUMBER(doc, STR("sum(doc/n/@v)"), 9); - CHECK_XPATH_NUMBER(doc, STR("sum(doc/e)"), -17); -} - -TEST_XML(xpath_xalan_math_4, "231237212
5
2
") -{ - xml_node c = doc.child(STR("doc")); - - CHECK_XPATH_NUMBER(c, STR("n1*n2"), 6); - CHECK_XPATH_NUMBER(c, STR("n1/@a*n2/@a"), 2); - CHECK_XPATH_NUMBER(c, STR("(n1/@a)*(n2/@a)"), 2); - CHECK_XPATH_NUMBER(c, STR("n1+n2"), 5); - CHECK_XPATH_NUMBER(c, STR("n1/@a+n2/@a"), 3); - CHECK_XPATH_NUMBER(c, STR("(n1/@a)+(n2/@a)"), 3); - CHECK_XPATH_NUMBER(c, STR("1-2"), -1); - CHECK_XPATH_NUMBER(c, STR("n1 - n2"), -1); - CHECK_XPATH_NUMBER(c, STR("n1-n2"), 123); - CHECK_XPATH_NUMBER(c, STR("n-1 - n-2"), 60); - CHECK_XPATH_NUMBER(c, STR("n-1 -n-2"), 60); - CHECK_XPATH_NUMBER(c, STR("7+-3"), 4); - CHECK_XPATH_NUMBER(c, STR("n-1+-n-2"), 60); - CHECK_XPATH_NUMBER(c, STR("7 - -3"), 10); - CHECK_XPATH_NUMBER(c, STR("n-1 - -n-2"), 84); - CHECK_XPATH_NUMBER(c, STR("-7 --3"), -4); - CHECK_XPATH_NUMBER(c, STR("-n-1 --n-2"), -60); - - CHECK_XPATH_FAIL(STR("+7")); - CHECK_XPATH_FAIL(STR("7++3")); - CHECK_XPATH_FAIL(STR("7-+3")); - - CHECK_XPATH_NUMBER(c, STR("6 div -2"), -3); - CHECK_XPATH_NUMBER(c, STR("n1 div n2"), 2.0 / 3.0); - CHECK_XPATH_NUMBER(c, STR("div div mod"), 2.5); - CHECK_XPATH_NUMBER(c, STR("div/@a div mod/@a"), 0.4); - - CHECK_XPATH_BOOLEAN(c, STR("1 div -0 = 2 div -0"), true); - CHECK_XPATH_BOOLEAN(c, STR("1 div -0 = 1 div 0"), false); - CHECK_XPATH_BOOLEAN(c, STR("1 div -0 = -1 div 0"), true); - -#ifndef MSVC6_NAN_BUG - CHECK_XPATH_BOOLEAN(c, STR("0 div 0 >= 0"), false); - CHECK_XPATH_BOOLEAN(c, STR("0 div 0 < 0"), false); -#endif - - CHECK_XPATH_NUMBER(c, STR("n1 mod n2"), 2); - CHECK_XPATH_NUMBER(c, STR("div mod mod"), 1); - CHECK_XPATH_NUMBER(c, STR("div/@a mod mod/@a"), 2); - - CHECK_XPATH_BOOLEAN(c, STR("(5 mod 2 = 1) and (5 mod -2 = 1) and (-5 mod 2 = -1) and (-5 mod -2 = -1)"), true); -} - -TEST(xpath_xalan_math_5) -{ - xml_node c; - - CHECK_XPATH_NUMBER(c, STR("(((((('3'+5)*(3)+((('2')+2)*('1' - 6)))-('4' - '2'))+(-(4-6)))))"), 4); - CHECK_XPATH_NUMBER(c, STR("1*1*2*2*2*3*3*1*1*1*0.5*0.5"), 18); - CHECK_XPATH_NUMBER(c, STR("1440 div 2 div 2 div 6"), 60); - 
CHECK_XPATH_NUMBER(c, STR("1440 div 2 div 2 div 6 div 10"), 6); - CHECK_XPATH_NUMBER(c, STR("1440 div 2 div 2 div 6 div 10 div 3"), 2); - CHECK_XPATH_NUMBER(c, STR("(1*2*3*4*5*6)div 2 div 6 div 10 div 3"), 2); - CHECK_XPATH_NUMBER_NAN(c, STR("(2 + number('xxx'))")); - CHECK_XPATH_NUMBER_NAN(c, STR("2 * -number('xxx')")); - CHECK_XPATH_NUMBER_NAN(c, STR("2 - number('xxx')")); - CHECK_XPATH_NUMBER_NAN(c, STR("number('xxx') - 3")); - CHECK_XPATH_NUMBER_NAN(c, STR("2 div number('xxx')")); - CHECK_XPATH_NUMBER_NAN(c, STR("number('xxx') div 3")); - -#ifndef __BORLANDC__ // BCC fmod does not propagate NaN correctly - CHECK_XPATH_NUMBER_NAN(c, STR("2 mod number('xxx')")); - CHECK_XPATH_NUMBER_NAN(c, STR("number('xxx') mod 3")); -#endif - - CHECK_XPATH_NUMBER_NAN(c, STR("floor(number('xxx'))")); - CHECK_XPATH_NUMBER_NAN(c, STR("ceiling(number('xxx'))")); - CHECK_XPATH_NUMBER_NAN(c, STR("round(number('xxx'))")); - CHECK_XPATH_NUMBER(c, STR("10+5+25+20+15+50+35+40"), 200); - CHECK_XPATH_NUMBER(c, STR("100-9-7-4-17-18-5"), 40); - CHECK_XPATH_NUMBER(c, STR("3*2+5*4-4*2-1"), 17); - CHECK_XPATH_NUMBER(c, STR("6*5-8*2+5*2"), 24); - CHECK_XPATH_NUMBER(c, STR("10*5-4*2+6*1 -3*3"), 39); - - CHECK_XPATH_NUMBER(c, STR("(24 div 3 +2) div (40 div 8 -3)"), 5); - CHECK_XPATH_NUMBER(c, STR("80 div 2 + 12 div 2 - 4 div 2"), 44); - CHECK_XPATH_NUMBER(c, STR("70 div 10 - 18 div 6 + 10 div 2"), 9); - - CHECK_XPATH_NUMBER(c, STR("48 mod 17 - 2 mod 9 + 13 mod 5"), 15); - CHECK_XPATH_NUMBER(c, STR("56 mod round(5*2+1.444) - 6 mod 4 + 7 mod 4"), 2); - CHECK_XPATH_NUMBER(c, STR("(77 mod 10 + 5 mod 8) mod 10"), 2); -} - -TEST_XML(xpath_xalan_math_6, "37x") -{ - xml_node c = doc.child(STR("doc")); - - CHECK_XPATH_NUMBER(c, STR("-(n1|n2)"), -3); - CHECK_XPATH_NUMBER(c, STR("-(n2|n1)"), -3); - CHECK_XPATH_BOOLEAN(c, STR("contains(number(n1), 'NaN')"), false); - CHECK_XPATH_BOOLEAN(c, STR("contains(number(n3), 'NaN')"), true); -} - -TEST_XML(xpath_xalan_math_7, "37x") -{ - xml_node c = 
doc.child(STR("doc")); - - CHECK_XPATH_NUMBER(c, STR("-(n1|n2)"), -3); - CHECK_XPATH_NUMBER(c, STR("-(n2|n1)"), -3); - CHECK_XPATH_BOOLEAN(c, STR("contains(number(n1), 'NaN')"), false); - CHECK_XPATH_BOOLEAN(c, STR("contains(number(n3), 'NaN')"), true); -} - -TEST_XML(xpath_xalan_math_8, "0.0004") -{ - CHECK_XPATH_NUMBER(doc, STR("number(1.75)"), 1.75); - CHECK_XPATH_NUMBER(doc, STR("number(7 div 4)"), 1.75); - CHECK_XPATH_BOOLEAN(doc, STR("(number(1.75) = (7 div 4))"), true); - CHECK_XPATH_NUMBER(doc, STR("number(0.109375 * 16)"), 1.75); - CHECK_XPATH_BOOLEAN(doc, STR("(number(1.75) = (0.109375 * 16))"), true); - CHECK_XPATH_NUMBER(doc, STR("number(k)"), 0.0004); - CHECK_XPATH_NUMBER(doc, STR("number(4 div 10000)"), 0.0004); - CHECK_XPATH_BOOLEAN(doc, STR("(number(k) = (4 div 10000))"), true); - CHECK_XPATH_NUMBER(doc, STR("number(0.0001 * 4)"), 0.0004); - CHECK_XPATH_BOOLEAN(doc, STR("(number(k) = (0.0001 * 4))"), true); -} - -TEST(xpath_xalan_math_9) -{ - xml_node c; - - CHECK_XPATH_STRING(c, STR("string(number('0.0'))"), STR("0")); - CHECK_XPATH_STRING(c, STR("string(-1 * number('0.0'))"), STR("0")); - - CHECK_XPATH_STRING(c, STR("string(number('0.4'))"), STR("0.4")); - CHECK_XPATH_STRING(c, STR("string(-1 * number('0.4'))"), STR("-0.4")); - - CHECK_XPATH_STRING(c, STR("string(number('4.0'))"), STR("4")); - CHECK_XPATH_STRING(c, STR("string(-1 * number('4.0'))"), STR("-4")); - - CHECK_XPATH_STRING(c, STR("string(number('0.04'))"), STR("0.04")); - CHECK_XPATH_STRING(c, STR("string(-1 * number('0.04'))"), STR("-0.04")); - - CHECK_XPATH_STRING(c, STR("string(number('0.004'))"), STR("0.004")); - CHECK_XPATH_STRING(c, STR("string(-1 * number('0.004'))"), STR("-0.004")); - - CHECK_XPATH_STRING(c, STR("string(number('0.0004'))"), STR("0.0004")); - CHECK_XPATH_STRING(c, STR("string(-1 * number('0.0004'))"), STR("-0.0004")); - - CHECK_XPATH_STRING(c, STR("string(number('0.0000000000001'))"), STR("0.0000000000001")); - CHECK_XPATH_STRING(c, STR("string(-1 * 
number('0.0000000000001'))"), STR("-0.0000000000001")); - - CHECK_XPATH_STRING(c, STR("string(number('0.0000000000000000000000000001'))"), STR("0.0000000000000000000000000001")); - CHECK_XPATH_STRING(c, STR("string(-1 * number('0.0000000000000000000000000001'))"), STR("-0.0000000000000000000000000001")); - - CHECK_XPATH_STRING(c, STR("string(number('0.0000000000001000000000000001'))"), STR("0.0000000000001000000000000001")); - CHECK_XPATH_STRING(c, STR("string(-1 * number('0.0000000000001000000000000001'))"), STR("-0.0000000000001000000000000001")); - - CHECK_XPATH_STRING(c, STR("string(number('0.0012'))"), STR("0.0012")); - CHECK_XPATH_STRING(c, STR("string(-1 * number('0.0012'))"), STR("-0.0012")); - - CHECK_XPATH_STRING(c, STR("string(number('0.012'))"), STR("0.012")); - CHECK_XPATH_STRING(c, STR("string(-1 * number('0.012'))"), STR("-0.012")); -} - -#endif +#ifndef PUGIXML_NO_XPATH + +#include "common.hpp" + +TEST(xpath_xalan_boolean_1) +{ + xml_node c; + + CHECK_XPATH_BOOLEAN(c, STR("true()"), true); + CHECK_XPATH_BOOLEAN(c, STR("true() and true()"), true); + CHECK_XPATH_BOOLEAN(c, STR("true() or true()"), true); + CHECK_XPATH_BOOLEAN(c, STR("not(true())"), false); + CHECK_XPATH_BOOLEAN(c, STR("boolean('')"), false); + CHECK_XPATH_BOOLEAN(c, STR("1>2"), false); + CHECK_XPATH_BOOLEAN(c, STR("1>=2"), false); + CHECK_XPATH_BOOLEAN(c, STR("false()"), false); + CHECK_XPATH_BOOLEAN(c, STR("1=1"), true); + CHECK_XPATH_BOOLEAN(c, STR("1=2"), false); + CHECK_XPATH_BOOLEAN(c, STR("1 = 1.00"), true); + CHECK_XPATH_BOOLEAN(c, STR("0 = -0"), true); + CHECK_XPATH_BOOLEAN(c, STR("1 = '001'"), true); + CHECK_XPATH_BOOLEAN(c, STR("true()='0'"), true); + CHECK_XPATH_BOOLEAN(c, STR("false()=''"), true); + CHECK_XPATH_BOOLEAN(c, STR("true()=2"), true); + CHECK_XPATH_BOOLEAN(c, STR("false()=0"), true); + CHECK_XPATH_BOOLEAN(c, STR("false() and false()"), false); + CHECK_XPATH_BOOLEAN(c, STR("'foo' and 'fop'"), true); + CHECK_XPATH_BOOLEAN(c, STR("true() and false()"), false); + 
CHECK_XPATH_BOOLEAN(c, STR("false() and true()"), false); + CHECK_XPATH_BOOLEAN(c, STR("'1' and '0'"), true); + CHECK_XPATH_BOOLEAN(c, STR("true() or false()"), true); + CHECK_XPATH_BOOLEAN(c, STR("false() or true()"), true); + CHECK_XPATH_BOOLEAN(c, STR("false() or false()"), false); + CHECK_XPATH_BOOLEAN(c, STR("0 or ''"), false); + CHECK_XPATH_BOOLEAN(c, STR("not(false())"), true); + CHECK_XPATH_BOOLEAN(c, STR("not(false() = false())"), false); + CHECK_XPATH_BOOLEAN(c, STR("not(true() = false())"), true); + CHECK_XPATH_BOOLEAN(c, STR("not('')"), true); + CHECK_XPATH_BOOLEAN(c, STR("not('0')"), false); + CHECK_XPATH_BOOLEAN(c, STR("boolean('0')"), true); + CHECK_XPATH_BOOLEAN(c, STR("boolean(0)"), false); + CHECK_XPATH_BOOLEAN(c, STR("boolean(-0)"), false); + CHECK_XPATH_BOOLEAN(c, STR("boolean(1)"), true); + CHECK_XPATH_BOOLEAN(c, STR("boolean(1 div 0)"), true); + CHECK_XPATH_BOOLEAN(c, STR("boolean(0 div 0)"), false); +} + +TEST_XML(xpath_xalan_boolean_2, "") +{ + CHECK_XPATH_BOOLEAN(doc, STR("boolean(doc)"), true); + CHECK_XPATH_BOOLEAN(doc, STR("boolean(foo)"), false); +} + +TEST(xpath_xalan_boolean_3) +{ + xml_node c; + + CHECK_XPATH_BOOLEAN(c, STR("1>1"), false); + CHECK_XPATH_BOOLEAN(c, STR("2>1"), true); + CHECK_XPATH_BOOLEAN(c, STR("1<2"), true); + CHECK_XPATH_BOOLEAN(c, STR("1<1"), false); + CHECK_XPATH_BOOLEAN(c, STR("2<1"), false); + CHECK_XPATH_BOOLEAN(c, STR("'2'>'1'"), true); + CHECK_XPATH_BOOLEAN(c, STR("0 > -0"), false); + CHECK_XPATH_BOOLEAN(c, STR("2>=2"), true); + CHECK_XPATH_BOOLEAN(c, STR("2>=1"), true); + CHECK_XPATH_BOOLEAN(c, STR("1<=2"), true); + CHECK_XPATH_BOOLEAN(c, STR("1<=1"), true); + CHECK_XPATH_BOOLEAN(c, STR("2<=1"), false); + CHECK_XPATH_BOOLEAN(c, STR("false() and 1 div 0"), false); + CHECK_XPATH_BOOLEAN(c, STR("true() or 1 div 0"), true); + CHECK_XPATH_BOOLEAN(c, STR("1!=1"), false); + CHECK_XPATH_BOOLEAN(c, STR("1!=2"), true); + CHECK_XPATH_BOOLEAN(c, STR("1!=1.00"), false); + CHECK_XPATH_BOOLEAN(c, STR("false()!=true()"), 
true); + CHECK_XPATH_BOOLEAN(c, STR("true()!=false()"), true); + CHECK_XPATH_BOOLEAN(c, STR("false()!=false()"), false); + CHECK_XPATH_BOOLEAN(c, STR("'ace' != 'ace'"), false); + CHECK_XPATH_BOOLEAN(c, STR("'ace' != 'abc'"), true); + CHECK_XPATH_BOOLEAN(c, STR("'H' != ' H'"), true); + CHECK_XPATH_BOOLEAN(c, STR("'H' != 'H '"), true); + CHECK_XPATH_BOOLEAN(c, STR("1.9999999 < 2.0"), true); + CHECK_XPATH_BOOLEAN(c, STR("2.0000001 < 2.0"), false); + CHECK_XPATH_BOOLEAN(c, STR("1.9999999 < 2"), true); + CHECK_XPATH_BOOLEAN(c, STR("2 < 2.0"), false); + CHECK_XPATH_BOOLEAN(c, STR("'001' = 1"), true); + CHECK_XPATH_BOOLEAN(c, STR("0=false()"), true); + CHECK_XPATH_BOOLEAN(c, STR("'0'=true()"), true); +} + +TEST_XML(xpath_xalan_boolean_4, "foobarfoobarfoo") +{ + CHECK_XPATH_BOOLEAN(doc, STR("avj/*='foo'"), true); + CHECK_XPATH_BOOLEAN(doc, STR("not(avj/*='foo')"), false); + CHECK_XPATH_BOOLEAN(doc, STR("avj/*!='foo'"), true); + CHECK_XPATH_BOOLEAN(doc, STR("not(avj/*!='foo')"), false); + + CHECK_XPATH_BOOLEAN(doc, STR("avj/k='foo'"), false); + CHECK_XPATH_BOOLEAN(doc, STR("not(avj/k='foo')"), true); + CHECK_XPATH_BOOLEAN(doc, STR("avj/k!='foo'"), false); + CHECK_XPATH_BOOLEAN(doc, STR("not(avj/k!='foo')"), true); +} + +TEST_XML(xpath_xalan_boolean_5, "firstsecondthirdfourth") +{ + xml_node c = doc.child(STR("doc")); + + CHECK_XPATH_BOOLEAN(c, STR("j[@l='12'] = j[@w='33']"), true); + CHECK_XPATH_BOOLEAN(c, STR("j[@l='12'] = j[@l='17']"), false); + CHECK_XPATH_BOOLEAN(c, STR("j[@l='12'] = j[.='first' or @w='45']"), true); + + CHECK_XPATH_BOOLEAN(c, STR("j[@l='12'] != j[@w='33']"), true); + CHECK_XPATH_BOOLEAN(c, STR("j[@l='12'] != j[@l='17']"), true); + CHECK_XPATH_BOOLEAN(c, STR("j[@l='12'] != j[.='first' or @w='45']"), true); + CHECK_XPATH_BOOLEAN(c, STR("j[@l='16'] != j[@w='78']"), false); +} + +TEST_XML(xpath_xalan_boolean_6, "12345678") +{ + xml_node c = doc.child(STR("doc")); + + CHECK_XPATH_BOOLEAN(c, STR("avj/good/*=34"), true); + CHECK_XPATH_BOOLEAN(c, 
STR("not(avj/good/*=34)"), false); + CHECK_XPATH_BOOLEAN(c, STR("avj/good/*!=34"), true); + CHECK_XPATH_BOOLEAN(c, STR("not(avj/good/*!=34)"), false); + + CHECK_XPATH_BOOLEAN(c, STR("34=avj/good/*"), true); + CHECK_XPATH_BOOLEAN(c, STR("not(34=avj/good/*)"), false); + CHECK_XPATH_BOOLEAN(c, STR("34!=avj/good/*"), true); + CHECK_XPATH_BOOLEAN(c, STR("not(34!=avj/good/*)"), false); +} + +TEST_XML(xpath_xalan_boolean_7, "truefalse?10") +{ + xml_node c = doc.child(STR("doc")); + + CHECK_XPATH_BOOLEAN(c, STR("avj/bool/*=true()"), true); + CHECK_XPATH_BOOLEAN(c, STR("not(avj/bool/*=true())"), false); + CHECK_XPATH_BOOLEAN(c, STR("avj/bool/*!=true()"), false); + CHECK_XPATH_BOOLEAN(c, STR("not(avj/bool/*!=true())"), true); + + CHECK_XPATH_BOOLEAN(c, STR("true()=avj/bool/*"), true); + CHECK_XPATH_BOOLEAN(c, STR("not(true()=avj/bool/*)"), false); + CHECK_XPATH_BOOLEAN(c, STR("true()!=avj/bool/*"), false); + CHECK_XPATH_BOOLEAN(c, STR("not(true()!=avj/bool/*)"), true); + + CHECK_XPATH_BOOLEAN(c, STR("avj/none/*=true()"), false); + CHECK_XPATH_BOOLEAN(c, STR("not(avj/none/*=true())"), true); + CHECK_XPATH_BOOLEAN(c, STR("avj/none/*!=true()"), true); + CHECK_XPATH_BOOLEAN(c, STR("not(avj/none/*!=true())"), false); + + CHECK_XPATH_BOOLEAN(c, STR("true()=avj/none/*"), false); + CHECK_XPATH_BOOLEAN(c, STR("not(true()=avj/none/*)"), true); + CHECK_XPATH_BOOLEAN(c, STR("true()!=avj/none/*"), true); + CHECK_XPATH_BOOLEAN(c, STR("not(true()!=avj/none/*)"), false); +} + +TEST_XML(xpath_xalan_conditional, "b") +{ + xml_node c; + + CHECK_XPATH_BOOLEAN(c, STR("(round(3.7) > 3)"), true); + CHECK_XPATH_BOOLEAN(c, STR("2 > 1"), true); + CHECK_XPATH_BOOLEAN(c, STR("9 mod 3 = 0"), true); + CHECK_XPATH_BOOLEAN(c, STR("'a'='a'"), true); + CHECK_XPATH_BOOLEAN(c, STR("2+2=4"), true); + + xml_node b = doc.child(STR("letters")).first_child(); + + CHECK_XPATH_BOOLEAN(b, STR(".='b'"), true); + CHECK_XPATH_BOOLEAN(b, STR("name(..)='letters'"), true); +} + +TEST_XML(xpath_xalan_math_1, "3") +{ + 
xml_node c; + + CHECK_XPATH_NUMBER(c, STR("number('1')"), 1); + CHECK_XPATH_NUMBER(c, STR("floor(0.0)"), 0); + CHECK_XPATH_NUMBER(c, STR("ceiling(0.0)"), 0); + CHECK_XPATH_NUMBER(c, STR("round(0.0)"), 0); + CHECK_XPATH_NUMBER(c, STR("2*3"), 6); + CHECK_XPATH_NUMBER(c, STR("3+6"), 9); + CHECK_XPATH_NUMBER(c, STR("3-1"), 2); + CHECK_XPATH_NUMBER_NAN(doc, STR("a-1")); // a-1 is a name test, not arithmetic expression + CHECK_XPATH_NUMBER(doc, STR("a -1"), 2); + CHECK_XPATH_NUMBER(c, STR("6 div 2"), 3); + CHECK_XPATH_NUMBER(c, STR("5 mod 2"), 1); + CHECK_XPATH_NUMBER_NAN(c, STR("number(n)")); + CHECK_XPATH_NUMBER(c, STR("number(2)"), 2); + CHECK_XPATH_NUMBER(c, STR("number('3')"), 3); + CHECK_XPATH_NUMBER_NAN(c, STR("number('')")); + CHECK_XPATH_NUMBER_NAN(c, STR("number('abc')")); + CHECK_XPATH_BOOLEAN(c, STR("number(string(1.0))=1"), true); + CHECK_XPATH_BOOLEAN(c, STR("number(true())=1"), true); + CHECK_XPATH_BOOLEAN(c, STR("number(false())=0"), true); + +#ifndef MSVC6_NAN_BUG + CHECK_XPATH_BOOLEAN(c, STR("number('xxx')=number('xxx')"), false); + CHECK_XPATH_BOOLEAN(c, STR("number('xxx')=0"), false); +#endif + + CHECK_XPATH_NUMBER(doc, STR("floor(a)"), 3); + CHECK_XPATH_NUMBER(c, STR("floor(1.9)"), 1); + CHECK_XPATH_NUMBER(c, STR("floor(2.999999)"), 2); + CHECK_XPATH_NUMBER(c, STR("floor(-1.5)"), -2); + CHECK_XPATH_BOOLEAN(c, STR("floor(1)=1"), true); + CHECK_XPATH_BOOLEAN(c, STR("floor(1.9)=1"), true); + CHECK_XPATH_BOOLEAN(c, STR("floor(-1.5)=-2"), true); + CHECK_XPATH_NUMBER(doc, STR("ceiling(a)"), 3); + CHECK_XPATH_NUMBER(c, STR("ceiling(1.54)"), 2); + CHECK_XPATH_NUMBER(c, STR("ceiling(2.999999)"), 3); + CHECK_XPATH_NUMBER(c, STR("ceiling(3.000001)"), 4); + CHECK_XPATH_BOOLEAN(c, STR("ceiling(1)=1"), true); + CHECK_XPATH_BOOLEAN(c, STR("ceiling(1.1)=2"), true); + CHECK_XPATH_BOOLEAN(c, STR("ceiling(-1.5)=-1"), true); +} + +TEST_XML(xpath_xalan_math_2, "3") +{ + xml_node c; + + CHECK_XPATH_NUMBER(doc, STR("round(a)"), 3); + CHECK_XPATH_NUMBER(c, 
STR("round(1.24)"), 1); + CHECK_XPATH_NUMBER(c, STR("round(2.999999)"), 3); + CHECK_XPATH_NUMBER(c, STR("round(3.000001)"), 3); + CHECK_XPATH_NUMBER(c, STR("round(1.1)"), 1); + CHECK_XPATH_NUMBER(c, STR("round(-1.1)"), -1); + CHECK_XPATH_NUMBER(c, STR("round(1.9)"), 2); + CHECK_XPATH_NUMBER(c, STR("round(-1.9)"), -2); + CHECK_XPATH_NUMBER(c, STR("round(1.5)"), 2); + CHECK_XPATH_NUMBER(c, STR("round(-1.5)"), -1); + CHECK_XPATH_NUMBER(c, STR("round(1.4999999)"), 1); + CHECK_XPATH_NUMBER(c, STR("round(-1.4999999)"), -1); + CHECK_XPATH_NUMBER(c, STR("round(1.5000001)"), 2); + CHECK_XPATH_NUMBER(c, STR("round(-1.5000001)"), -2); +} + +TEST_XML(xpath_xalan_math_3, "24517-58-37") +{ + CHECK_XPATH_NUMBER(doc, STR("sum(doc/x)"), 0); + CHECK_XPATH_NUMBER_NAN(doc, STR("sum(doc/n)")); + CHECK_XPATH_NUMBER(doc, STR("sum(doc/n[text()])"), 11); + CHECK_XPATH_NUMBER(doc, STR("sum(doc/n/@v)"), 9); + CHECK_XPATH_NUMBER(doc, STR("sum(doc/e)"), -17); +} + +TEST_XML(xpath_xalan_math_4, "231237212
5
2
") +{ + xml_node c = doc.child(STR("doc")); + + CHECK_XPATH_NUMBER(c, STR("n1*n2"), 6); + CHECK_XPATH_NUMBER(c, STR("n1/@a*n2/@a"), 2); + CHECK_XPATH_NUMBER(c, STR("(n1/@a)*(n2/@a)"), 2); + CHECK_XPATH_NUMBER(c, STR("n1+n2"), 5); + CHECK_XPATH_NUMBER(c, STR("n1/@a+n2/@a"), 3); + CHECK_XPATH_NUMBER(c, STR("(n1/@a)+(n2/@a)"), 3); + CHECK_XPATH_NUMBER(c, STR("1-2"), -1); + CHECK_XPATH_NUMBER(c, STR("n1 - n2"), -1); + CHECK_XPATH_NUMBER(c, STR("n1-n2"), 123); + CHECK_XPATH_NUMBER(c, STR("n-1 - n-2"), 60); + CHECK_XPATH_NUMBER(c, STR("n-1 -n-2"), 60); + CHECK_XPATH_NUMBER(c, STR("7+-3"), 4); + CHECK_XPATH_NUMBER(c, STR("n-1+-n-2"), 60); + CHECK_XPATH_NUMBER(c, STR("7 - -3"), 10); + CHECK_XPATH_NUMBER(c, STR("n-1 - -n-2"), 84); + CHECK_XPATH_NUMBER(c, STR("-7 --3"), -4); + CHECK_XPATH_NUMBER(c, STR("-n-1 --n-2"), -60); + + CHECK_XPATH_FAIL(STR("+7")); + CHECK_XPATH_FAIL(STR("7++3")); + CHECK_XPATH_FAIL(STR("7-+3")); + + CHECK_XPATH_NUMBER(c, STR("6 div -2"), -3); + CHECK_XPATH_NUMBER(c, STR("n1 div n2"), 2.0 / 3.0); + CHECK_XPATH_NUMBER(c, STR("div div mod"), 2.5); + CHECK_XPATH_NUMBER(c, STR("div/@a div mod/@a"), 0.4); + + CHECK_XPATH_BOOLEAN(c, STR("1 div -0 = 2 div -0"), true); + CHECK_XPATH_BOOLEAN(c, STR("1 div -0 = 1 div 0"), false); + CHECK_XPATH_BOOLEAN(c, STR("1 div -0 = -1 div 0"), true); + +#ifndef MSVC6_NAN_BUG + CHECK_XPATH_BOOLEAN(c, STR("0 div 0 >= 0"), false); + CHECK_XPATH_BOOLEAN(c, STR("0 div 0 < 0"), false); +#endif + + CHECK_XPATH_NUMBER(c, STR("n1 mod n2"), 2); + CHECK_XPATH_NUMBER(c, STR("div mod mod"), 1); + CHECK_XPATH_NUMBER(c, STR("div/@a mod mod/@a"), 2); + + CHECK_XPATH_BOOLEAN(c, STR("(5 mod 2 = 1) and (5 mod -2 = 1) and (-5 mod 2 = -1) and (-5 mod -2 = -1)"), true); +} + +TEST(xpath_xalan_math_5) +{ + xml_node c; + + CHECK_XPATH_NUMBER(c, STR("(((((('3'+5)*(3)+((('2')+2)*('1' - 6)))-('4' - '2'))+(-(4-6)))))"), 4); + CHECK_XPATH_NUMBER(c, STR("1*1*2*2*2*3*3*1*1*1*0.5*0.5"), 18); + CHECK_XPATH_NUMBER(c, STR("1440 div 2 div 2 div 6"), 60); + 
CHECK_XPATH_NUMBER(c, STR("1440 div 2 div 2 div 6 div 10"), 6); + CHECK_XPATH_NUMBER(c, STR("1440 div 2 div 2 div 6 div 10 div 3"), 2); + CHECK_XPATH_NUMBER(c, STR("(1*2*3*4*5*6)div 2 div 6 div 10 div 3"), 2); + CHECK_XPATH_NUMBER_NAN(c, STR("(2 + number('xxx'))")); + CHECK_XPATH_NUMBER_NAN(c, STR("2 * -number('xxx')")); + CHECK_XPATH_NUMBER_NAN(c, STR("2 - number('xxx')")); + CHECK_XPATH_NUMBER_NAN(c, STR("number('xxx') - 3")); + CHECK_XPATH_NUMBER_NAN(c, STR("2 div number('xxx')")); + CHECK_XPATH_NUMBER_NAN(c, STR("number('xxx') div 3")); + +#ifndef __BORLANDC__ // BCC fmod does not propagate NaN correctly + CHECK_XPATH_NUMBER_NAN(c, STR("2 mod number('xxx')")); + CHECK_XPATH_NUMBER_NAN(c, STR("number('xxx') mod 3")); +#endif + + CHECK_XPATH_NUMBER_NAN(c, STR("floor(number('xxx'))")); + CHECK_XPATH_NUMBER_NAN(c, STR("ceiling(number('xxx'))")); + CHECK_XPATH_NUMBER_NAN(c, STR("round(number('xxx'))")); + CHECK_XPATH_NUMBER(c, STR("10+5+25+20+15+50+35+40"), 200); + CHECK_XPATH_NUMBER(c, STR("100-9-7-4-17-18-5"), 40); + CHECK_XPATH_NUMBER(c, STR("3*2+5*4-4*2-1"), 17); + CHECK_XPATH_NUMBER(c, STR("6*5-8*2+5*2"), 24); + CHECK_XPATH_NUMBER(c, STR("10*5-4*2+6*1 -3*3"), 39); + + CHECK_XPATH_NUMBER(c, STR("(24 div 3 +2) div (40 div 8 -3)"), 5); + CHECK_XPATH_NUMBER(c, STR("80 div 2 + 12 div 2 - 4 div 2"), 44); + CHECK_XPATH_NUMBER(c, STR("70 div 10 - 18 div 6 + 10 div 2"), 9); + + CHECK_XPATH_NUMBER(c, STR("48 mod 17 - 2 mod 9 + 13 mod 5"), 15); + CHECK_XPATH_NUMBER(c, STR("56 mod round(5*2+1.444) - 6 mod 4 + 7 mod 4"), 2); + CHECK_XPATH_NUMBER(c, STR("(77 mod 10 + 5 mod 8) mod 10"), 2); +} + +TEST_XML(xpath_xalan_math_6, "37x") +{ + xml_node c = doc.child(STR("doc")); + + CHECK_XPATH_NUMBER(c, STR("-(n1|n2)"), -3); + CHECK_XPATH_NUMBER(c, STR("-(n2|n1)"), -3); + CHECK_XPATH_BOOLEAN(c, STR("contains(number(n1), 'NaN')"), false); + CHECK_XPATH_BOOLEAN(c, STR("contains(number(n3), 'NaN')"), true); +} + +TEST_XML(xpath_xalan_math_7, "37x") +{ + xml_node c = 
doc.child(STR("doc")); + + CHECK_XPATH_NUMBER(c, STR("-(n1|n2)"), -3); + CHECK_XPATH_NUMBER(c, STR("-(n2|n1)"), -3); + CHECK_XPATH_BOOLEAN(c, STR("contains(number(n1), 'NaN')"), false); + CHECK_XPATH_BOOLEAN(c, STR("contains(number(n3), 'NaN')"), true); +} + +TEST_XML(xpath_xalan_math_8, "0.0004") +{ + CHECK_XPATH_NUMBER(doc, STR("number(1.75)"), 1.75); + CHECK_XPATH_NUMBER(doc, STR("number(7 div 4)"), 1.75); + CHECK_XPATH_BOOLEAN(doc, STR("(number(1.75) = (7 div 4))"), true); + CHECK_XPATH_NUMBER(doc, STR("number(0.109375 * 16)"), 1.75); + CHECK_XPATH_BOOLEAN(doc, STR("(number(1.75) = (0.109375 * 16))"), true); + CHECK_XPATH_NUMBER(doc, STR("number(k)"), 0.0004); + CHECK_XPATH_NUMBER(doc, STR("number(4 div 10000)"), 0.0004); + CHECK_XPATH_BOOLEAN(doc, STR("(number(k) = (4 div 10000))"), true); + CHECK_XPATH_NUMBER(doc, STR("number(0.0001 * 4)"), 0.0004); + CHECK_XPATH_BOOLEAN(doc, STR("(number(k) = (0.0001 * 4))"), true); +} + +TEST(xpath_xalan_math_9) +{ + xml_node c; + + CHECK_XPATH_STRING(c, STR("string(number('0.0'))"), STR("0")); + CHECK_XPATH_STRING(c, STR("string(-1 * number('0.0'))"), STR("0")); + + CHECK_XPATH_STRING(c, STR("string(number('0.4'))"), STR("0.4")); + CHECK_XPATH_STRING(c, STR("string(-1 * number('0.4'))"), STR("-0.4")); + + CHECK_XPATH_STRING(c, STR("string(number('4.0'))"), STR("4")); + CHECK_XPATH_STRING(c, STR("string(-1 * number('4.0'))"), STR("-4")); + + CHECK_XPATH_STRING(c, STR("string(number('0.04'))"), STR("0.04")); + CHECK_XPATH_STRING(c, STR("string(-1 * number('0.04'))"), STR("-0.04")); + + CHECK_XPATH_STRING(c, STR("string(number('0.004'))"), STR("0.004")); + CHECK_XPATH_STRING(c, STR("string(-1 * number('0.004'))"), STR("-0.004")); + + CHECK_XPATH_STRING(c, STR("string(number('0.0004'))"), STR("0.0004")); + CHECK_XPATH_STRING(c, STR("string(-1 * number('0.0004'))"), STR("-0.0004")); + + CHECK_XPATH_STRING(c, STR("string(number('0.0000000000001'))"), STR("0.0000000000001")); + CHECK_XPATH_STRING(c, STR("string(-1 * 
number('0.0000000000001'))"), STR("-0.0000000000001")); + + CHECK_XPATH_STRING(c, STR("string(number('0.0000000000000000000000000001'))"), STR("0.0000000000000000000000000001")); + CHECK_XPATH_STRING(c, STR("string(-1 * number('0.0000000000000000000000000001'))"), STR("-0.0000000000000000000000000001")); + + CHECK_XPATH_STRING(c, STR("string(number('0.0000000000001000000000000001'))"), STR("0.0000000000001000000000000001")); + CHECK_XPATH_STRING(c, STR("string(-1 * number('0.0000000000001000000000000001'))"), STR("-0.0000000000001000000000000001")); + + CHECK_XPATH_STRING(c, STR("string(number('0.0012'))"), STR("0.0012")); + CHECK_XPATH_STRING(c, STR("string(-1 * number('0.0012'))"), STR("-0.0012")); + + CHECK_XPATH_STRING(c, STR("string(number('0.012'))"), STR("0.012")); + CHECK_XPATH_STRING(c, STR("string(-1 * number('0.012'))"), STR("-0.012")); +} + +#endif diff --git a/tests/test_xpath_xalan_2.cpp b/tests/test_xpath_xalan_2.cpp index abc6a1c..aa8ae17 100644 --- a/tests/test_xpath_xalan_2.cpp +++ b/tests/test_xpath_xalan_2.cpp @@ -1,399 +1,399 @@ -#ifndef PUGIXML_NO_XPATH - -#define _CRT_SECURE_NO_WARNINGS - -#include "common.hpp" - -#include - -TEST_XML(xpath_xalan_string_1, "ENCYCLOPEDIA") -{ - xml_node c; - - CHECK_XPATH_NUMBER(c, STR("string-length('This is a test')"), 14); - CHECK_XPATH_BOOLEAN(c, STR("starts-with('ENCYCLOPEDIA', 'ENCY')"), true); - CHECK_XPATH_BOOLEAN(c, STR("contains('ENCYCLOPEDIA', 'CYCL')"), true); - CHECK_XPATH_STRING(c, STR("substring-before('1999/04/01', '/')"), STR("1999")); - CHECK_XPATH_STRING(c, STR("substring-after('1999/04/01', '/')"), STR("04/01")); - CHECK_XPATH_STRING(c, STR("normalize-space('\t\n\r\n ab\n cd\t\n\r\n ef\t\n\r ')"), STR("ab cd ef")); - CHECK_XPATH_STRING(c, STR("translate(\"bar\",\"abc\",\"ABC\")"), STR("BAr")); - CHECK_XPATH_STRING(c, STR("concat(\"x\",\"yz\")"), STR("xyz")); - CHECK_XPATH_STRING(c, STR("substring('1999/04/01', 1, 4)"), STR("1999")); - CHECK_XPATH_STRING(c, STR("substring('12345', 1.5, 
2.6)"), STR("234")); - CHECK_XPATH_STRING(c, STR("substring('12345', 0, 3)"), STR("12")); - CHECK_XPATH_STRING(c, STR("substring('12345', 0 div 0, 3)"), STR("")); - CHECK_XPATH_STRING(c, STR("substring('12345', 1, 0 div 0)"), STR("")); - CHECK_XPATH_STRING(c, STR("substring('12345', -42, 1 div 0)"), STR("12345")); - CHECK_XPATH_STRING(c, STR("substring('12345', -1 div 0, 1 div 0)"), STR("")); - CHECK_XPATH_STRING(c, STR("substring(foo, 12, 3)"), STR("")); - CHECK_XPATH_STRING(c, STR("string(foo)"), STR("")); - CHECK_XPATH_STRING(c, STR("string(0)"), STR("0")); - CHECK_XPATH_STRING(c, STR("string(2)"), STR("2")); - CHECK_XPATH_STRING(c, STR("string('test')"), STR("test")); - CHECK_XPATH_STRING(c, STR("string('')"), STR("")); - CHECK_XPATH_BOOLEAN(c, STR("starts-with('ENCYCLOPEDIA', 'EN')"), true); - CHECK_XPATH_BOOLEAN(c, STR("starts-with('ENCYCLOPEDIA', 'en')"), false); - CHECK_XPATH_BOOLEAN(c, STR("starts-with('ab', 'abc')"), false); - CHECK_XPATH_BOOLEAN(c, STR("starts-with('abc', 'bc')"), false); - CHECK_XPATH_BOOLEAN(c, STR("starts-with('abc', '')"), true); - CHECK_XPATH_BOOLEAN(c, STR("starts-with('', '')"), true); - CHECK_XPATH_BOOLEAN(c, STR("starts-with('', 'abc')"), false); - CHECK_XPATH_BOOLEAN(c, STR("starts-with('true()', 'tr')"), true); - CHECK_XPATH_BOOLEAN(c, STR("starts-with(foo, 'EN')"), false); - CHECK_XPATH_BOOLEAN(doc, STR("starts-with(doc, 'EN')"), true); - CHECK_XPATH_BOOLEAN(doc, STR("starts-with(doc/@a, 'EN')"), false); - CHECK_XPATH_BOOLEAN(doc, STR("starts-with(doc/@a, 'te')"), true); -} - -TEST_XML_FLAGS(xpath_xalan_string_2, "\n \n \n b\n c\n d\n e\n \n \n w\n x\n y\n z\n \n \n", parse_default | parse_ws_pcdata) -{ - CHECK_XPATH_STRING(doc, STR("string(doc/av//*)"), STR("\n b\n c\n d\n e\n ")); - CHECK_XPATH_STRING(doc, STR("normalize-space(string(doc/av//*))"), STR("b c d e")); - CHECK_XPATH_STRING(doc, STR("normalize-space('This is a test')"), STR("This is a test")); -} - -TEST_XML(xpath_xalan_string_3, "ENCYCLOPEDIA") -{ - xml_node c; 
- - CHECK_XPATH_BOOLEAN(c, STR("contains('ENCYCLOPEDIA', 'TEST')"), false); - CHECK_XPATH_BOOLEAN(c, STR("contains('ENCYCLOPEDIA', 'CYCL')"), true); - CHECK_XPATH_BOOLEAN(c, STR("contains('ENCYCLOPEDIA', 'cycl')"), false); - CHECK_XPATH_BOOLEAN(doc, STR("contains(concat(.,'BC'),concat('A','B','C'))"), true); - CHECK_XPATH_BOOLEAN(c, STR("contains('ab', 'abc')"), false); - CHECK_XPATH_BOOLEAN(c, STR("contains('abc', 'bc')"), true); - CHECK_XPATH_BOOLEAN(c, STR("contains('abc', 'bcd')"), false); - CHECK_XPATH_BOOLEAN(c, STR("contains('abc', '')"), true); - CHECK_XPATH_BOOLEAN(c, STR("contains('', '')"), true); - CHECK_XPATH_BOOLEAN(c, STR("contains('', 'abc')"), false); - CHECK_XPATH_BOOLEAN(c, STR("contains('true()', 'e')"), true); - CHECK_XPATH_BOOLEAN(doc, STR("contains(., 'CYCL')"), true); - CHECK_XPATH_BOOLEAN(doc, STR("contains(., 'TEST')"), false); - CHECK_XPATH_BOOLEAN(doc, STR("contains(doc/@a, 'es')"), true); - CHECK_XPATH_BOOLEAN(doc, STR("contains(doc/@a, 'T')"), false); - CHECK_XPATH_STRING(c, STR("substring-before('ENCYCLOPEDIA', '/')"), STR("")); - CHECK_XPATH_STRING(c, STR("substring-before('ENCYCLOPEDIA', 'C')"), STR("EN")); - CHECK_XPATH_STRING(c, STR("substring-before('ENCYCLOPEDIA', 'c')"), STR("")); - CHECK_XPATH_STRING(c, STR("substring-before('ENCYCLOPEDIA', '')"), STR("")); - CHECK_XPATH_STRING(doc, STR("substring-before(., '/')"), STR("")); - CHECK_XPATH_STRING(doc, STR("substring-before(., 'C')"), STR("EN")); - CHECK_XPATH_STRING(doc, STR("substring-before(foo, '')"), STR("")); - CHECK_XPATH_STRING(doc, STR("substring-before(doc/@a, '/')"), STR("")); - CHECK_XPATH_STRING(doc, STR("substring-before(doc/@a, 'e')"), STR("t")); - CHECK_XPATH_STRING(doc, STR("substring-before(doc/@a, 't')"), STR("")); - CHECK_XPATH_STRING(c, STR("substring-after('ENCYCLOPEDIA', '/')"), STR("")); - CHECK_XPATH_STRING(c, STR("substring-after('ENCYCLOPEDIA', 'C')"), STR("YCLOPEDIA")); - CHECK_XPATH_STRING(c, STR("substring-after('ENCYCLOPEDIA', 'c')"), STR("")); - 
CHECK_XPATH_STRING(c, STR("substring-after('ENCYCLOPEDIA', '')"), STR("ENCYCLOPEDIA")); - CHECK_XPATH_STRING(doc, STR("substring-after(., '/')"), STR("")); - CHECK_XPATH_STRING(doc, STR("substring-after(., 'C')"), STR("YCLOPEDIA")); - CHECK_XPATH_STRING(doc, STR("substring-after(foo, '')"), STR("")); - CHECK_XPATH_STRING(doc, STR("substring-after(doc/@a, '/')"), STR("")); - CHECK_XPATH_STRING(doc, STR("substring-after(doc/@a, 'e')"), STR("st")); - CHECK_XPATH_STRING(doc, STR("substring-after(doc/@a, 't')"), STR("est")); - CHECK_XPATH_STRING(doc, STR("substring-after(doc/@a, 'st')"), STR("")); -} - -TEST_XML(xpath_xalan_string_4, "abcdefwhat's up") -{ - xml_node c; - - CHECK_XPATH_STRING(c, STR("translate('BAR','abc','ABC')"), STR("BAR")); - CHECK_XPATH_STRING(c, STR("translate('bar','RAB','xyz')"), STR("bar")); - CHECK_XPATH_STRING(c, STR("translate('BAR','Rab','TxX')"), STR("BAT")); - CHECK_XPATH_STRING(c, STR("translate('zzaaazzz','abcz','ABC')"), STR("AAA")); - CHECK_XPATH_STRING(c, STR("translate('ddaaadddd','abcd','ABCxy')"), STR("xxAAAxxxx")); - CHECK_XPATH_STRING(c, STR("concat('a','b','c','d','ef')"), STR("abcdef")); - CHECK_XPATH_STRING(c, STR("concat(a, b)"), STR("")); - CHECK_XPATH_STRING(doc.child(STR("doc")), STR("concat(a, b)"), STR("ab")); - CHECK_XPATH_STRING(doc.child(STR("doc")), STR("concat(a, b, c, d, e)"), STR("abcdef")); - CHECK_XPATH_STRING(c, STR("concat('cd','34')"), STR("cd34")); - CHECK_XPATH_STRING(c, STR("concat('cd',34)"), STR("cd34")); - CHECK_XPATH_STRING(c, STR("concat('bc',string(23))"), STR("bc23")); - CHECK_XPATH_STRING(c, STR("concat(a,34)"), STR("34")); - CHECK_XPATH_STRING(doc.child(STR("doc")), STR("concat(a,34)"), STR("a34")); - CHECK_XPATH_STRING(c, STR("concat(false(),'ly')"), STR("falsely")); - CHECK_XPATH_FAIL(STR("concat(/*)")); - CHECK_XPATH_STRING(doc.child(STR("doc")), STR("concat(/*, '')"), STR("abcdefwhat's up")); - CHECK_XPATH_STRING(doc.child(STR("doc")), STR("concat(/*, /*[@attr='whatsup'])"), STR("abcdefwhat's 
up")); - CHECK_XPATH_STRING(doc.child(STR("doc")), STR("concat(/*, //*[@attr='whatsup'])"), STR("abcdefwhat's upwhat's up")); - CHECK_XPATH_STRING(c, STR("substring('ENCYCLOPEDIA', 8, 3)"), STR("PED")); - CHECK_XPATH_STRING(c, STR("substring('ENCYCLOPEDIA', 8)"), STR("PEDIA")); - CHECK_XPATH_STRING(c, STR("substring('abcdefghijk',0 div 0, 5)"), STR("")); - CHECK_XPATH_STRING(c, STR("substring('abcdefghijk',4, 6)"), STR("defghi")); - CHECK_XPATH_STRING(c, STR("substring('1999/04/01', 1, 0)"), STR("")); - CHECK_XPATH_STRING(c, STR("translate(normalize-space(' bar fly '), ' ', '_')"), STR("bar_fly")); - CHECK_XPATH_STRING(c, STR("translate('barter','abe','bao')"), STR("abrtor")); - CHECK_XPATH_STRING(c, STR("translate('barbarity', 'aeiouy', '******')"), STR("b*rb*r*t*")); - CHECK_XPATH_STRING(doc, STR("translate(cd, concat(\"aqu'\", '\"eos'), 'AQU-+EOS')"), STR("QUA-lit+y")); - CHECK_XPATH_STRING(c, STR("translate('quan+ti-ty', 'AQU-+EOS', concat(\"aqu'\", '\"eos'))"), STR("quan\"ti'ty")); -} - -static std::basic_string number_to_string(int number) -{ - std::basic_string result; - - while (number) - { - result = static_cast('0' + number % 10) + result; - number /= 10; - } - - return result; -} - -TEST(xpath_xalan_string_5) -{ - std::basic_string query = STR("concat("); - - for (int i = 1; i < 1000; ++i) - { - query += STR("concat('t',"); - query += number_to_string(i); - query += STR("), "); - } - - query += STR("'')"); - - std::basic_string expected; - - for (int j = 1; j < 1000; ++j) - { - expected += STR("t"); - expected += number_to_string(j); - } - - CHECK_XPATH_STRING(xml_node(), query.c_str(), expected.c_str()); -} - -TEST(xpath_xalan_string_6) -{ - xml_node c; - - CHECK_XPATH_STRING(c, STR("string(1)"), STR("1")); - CHECK_XPATH_STRING(c, STR("string(12)"), STR("12")); - CHECK_XPATH_STRING(c, STR("string(123)"), STR("123")); - CHECK_XPATH_STRING(c, STR("string(1234)"), STR("1234")); - CHECK_XPATH_STRING(c, STR("string(12345)"), STR("12345")); - 
CHECK_XPATH_STRING(c, STR("string(123456)"), STR("123456")); - CHECK_XPATH_STRING(c, STR("string(1234567)"), STR("1234567")); - CHECK_XPATH_STRING(c, STR("string(12345678)"), STR("12345678")); - CHECK_XPATH_STRING(c, STR("string(123456789)"), STR("123456789")); - CHECK_XPATH_STRING(c, STR("string(1234567890)"), STR("1234567890")); - CHECK_XPATH_STRING(c, STR("string(12345678901)"), STR("12345678901")); - CHECK_XPATH_STRING(c, STR("string(123456789012)"), STR("123456789012")); - CHECK_XPATH_STRING(c, STR("string(1234567890123)"), STR("1234567890123")); - CHECK_XPATH_STRING(c, STR("string(12345678901234)"), STR("12345678901234")); - CHECK_XPATH_STRING(c, STR("string(123456789012345)"), STR("123456789012345")); - CHECK_XPATH_STRING(c, STR("string(1234567890123456)"), STR("1234567890123456")); - CHECK_XPATH_STRING(c, STR("string(-1)"), STR("-1")); - CHECK_XPATH_STRING(c, STR("string(-12)"), STR("-12")); - CHECK_XPATH_STRING(c, STR("string(-123)"), STR("-123")); - CHECK_XPATH_STRING(c, STR("string(-1234)"), STR("-1234")); - CHECK_XPATH_STRING(c, STR("string(-12345)"), STR("-12345")); - CHECK_XPATH_STRING(c, STR("string(-123456)"), STR("-123456")); - CHECK_XPATH_STRING(c, STR("string(-1234567)"), STR("-1234567")); - CHECK_XPATH_STRING(c, STR("string(-12345678)"), STR("-12345678")); - CHECK_XPATH_STRING(c, STR("string(-123456789)"), STR("-123456789")); - CHECK_XPATH_STRING(c, STR("string(-1234567890)"), STR("-1234567890")); - CHECK_XPATH_STRING(c, STR("string(-12345678901)"), STR("-12345678901")); - CHECK_XPATH_STRING(c, STR("string(-123456789012)"), STR("-123456789012")); - CHECK_XPATH_STRING(c, STR("string(-1234567890123)"), STR("-1234567890123")); - CHECK_XPATH_STRING(c, STR("string(-12345678901234)"), STR("-12345678901234")); - CHECK_XPATH_STRING(c, STR("string(-123456789012345)"), STR("-123456789012345")); - CHECK_XPATH_STRING(c, STR("string(-1234567890123456)"), STR("-1234567890123456")); -} - -#if 0 // $ this test requires round-to-nearest behavior in 
string->number conversion during parsing; atof gives us truncation -TEST(xpath_xalan_string_6_rounding) -{ - xml_node c; - - CHECK_XPATH_STRING(c, STR("string(12345678901234567)"), STR("12345678901234568")); - CHECK_XPATH_STRING(c, STR("string(123456789012345678)"), STR("123456789012345680")); - CHECK_XPATH_STRING(c, STR("string(-12345678901234567)"), STR("-12345678901234568")); - CHECK_XPATH_STRING(c, STR("string(-123456789012345678)"), STR("-123456789012345680")); -} -#endif - -TEST(xpath_xalan_string_7) -{ - xml_node c; - - CHECK_XPATH_STRING(c, STR("string(.1)"), STR("0.1")); - CHECK_XPATH_STRING(c, STR("string(.01)"), STR("0.01")); - CHECK_XPATH_STRING(c, STR("string(.012)"), STR("0.012")); - CHECK_XPATH_STRING(c, STR("string(.0123)"), STR("0.0123")); - CHECK_XPATH_STRING(c, STR("string(.01234)"), STR("0.01234")); - CHECK_XPATH_STRING(c, STR("string(.012345)"), STR("0.012345")); - CHECK_XPATH_STRING(c, STR("string(.0123456)"), STR("0.0123456")); - CHECK_XPATH_STRING(c, STR("string(.01234567)"), STR("0.01234567")); - CHECK_XPATH_STRING(c, STR("string(.012345678)"), STR("0.012345678")); - CHECK_XPATH_STRING(c, STR("string(.0123456789)"), STR("0.0123456789")); - CHECK_XPATH_STRING(c, STR("string(.10123456789)"), STR("0.10123456789")); - CHECK_XPATH_STRING(c, STR("string(.101234567892)"), STR("0.101234567892")); - CHECK_XPATH_STRING(c, STR("string(.1012345678923)"), STR("0.1012345678923")); - CHECK_XPATH_STRING(c, STR("string(.10123456789234)"), STR("0.10123456789234")); - CHECK_XPATH_STRING(c, STR("string(.101234567892345)"), STR("0.101234567892345")); - CHECK_XPATH_STRING(c, STR("string(.1012345678923456)"), STR("0.1012345678923456")); - CHECK_XPATH_STRING(c, STR("string(-.1)"), STR("-0.1")); - CHECK_XPATH_STRING(c, STR("string(-.01)"), STR("-0.01")); - CHECK_XPATH_STRING(c, STR("string(-.012)"), STR("-0.012")); - CHECK_XPATH_STRING(c, STR("string(-.0123)"), STR("-0.0123")); - CHECK_XPATH_STRING(c, STR("string(-.01234)"), STR("-0.01234")); - 
CHECK_XPATH_STRING(c, STR("string(-.012345)"), STR("-0.012345")); - CHECK_XPATH_STRING(c, STR("string(-.0123456)"), STR("-0.0123456")); - CHECK_XPATH_STRING(c, STR("string(-.01234567)"), STR("-0.01234567")); - CHECK_XPATH_STRING(c, STR("string(-.012345678)"), STR("-0.012345678")); - CHECK_XPATH_STRING(c, STR("string(-.0123456789)"), STR("-0.0123456789")); - CHECK_XPATH_STRING(c, STR("string(-.10123456789)"), STR("-0.10123456789")); - CHECK_XPATH_STRING(c, STR("string(-.101234567892)"), STR("-0.101234567892")); - CHECK_XPATH_STRING(c, STR("string(-.1012345678923)"), STR("-0.1012345678923")); - CHECK_XPATH_STRING(c, STR("string(-.10123456789234)"), STR("-0.10123456789234")); - CHECK_XPATH_STRING(c, STR("string(-.101234567892345)"), STR("-0.101234567892345")); - CHECK_XPATH_STRING(c, STR("string(-.1012345678923456)"), STR("-0.1012345678923456")); -} - -#if 0 // $ this test requires 16 decimal digits of mantissa in number->string conversion; we have 15 since only 15 is guaranteed, and 16 introduces 'garbage' digits in common cases like 0.4 -TEST(xpath_xalan_string_7_precision) -{ - xml_node c; - - CHECK_XPATH_STRING(c, STR("string(.10123456789234567)"), STR("0.10123456789234567")); - CHECK_XPATH_STRING(c, STR("string(.101234567892345678)"), STR("0.10123456789234568")); - CHECK_XPATH_STRING(c, STR("string(.1012345678923456789)"), STR("0.10123456789234568")); - CHECK_XPATH_STRING(c, STR("string(.10123456789234567893)"), STR("0.10123456789234568")); - CHECK_XPATH_STRING(c, STR("string(-.10123456789234567)"), STR("-0.10123456789234567")); - CHECK_XPATH_STRING(c, STR("string(-.101234567892345678)"), STR("-0.10123456789234568")); - CHECK_XPATH_STRING(c, STR("string(-.1012345678923456789)"), STR("-0.10123456789234568")); - CHECK_XPATH_STRING(c, STR("string(-.10123456789234567893)"), STR("-0.10123456789234568")); -} -#endif - -TEST(xpath_xalan_string_8) -{ - xml_node c; - - // $ originally all last digits were 5's; a fully compliant implementation should correctly convert 
those as well, - // however some of these failed because of atof truncation - CHECK_XPATH_STRING(c, STR("string(9.87654321012344)"), STR("9.87654321012344")); - CHECK_XPATH_STRING(c, STR("string(98.7654321012345)"), STR("98.7654321012345")); - CHECK_XPATH_STRING(c, STR("string(987.654321012345)"), STR("987.654321012345")); - CHECK_XPATH_STRING(c, STR("string(9876.54321012344)"), STR("9876.54321012344")); - CHECK_XPATH_STRING(c, STR("string(98765.4321012345)"), STR("98765.4321012345")); - CHECK_XPATH_STRING(c, STR("string(987654.321012345)"), STR("987654.321012345")); - CHECK_XPATH_STRING(c, STR("string(9876543.21012345)"), STR("9876543.21012345")); - CHECK_XPATH_STRING(c, STR("string(98765432.1012345)"), STR("98765432.1012345")); - CHECK_XPATH_STRING(c, STR("string(987654321.012345)"), STR("987654321.012345")); - CHECK_XPATH_STRING(c, STR("string(9876543210.12344)"), STR("9876543210.12344")); - CHECK_XPATH_STRING(c, STR("string(98765432101.2345)"), STR("98765432101.2345")); - CHECK_XPATH_STRING(c, STR("string(987654321012.345)"), STR("987654321012.345")); - CHECK_XPATH_STRING(c, STR("string(9876543210123.43)"), STR("9876543210123.43")); - CHECK_XPATH_STRING(c, STR("string(98765432101234.5)"), STR("98765432101234.5")); - - CHECK_XPATH_STRING(c, STR("string(-9.87654321012344)"), STR("-9.87654321012344")); - CHECK_XPATH_STRING(c, STR("string(-98.7654321012345)"), STR("-98.7654321012345")); - CHECK_XPATH_STRING(c, STR("string(-987.654321012345)"), STR("-987.654321012345")); - CHECK_XPATH_STRING(c, STR("string(-9876.54321012344)"), STR("-9876.54321012344")); - CHECK_XPATH_STRING(c, STR("string(-98765.4321012345)"), STR("-98765.4321012345")); - CHECK_XPATH_STRING(c, STR("string(-987654.321012345)"), STR("-987654.321012345")); - CHECK_XPATH_STRING(c, STR("string(-9876543.21012345)"), STR("-9876543.21012345")); - CHECK_XPATH_STRING(c, STR("string(-98765432.1012345)"), STR("-98765432.1012345")); - CHECK_XPATH_STRING(c, STR("string(-987654321.012345)"), 
STR("-987654321.012345")); - CHECK_XPATH_STRING(c, STR("string(-9876543210.12344)"), STR("-9876543210.12344")); - CHECK_XPATH_STRING(c, STR("string(-98765432101.2345)"), STR("-98765432101.2345")); - CHECK_XPATH_STRING(c, STR("string(-987654321012.345)"), STR("-987654321012.345")); - CHECK_XPATH_STRING(c, STR("string(-9876543210123.43)"), STR("-9876543210123.43")); - CHECK_XPATH_STRING(c, STR("string(-98765432101234.5)"), STR("-98765432101234.5")); -} - -TEST(xpath_xalan_string_9) -{ - xml_node c; - - CHECK_XPATH_STRING(c, STR("string(.123456789)"), STR("0.123456789")); - CHECK_XPATH_STRING(c, STR("string(.0123456789)"), STR("0.0123456789")); - CHECK_XPATH_STRING(c, STR("string(.00123456789)"), STR("0.00123456789")); - CHECK_XPATH_STRING(c, STR("string(.000123456789)"), STR("0.000123456789")); - CHECK_XPATH_STRING(c, STR("string(.0000123456789)"), STR("0.0000123456789")); - CHECK_XPATH_STRING(c, STR("string(.00000123456789)"), STR("0.00000123456789")); - CHECK_XPATH_STRING(c, STR("string(.000000123456789)"), STR("0.000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.0000000123456789)"), STR("0.0000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.00000000123456789)"), STR("0.00000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.000000000123456789)"), STR("0.000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.0000000000123456789)"), STR("0.0000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.00000000000123456789)"), STR("0.00000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.000000000000123456789)"), STR("0.000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.0000000000000123456789)"), STR("0.0000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.00000000000000123456789)"), STR("0.00000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.000000000000000123456789)"), STR("0.000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.0000000000000000123456789)"), STR("0.0000000000000000123456789")); 
- CHECK_XPATH_STRING(c, STR("string(.00000000000000000123456789)"), STR("0.00000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.000000000000000000123456789)"), STR("0.000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.0000000000000000000123456789)"), STR("0.0000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.00000000000000000000123456789)"), STR("0.00000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.000000000000000000000123456789)"), STR("0.000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.0000000000000000000000123456789)"), STR("0.0000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.00000000000000000000000123456789)"), STR("0.00000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.000000000000000000000000123456789)"), STR("0.000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.0000000000000000000000000123456789)"), STR("0.0000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.00000000000000000000000000123456789)"), STR("0.00000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.000000000000000000000000000123456789)"), STR("0.000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.0000000000000000000000000000123456789)"), STR("0.0000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.00000000000000000000000000000123456789)"), STR("0.00000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.000000000000000000000000000000123456789)"), STR("0.000000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.0000000000000000000000000000000123456789)"), STR("0.0000000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.00000000000000000000000000000000123456789)"), STR("0.00000000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, 
STR("string(.000000000000000000000000000000000123456789)"), STR("0.000000000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.0000000000000000000000000000000000123456789)"), STR("0.0000000000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.00000000000000000000000000000000000123456789)"), STR("0.00000000000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.000000000000000000000000000000000000123456789)"), STR("0.000000000000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.0000000000000000000000000000000000000123456789)"), STR("0.0000000000000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.00000000000000000000000000000000000000123456789)"), STR("0.00000000000000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.000000000000000000000000000000000000000123456789)"), STR("0.000000000000000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(.0000000000000000000000000000000000000000123456789)"), STR("0.0000000000000000000000000000000000000000123456789")); - - CHECK_XPATH_STRING(c, STR("string(-.123456789)"), STR("-0.123456789")); - CHECK_XPATH_STRING(c, STR("string(-.0123456789)"), STR("-0.0123456789")); - CHECK_XPATH_STRING(c, STR("string(-.00123456789)"), STR("-0.00123456789")); - CHECK_XPATH_STRING(c, STR("string(-.000123456789)"), STR("-0.000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.0000123456789)"), STR("-0.0000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.00000123456789)"), STR("-0.00000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.000000123456789)"), STR("-0.000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.0000000123456789)"), STR("-0.0000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.00000000123456789)"), STR("-0.00000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.000000000123456789)"), STR("-0.000000000123456789")); - CHECK_XPATH_STRING(c, 
STR("string(-.0000000000123456789)"), STR("-0.0000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.00000000000123456789)"), STR("-0.00000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.000000000000123456789)"), STR("-0.000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.0000000000000123456789)"), STR("-0.0000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.00000000000000123456789)"), STR("-0.00000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.000000000000000123456789)"), STR("-0.000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.0000000000000000123456789)"), STR("-0.0000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.00000000000000000123456789)"), STR("-0.00000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.000000000000000000123456789)"), STR("-0.000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.0000000000000000000123456789)"), STR("-0.0000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.00000000000000000000123456789)"), STR("-0.00000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.000000000000000000000123456789)"), STR("-0.000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.0000000000000000000000123456789)"), STR("-0.0000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.00000000000000000000000123456789)"), STR("-0.00000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.000000000000000000000000123456789)"), STR("-0.000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.0000000000000000000000000123456789)"), STR("-0.0000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.00000000000000000000000000123456789)"), STR("-0.00000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.000000000000000000000000000123456789)"), STR("-0.000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, 
STR("string(-.0000000000000000000000000000123456789)"), STR("-0.0000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.00000000000000000000000000000123456789)"), STR("-0.00000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.000000000000000000000000000000123456789)"), STR("-0.000000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.0000000000000000000000000000000123456789)"), STR("-0.0000000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.00000000000000000000000000000000123456789)"), STR("-0.00000000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.000000000000000000000000000000000123456789)"), STR("-0.000000000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.0000000000000000000000000000000000123456789)"), STR("-0.0000000000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.00000000000000000000000000000000000123456789)"), STR("-0.00000000000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.000000000000000000000000000000000000123456789)"), STR("-0.000000000000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.0000000000000000000000000000000000000123456789)"), STR("-0.0000000000000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.00000000000000000000000000000000000000123456789)"), STR("-0.00000000000000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.000000000000000000000000000000000000000123456789)"), STR("-0.000000000000000000000000000000000000000123456789")); - CHECK_XPATH_STRING(c, STR("string(-.0000000000000000000000000000000000000000123456789)"), STR("-0.0000000000000000000000000000000000000000123456789")); -} - -#endif +#ifndef PUGIXML_NO_XPATH + +#define _CRT_SECURE_NO_WARNINGS + +#include "common.hpp" + +#include + +TEST_XML(xpath_xalan_string_1, "ENCYCLOPEDIA") +{ + xml_node c; 
+ + CHECK_XPATH_NUMBER(c, STR("string-length('This is a test')"), 14); + CHECK_XPATH_BOOLEAN(c, STR("starts-with('ENCYCLOPEDIA', 'ENCY')"), true); + CHECK_XPATH_BOOLEAN(c, STR("contains('ENCYCLOPEDIA', 'CYCL')"), true); + CHECK_XPATH_STRING(c, STR("substring-before('1999/04/01', '/')"), STR("1999")); + CHECK_XPATH_STRING(c, STR("substring-after('1999/04/01', '/')"), STR("04/01")); + CHECK_XPATH_STRING(c, STR("normalize-space('\t\n\r\n ab\n cd\t\n\r\n ef\t\n\r ')"), STR("ab cd ef")); + CHECK_XPATH_STRING(c, STR("translate(\"bar\",\"abc\",\"ABC\")"), STR("BAr")); + CHECK_XPATH_STRING(c, STR("concat(\"x\",\"yz\")"), STR("xyz")); + CHECK_XPATH_STRING(c, STR("substring('1999/04/01', 1, 4)"), STR("1999")); + CHECK_XPATH_STRING(c, STR("substring('12345', 1.5, 2.6)"), STR("234")); + CHECK_XPATH_STRING(c, STR("substring('12345', 0, 3)"), STR("12")); + CHECK_XPATH_STRING(c, STR("substring('12345', 0 div 0, 3)"), STR("")); + CHECK_XPATH_STRING(c, STR("substring('12345', 1, 0 div 0)"), STR("")); + CHECK_XPATH_STRING(c, STR("substring('12345', -42, 1 div 0)"), STR("12345")); + CHECK_XPATH_STRING(c, STR("substring('12345', -1 div 0, 1 div 0)"), STR("")); + CHECK_XPATH_STRING(c, STR("substring(foo, 12, 3)"), STR("")); + CHECK_XPATH_STRING(c, STR("string(foo)"), STR("")); + CHECK_XPATH_STRING(c, STR("string(0)"), STR("0")); + CHECK_XPATH_STRING(c, STR("string(2)"), STR("2")); + CHECK_XPATH_STRING(c, STR("string('test')"), STR("test")); + CHECK_XPATH_STRING(c, STR("string('')"), STR("")); + CHECK_XPATH_BOOLEAN(c, STR("starts-with('ENCYCLOPEDIA', 'EN')"), true); + CHECK_XPATH_BOOLEAN(c, STR("starts-with('ENCYCLOPEDIA', 'en')"), false); + CHECK_XPATH_BOOLEAN(c, STR("starts-with('ab', 'abc')"), false); + CHECK_XPATH_BOOLEAN(c, STR("starts-with('abc', 'bc')"), false); + CHECK_XPATH_BOOLEAN(c, STR("starts-with('abc', '')"), true); + CHECK_XPATH_BOOLEAN(c, STR("starts-with('', '')"), true); + CHECK_XPATH_BOOLEAN(c, STR("starts-with('', 'abc')"), false); + CHECK_XPATH_BOOLEAN(c, 
STR("starts-with('true()', 'tr')"), true); + CHECK_XPATH_BOOLEAN(c, STR("starts-with(foo, 'EN')"), false); + CHECK_XPATH_BOOLEAN(doc, STR("starts-with(doc, 'EN')"), true); + CHECK_XPATH_BOOLEAN(doc, STR("starts-with(doc/@a, 'EN')"), false); + CHECK_XPATH_BOOLEAN(doc, STR("starts-with(doc/@a, 'te')"), true); +} + +TEST_XML_FLAGS(xpath_xalan_string_2, "\n \n \n b\n c\n d\n e\n \n \n w\n x\n y\n z\n \n \n", parse_default | parse_ws_pcdata) +{ + CHECK_XPATH_STRING(doc, STR("string(doc/av//*)"), STR("\n b\n c\n d\n e\n ")); + CHECK_XPATH_STRING(doc, STR("normalize-space(string(doc/av//*))"), STR("b c d e")); + CHECK_XPATH_STRING(doc, STR("normalize-space('This is a test')"), STR("This is a test")); +} + +TEST_XML(xpath_xalan_string_3, "ENCYCLOPEDIA") +{ + xml_node c; + + CHECK_XPATH_BOOLEAN(c, STR("contains('ENCYCLOPEDIA', 'TEST')"), false); + CHECK_XPATH_BOOLEAN(c, STR("contains('ENCYCLOPEDIA', 'CYCL')"), true); + CHECK_XPATH_BOOLEAN(c, STR("contains('ENCYCLOPEDIA', 'cycl')"), false); + CHECK_XPATH_BOOLEAN(doc, STR("contains(concat(.,'BC'),concat('A','B','C'))"), true); + CHECK_XPATH_BOOLEAN(c, STR("contains('ab', 'abc')"), false); + CHECK_XPATH_BOOLEAN(c, STR("contains('abc', 'bc')"), true); + CHECK_XPATH_BOOLEAN(c, STR("contains('abc', 'bcd')"), false); + CHECK_XPATH_BOOLEAN(c, STR("contains('abc', '')"), true); + CHECK_XPATH_BOOLEAN(c, STR("contains('', '')"), true); + CHECK_XPATH_BOOLEAN(c, STR("contains('', 'abc')"), false); + CHECK_XPATH_BOOLEAN(c, STR("contains('true()', 'e')"), true); + CHECK_XPATH_BOOLEAN(doc, STR("contains(., 'CYCL')"), true); + CHECK_XPATH_BOOLEAN(doc, STR("contains(., 'TEST')"), false); + CHECK_XPATH_BOOLEAN(doc, STR("contains(doc/@a, 'es')"), true); + CHECK_XPATH_BOOLEAN(doc, STR("contains(doc/@a, 'T')"), false); + CHECK_XPATH_STRING(c, STR("substring-before('ENCYCLOPEDIA', '/')"), STR("")); + CHECK_XPATH_STRING(c, STR("substring-before('ENCYCLOPEDIA', 'C')"), STR("EN")); + CHECK_XPATH_STRING(c, STR("substring-before('ENCYCLOPEDIA', 
'c')"), STR("")); + CHECK_XPATH_STRING(c, STR("substring-before('ENCYCLOPEDIA', '')"), STR("")); + CHECK_XPATH_STRING(doc, STR("substring-before(., '/')"), STR("")); + CHECK_XPATH_STRING(doc, STR("substring-before(., 'C')"), STR("EN")); + CHECK_XPATH_STRING(doc, STR("substring-before(foo, '')"), STR("")); + CHECK_XPATH_STRING(doc, STR("substring-before(doc/@a, '/')"), STR("")); + CHECK_XPATH_STRING(doc, STR("substring-before(doc/@a, 'e')"), STR("t")); + CHECK_XPATH_STRING(doc, STR("substring-before(doc/@a, 't')"), STR("")); + CHECK_XPATH_STRING(c, STR("substring-after('ENCYCLOPEDIA', '/')"), STR("")); + CHECK_XPATH_STRING(c, STR("substring-after('ENCYCLOPEDIA', 'C')"), STR("YCLOPEDIA")); + CHECK_XPATH_STRING(c, STR("substring-after('ENCYCLOPEDIA', 'c')"), STR("")); + CHECK_XPATH_STRING(c, STR("substring-after('ENCYCLOPEDIA', '')"), STR("ENCYCLOPEDIA")); + CHECK_XPATH_STRING(doc, STR("substring-after(., '/')"), STR("")); + CHECK_XPATH_STRING(doc, STR("substring-after(., 'C')"), STR("YCLOPEDIA")); + CHECK_XPATH_STRING(doc, STR("substring-after(foo, '')"), STR("")); + CHECK_XPATH_STRING(doc, STR("substring-after(doc/@a, '/')"), STR("")); + CHECK_XPATH_STRING(doc, STR("substring-after(doc/@a, 'e')"), STR("st")); + CHECK_XPATH_STRING(doc, STR("substring-after(doc/@a, 't')"), STR("est")); + CHECK_XPATH_STRING(doc, STR("substring-after(doc/@a, 'st')"), STR("")); +} + +TEST_XML(xpath_xalan_string_4, "abcdefwhat's up") +{ + xml_node c; + + CHECK_XPATH_STRING(c, STR("translate('BAR','abc','ABC')"), STR("BAR")); + CHECK_XPATH_STRING(c, STR("translate('bar','RAB','xyz')"), STR("bar")); + CHECK_XPATH_STRING(c, STR("translate('BAR','Rab','TxX')"), STR("BAT")); + CHECK_XPATH_STRING(c, STR("translate('zzaaazzz','abcz','ABC')"), STR("AAA")); + CHECK_XPATH_STRING(c, STR("translate('ddaaadddd','abcd','ABCxy')"), STR("xxAAAxxxx")); + CHECK_XPATH_STRING(c, STR("concat('a','b','c','d','ef')"), STR("abcdef")); + CHECK_XPATH_STRING(c, STR("concat(a, b)"), STR("")); + 
CHECK_XPATH_STRING(doc.child(STR("doc")), STR("concat(a, b)"), STR("ab")); + CHECK_XPATH_STRING(doc.child(STR("doc")), STR("concat(a, b, c, d, e)"), STR("abcdef")); + CHECK_XPATH_STRING(c, STR("concat('cd','34')"), STR("cd34")); + CHECK_XPATH_STRING(c, STR("concat('cd',34)"), STR("cd34")); + CHECK_XPATH_STRING(c, STR("concat('bc',string(23))"), STR("bc23")); + CHECK_XPATH_STRING(c, STR("concat(a,34)"), STR("34")); + CHECK_XPATH_STRING(doc.child(STR("doc")), STR("concat(a,34)"), STR("a34")); + CHECK_XPATH_STRING(c, STR("concat(false(),'ly')"), STR("falsely")); + CHECK_XPATH_FAIL(STR("concat(/*)")); + CHECK_XPATH_STRING(doc.child(STR("doc")), STR("concat(/*, '')"), STR("abcdefwhat's up")); + CHECK_XPATH_STRING(doc.child(STR("doc")), STR("concat(/*, /*[@attr='whatsup'])"), STR("abcdefwhat's up")); + CHECK_XPATH_STRING(doc.child(STR("doc")), STR("concat(/*, //*[@attr='whatsup'])"), STR("abcdefwhat's upwhat's up")); + CHECK_XPATH_STRING(c, STR("substring('ENCYCLOPEDIA', 8, 3)"), STR("PED")); + CHECK_XPATH_STRING(c, STR("substring('ENCYCLOPEDIA', 8)"), STR("PEDIA")); + CHECK_XPATH_STRING(c, STR("substring('abcdefghijk',0 div 0, 5)"), STR("")); + CHECK_XPATH_STRING(c, STR("substring('abcdefghijk',4, 6)"), STR("defghi")); + CHECK_XPATH_STRING(c, STR("substring('1999/04/01', 1, 0)"), STR("")); + CHECK_XPATH_STRING(c, STR("translate(normalize-space(' bar fly '), ' ', '_')"), STR("bar_fly")); + CHECK_XPATH_STRING(c, STR("translate('barter','abe','bao')"), STR("abrtor")); + CHECK_XPATH_STRING(c, STR("translate('barbarity', 'aeiouy', '******')"), STR("b*rb*r*t*")); + CHECK_XPATH_STRING(doc, STR("translate(cd, concat(\"aqu'\", '\"eos'), 'AQU-+EOS')"), STR("QUA-lit+y")); + CHECK_XPATH_STRING(c, STR("translate('quan+ti-ty', 'AQU-+EOS', concat(\"aqu'\", '\"eos'))"), STR("quan\"ti'ty")); +} + +static std::basic_string number_to_string(int number) +{ + std::basic_string result; + + while (number) + { + result = static_cast('0' + number % 10) + result; + number /= 10; + } + + return 
result; +} + +TEST(xpath_xalan_string_5) +{ + std::basic_string query = STR("concat("); + + for (int i = 1; i < 1000; ++i) + { + query += STR("concat('t',"); + query += number_to_string(i); + query += STR("), "); + } + + query += STR("'')"); + + std::basic_string expected; + + for (int j = 1; j < 1000; ++j) + { + expected += STR("t"); + expected += number_to_string(j); + } + + CHECK_XPATH_STRING(xml_node(), query.c_str(), expected.c_str()); +} + +TEST(xpath_xalan_string_6) +{ + xml_node c; + + CHECK_XPATH_STRING(c, STR("string(1)"), STR("1")); + CHECK_XPATH_STRING(c, STR("string(12)"), STR("12")); + CHECK_XPATH_STRING(c, STR("string(123)"), STR("123")); + CHECK_XPATH_STRING(c, STR("string(1234)"), STR("1234")); + CHECK_XPATH_STRING(c, STR("string(12345)"), STR("12345")); + CHECK_XPATH_STRING(c, STR("string(123456)"), STR("123456")); + CHECK_XPATH_STRING(c, STR("string(1234567)"), STR("1234567")); + CHECK_XPATH_STRING(c, STR("string(12345678)"), STR("12345678")); + CHECK_XPATH_STRING(c, STR("string(123456789)"), STR("123456789")); + CHECK_XPATH_STRING(c, STR("string(1234567890)"), STR("1234567890")); + CHECK_XPATH_STRING(c, STR("string(12345678901)"), STR("12345678901")); + CHECK_XPATH_STRING(c, STR("string(123456789012)"), STR("123456789012")); + CHECK_XPATH_STRING(c, STR("string(1234567890123)"), STR("1234567890123")); + CHECK_XPATH_STRING(c, STR("string(12345678901234)"), STR("12345678901234")); + CHECK_XPATH_STRING(c, STR("string(123456789012345)"), STR("123456789012345")); + CHECK_XPATH_STRING(c, STR("string(1234567890123456)"), STR("1234567890123456")); + CHECK_XPATH_STRING(c, STR("string(-1)"), STR("-1")); + CHECK_XPATH_STRING(c, STR("string(-12)"), STR("-12")); + CHECK_XPATH_STRING(c, STR("string(-123)"), STR("-123")); + CHECK_XPATH_STRING(c, STR("string(-1234)"), STR("-1234")); + CHECK_XPATH_STRING(c, STR("string(-12345)"), STR("-12345")); + CHECK_XPATH_STRING(c, STR("string(-123456)"), STR("-123456")); + CHECK_XPATH_STRING(c, STR("string(-1234567)"), 
STR("-1234567")); + CHECK_XPATH_STRING(c, STR("string(-12345678)"), STR("-12345678")); + CHECK_XPATH_STRING(c, STR("string(-123456789)"), STR("-123456789")); + CHECK_XPATH_STRING(c, STR("string(-1234567890)"), STR("-1234567890")); + CHECK_XPATH_STRING(c, STR("string(-12345678901)"), STR("-12345678901")); + CHECK_XPATH_STRING(c, STR("string(-123456789012)"), STR("-123456789012")); + CHECK_XPATH_STRING(c, STR("string(-1234567890123)"), STR("-1234567890123")); + CHECK_XPATH_STRING(c, STR("string(-12345678901234)"), STR("-12345678901234")); + CHECK_XPATH_STRING(c, STR("string(-123456789012345)"), STR("-123456789012345")); + CHECK_XPATH_STRING(c, STR("string(-1234567890123456)"), STR("-1234567890123456")); +} + +#if 0 // $ this test requires round-to-nearest behavior in string->number conversion during parsing; atof gives us truncation +TEST(xpath_xalan_string_6_rounding) +{ + xml_node c; + + CHECK_XPATH_STRING(c, STR("string(12345678901234567)"), STR("12345678901234568")); + CHECK_XPATH_STRING(c, STR("string(123456789012345678)"), STR("123456789012345680")); + CHECK_XPATH_STRING(c, STR("string(-12345678901234567)"), STR("-12345678901234568")); + CHECK_XPATH_STRING(c, STR("string(-123456789012345678)"), STR("-123456789012345680")); +} +#endif + +TEST(xpath_xalan_string_7) +{ + xml_node c; + + CHECK_XPATH_STRING(c, STR("string(.1)"), STR("0.1")); + CHECK_XPATH_STRING(c, STR("string(.01)"), STR("0.01")); + CHECK_XPATH_STRING(c, STR("string(.012)"), STR("0.012")); + CHECK_XPATH_STRING(c, STR("string(.0123)"), STR("0.0123")); + CHECK_XPATH_STRING(c, STR("string(.01234)"), STR("0.01234")); + CHECK_XPATH_STRING(c, STR("string(.012345)"), STR("0.012345")); + CHECK_XPATH_STRING(c, STR("string(.0123456)"), STR("0.0123456")); + CHECK_XPATH_STRING(c, STR("string(.01234567)"), STR("0.01234567")); + CHECK_XPATH_STRING(c, STR("string(.012345678)"), STR("0.012345678")); + CHECK_XPATH_STRING(c, STR("string(.0123456789)"), STR("0.0123456789")); + CHECK_XPATH_STRING(c, 
STR("string(.10123456789)"), STR("0.10123456789")); + CHECK_XPATH_STRING(c, STR("string(.101234567892)"), STR("0.101234567892")); + CHECK_XPATH_STRING(c, STR("string(.1012345678923)"), STR("0.1012345678923")); + CHECK_XPATH_STRING(c, STR("string(.10123456789234)"), STR("0.10123456789234")); + CHECK_XPATH_STRING(c, STR("string(.101234567892345)"), STR("0.101234567892345")); + CHECK_XPATH_STRING(c, STR("string(.1012345678923456)"), STR("0.1012345678923456")); + CHECK_XPATH_STRING(c, STR("string(-.1)"), STR("-0.1")); + CHECK_XPATH_STRING(c, STR("string(-.01)"), STR("-0.01")); + CHECK_XPATH_STRING(c, STR("string(-.012)"), STR("-0.012")); + CHECK_XPATH_STRING(c, STR("string(-.0123)"), STR("-0.0123")); + CHECK_XPATH_STRING(c, STR("string(-.01234)"), STR("-0.01234")); + CHECK_XPATH_STRING(c, STR("string(-.012345)"), STR("-0.012345")); + CHECK_XPATH_STRING(c, STR("string(-.0123456)"), STR("-0.0123456")); + CHECK_XPATH_STRING(c, STR("string(-.01234567)"), STR("-0.01234567")); + CHECK_XPATH_STRING(c, STR("string(-.012345678)"), STR("-0.012345678")); + CHECK_XPATH_STRING(c, STR("string(-.0123456789)"), STR("-0.0123456789")); + CHECK_XPATH_STRING(c, STR("string(-.10123456789)"), STR("-0.10123456789")); + CHECK_XPATH_STRING(c, STR("string(-.101234567892)"), STR("-0.101234567892")); + CHECK_XPATH_STRING(c, STR("string(-.1012345678923)"), STR("-0.1012345678923")); + CHECK_XPATH_STRING(c, STR("string(-.10123456789234)"), STR("-0.10123456789234")); + CHECK_XPATH_STRING(c, STR("string(-.101234567892345)"), STR("-0.101234567892345")); + CHECK_XPATH_STRING(c, STR("string(-.1012345678923456)"), STR("-0.1012345678923456")); +} + +#if 0 // $ this test requires 16 decimal digits of mantissa in number->string conversion; we have 15 since only 15 is guaranteed, and 16 introduces 'garbage' digits in common cases like 0.4 +TEST(xpath_xalan_string_7_precision) +{ + xml_node c; + + CHECK_XPATH_STRING(c, STR("string(.10123456789234567)"), STR("0.10123456789234567")); + CHECK_XPATH_STRING(c, 
STR("string(.101234567892345678)"), STR("0.10123456789234568")); + CHECK_XPATH_STRING(c, STR("string(.1012345678923456789)"), STR("0.10123456789234568")); + CHECK_XPATH_STRING(c, STR("string(.10123456789234567893)"), STR("0.10123456789234568")); + CHECK_XPATH_STRING(c, STR("string(-.10123456789234567)"), STR("-0.10123456789234567")); + CHECK_XPATH_STRING(c, STR("string(-.101234567892345678)"), STR("-0.10123456789234568")); + CHECK_XPATH_STRING(c, STR("string(-.1012345678923456789)"), STR("-0.10123456789234568")); + CHECK_XPATH_STRING(c, STR("string(-.10123456789234567893)"), STR("-0.10123456789234568")); +} +#endif + +TEST(xpath_xalan_string_8) +{ + xml_node c; + + // $ originally all last digits were 5's; a fully compliant implementation should correctly convert those as well, + // however some of these failed because of atof truncation + CHECK_XPATH_STRING(c, STR("string(9.87654321012344)"), STR("9.87654321012344")); + CHECK_XPATH_STRING(c, STR("string(98.7654321012345)"), STR("98.7654321012345")); + CHECK_XPATH_STRING(c, STR("string(987.654321012345)"), STR("987.654321012345")); + CHECK_XPATH_STRING(c, STR("string(9876.54321012344)"), STR("9876.54321012344")); + CHECK_XPATH_STRING(c, STR("string(98765.4321012345)"), STR("98765.4321012345")); + CHECK_XPATH_STRING(c, STR("string(987654.321012345)"), STR("987654.321012345")); + CHECK_XPATH_STRING(c, STR("string(9876543.21012345)"), STR("9876543.21012345")); + CHECK_XPATH_STRING(c, STR("string(98765432.1012345)"), STR("98765432.1012345")); + CHECK_XPATH_STRING(c, STR("string(987654321.012345)"), STR("987654321.012345")); + CHECK_XPATH_STRING(c, STR("string(9876543210.12344)"), STR("9876543210.12344")); + CHECK_XPATH_STRING(c, STR("string(98765432101.2345)"), STR("98765432101.2345")); + CHECK_XPATH_STRING(c, STR("string(987654321012.345)"), STR("987654321012.345")); + CHECK_XPATH_STRING(c, STR("string(9876543210123.43)"), STR("9876543210123.43")); + CHECK_XPATH_STRING(c, STR("string(98765432101234.5)"), 
STR("98765432101234.5")); + + CHECK_XPATH_STRING(c, STR("string(-9.87654321012344)"), STR("-9.87654321012344")); + CHECK_XPATH_STRING(c, STR("string(-98.7654321012345)"), STR("-98.7654321012345")); + CHECK_XPATH_STRING(c, STR("string(-987.654321012345)"), STR("-987.654321012345")); + CHECK_XPATH_STRING(c, STR("string(-9876.54321012344)"), STR("-9876.54321012344")); + CHECK_XPATH_STRING(c, STR("string(-98765.4321012345)"), STR("-98765.4321012345")); + CHECK_XPATH_STRING(c, STR("string(-987654.321012345)"), STR("-987654.321012345")); + CHECK_XPATH_STRING(c, STR("string(-9876543.21012345)"), STR("-9876543.21012345")); + CHECK_XPATH_STRING(c, STR("string(-98765432.1012345)"), STR("-98765432.1012345")); + CHECK_XPATH_STRING(c, STR("string(-987654321.012345)"), STR("-987654321.012345")); + CHECK_XPATH_STRING(c, STR("string(-9876543210.12344)"), STR("-9876543210.12344")); + CHECK_XPATH_STRING(c, STR("string(-98765432101.2345)"), STR("-98765432101.2345")); + CHECK_XPATH_STRING(c, STR("string(-987654321012.345)"), STR("-987654321012.345")); + CHECK_XPATH_STRING(c, STR("string(-9876543210123.43)"), STR("-9876543210123.43")); + CHECK_XPATH_STRING(c, STR("string(-98765432101234.5)"), STR("-98765432101234.5")); +} + +TEST(xpath_xalan_string_9) +{ + xml_node c; + + CHECK_XPATH_STRING(c, STR("string(.123456789)"), STR("0.123456789")); + CHECK_XPATH_STRING(c, STR("string(.0123456789)"), STR("0.0123456789")); + CHECK_XPATH_STRING(c, STR("string(.00123456789)"), STR("0.00123456789")); + CHECK_XPATH_STRING(c, STR("string(.000123456789)"), STR("0.000123456789")); + CHECK_XPATH_STRING(c, STR("string(.0000123456789)"), STR("0.0000123456789")); + CHECK_XPATH_STRING(c, STR("string(.00000123456789)"), STR("0.00000123456789")); + CHECK_XPATH_STRING(c, STR("string(.000000123456789)"), STR("0.000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.0000000123456789)"), STR("0.0000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.00000000123456789)"), STR("0.00000000123456789")); + 
CHECK_XPATH_STRING(c, STR("string(.000000000123456789)"), STR("0.000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.0000000000123456789)"), STR("0.0000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.00000000000123456789)"), STR("0.00000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.000000000000123456789)"), STR("0.000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.0000000000000123456789)"), STR("0.0000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.00000000000000123456789)"), STR("0.00000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.000000000000000123456789)"), STR("0.000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.0000000000000000123456789)"), STR("0.0000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.00000000000000000123456789)"), STR("0.00000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.000000000000000000123456789)"), STR("0.000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.0000000000000000000123456789)"), STR("0.0000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.00000000000000000000123456789)"), STR("0.00000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.000000000000000000000123456789)"), STR("0.000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.0000000000000000000000123456789)"), STR("0.0000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.00000000000000000000000123456789)"), STR("0.00000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.000000000000000000000000123456789)"), STR("0.000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.0000000000000000000000000123456789)"), STR("0.0000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.00000000000000000000000000123456789)"), STR("0.00000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, 
STR("string(.000000000000000000000000000123456789)"), STR("0.000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.0000000000000000000000000000123456789)"), STR("0.0000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.00000000000000000000000000000123456789)"), STR("0.00000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.000000000000000000000000000000123456789)"), STR("0.000000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.0000000000000000000000000000000123456789)"), STR("0.0000000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.00000000000000000000000000000000123456789)"), STR("0.00000000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.000000000000000000000000000000000123456789)"), STR("0.000000000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.0000000000000000000000000000000000123456789)"), STR("0.0000000000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.00000000000000000000000000000000000123456789)"), STR("0.00000000000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.000000000000000000000000000000000000123456789)"), STR("0.000000000000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.0000000000000000000000000000000000000123456789)"), STR("0.0000000000000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.00000000000000000000000000000000000000123456789)"), STR("0.00000000000000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.000000000000000000000000000000000000000123456789)"), STR("0.000000000000000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(.0000000000000000000000000000000000000000123456789)"), STR("0.0000000000000000000000000000000000000000123456789")); + + CHECK_XPATH_STRING(c, STR("string(-.123456789)"), STR("-0.123456789")); + 
CHECK_XPATH_STRING(c, STR("string(-.0123456789)"), STR("-0.0123456789")); + CHECK_XPATH_STRING(c, STR("string(-.00123456789)"), STR("-0.00123456789")); + CHECK_XPATH_STRING(c, STR("string(-.000123456789)"), STR("-0.000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.0000123456789)"), STR("-0.0000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.00000123456789)"), STR("-0.00000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.000000123456789)"), STR("-0.000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.0000000123456789)"), STR("-0.0000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.00000000123456789)"), STR("-0.00000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.000000000123456789)"), STR("-0.000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.0000000000123456789)"), STR("-0.0000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.00000000000123456789)"), STR("-0.00000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.000000000000123456789)"), STR("-0.000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.0000000000000123456789)"), STR("-0.0000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.00000000000000123456789)"), STR("-0.00000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.000000000000000123456789)"), STR("-0.000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.0000000000000000123456789)"), STR("-0.0000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.00000000000000000123456789)"), STR("-0.00000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.000000000000000000123456789)"), STR("-0.000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.0000000000000000000123456789)"), STR("-0.0000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.00000000000000000000123456789)"), STR("-0.00000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.000000000000000000000123456789)"), 
STR("-0.000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.0000000000000000000000123456789)"), STR("-0.0000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.00000000000000000000000123456789)"), STR("-0.00000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.000000000000000000000000123456789)"), STR("-0.000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.0000000000000000000000000123456789)"), STR("-0.0000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.00000000000000000000000000123456789)"), STR("-0.00000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.000000000000000000000000000123456789)"), STR("-0.000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.0000000000000000000000000000123456789)"), STR("-0.0000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.00000000000000000000000000000123456789)"), STR("-0.00000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.000000000000000000000000000000123456789)"), STR("-0.000000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.0000000000000000000000000000000123456789)"), STR("-0.0000000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.00000000000000000000000000000000123456789)"), STR("-0.00000000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.000000000000000000000000000000000123456789)"), STR("-0.000000000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.0000000000000000000000000000000000123456789)"), STR("-0.0000000000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.00000000000000000000000000000000000123456789)"), STR("-0.00000000000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.000000000000000000000000000000000000123456789)"), 
STR("-0.000000000000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.0000000000000000000000000000000000000123456789)"), STR("-0.0000000000000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.00000000000000000000000000000000000000123456789)"), STR("-0.00000000000000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.000000000000000000000000000000000000000123456789)"), STR("-0.000000000000000000000000000000000000000123456789")); + CHECK_XPATH_STRING(c, STR("string(-.0000000000000000000000000000000000000000123456789)"), STR("-0.0000000000000000000000000000000000000000123456789")); +} + +#endif diff --git a/tests/test_xpath_xalan_3.cpp b/tests/test_xpath_xalan_3.cpp index ee9253d..54b8a62 100644 --- a/tests/test_xpath_xalan_3.cpp +++ b/tests/test_xpath_xalan_3.cpp @@ -1,319 +1,319 @@ -#ifndef PUGIXML_NO_XPATH - -#include "common.hpp" - -TEST_XML(xpath_xalan_axes_1, "
") -{ - xml_node center = doc.select_single_node(STR("//center")).node(); - - CHECK_XPATH_NODESET(center, STR("self::*[near-south]")) % 10; - CHECK_XPATH_NODESET(center, STR("self::*[@center-attr-2]")) % 10; - CHECK_XPATH_NODESET(center, STR("preceding-sibling::*")) % 9 % 8 % 7; - CHECK_XPATH_NODESET(center, STR("preceding-sibling::*/following-sibling::*")) % 8 % 9 % 10 % 19 % 20 % 21; - CHECK_XPATH_NODESET(center, STR("preceding-sibling::*[2]/following-sibling::*")) % 9 % 10 % 19 % 20 % 21; - CHECK_XPATH_NODESET(center, STR("preceding-sibling::*[2]/following-sibling::*[4]")) % 20; - CHECK_XPATH_NODESET(center, STR("preceding-sibling::*[2]/following-sibling::*[4]/preceding-sibling::*[5]/following-sibling::*[4]/following-sibling::*[2]")) % 21; - CHECK_XPATH_NODESET(center, STR("following-sibling::*")) % 19 % 20 % 21; - CHECK_XPATH_NODESET(center, STR("following-sibling::*/preceding-sibling::*")) % 7 % 8 % 9 % 10 % 19 % 20; - CHECK_XPATH_NODESET(center, STR("following-sibling::*[2]/preceding-sibling::*")) % 19 % 10 % 9 % 8 % 7; - CHECK_XPATH_NODESET(center, STR("following-sibling::*[2]/preceding-sibling::*[4]")) % 8; - CHECK_XPATH_NODESET(center, STR("following-sibling::*[2]/preceding-sibling::*[4]/following-sibling::*[5]/preceding-sibling::*[4]/preceding-sibling::*[2]")) % 7; - CHECK_XPATH_NODESET(center, STR("following::*[4]/../*[2]")) % 4; - CHECK_XPATH_NODESET(center, STR("preceding::*[2]/../following::*")) % 22 % 23; - CHECK_XPATH_NODESET(center, STR("preceding::*[2]/../descendant::*[10]/following-sibling::east")) % 20; - CHECK_XPATH_NODESET(center, STR("//*")) % 2 % 3 % 4 % 5 % 6 % 7 % 8 % 9 % 10 % 14 % 15 % 16 % 17 % 18 % 19 % 20 % 21 % 22 % 23; - CHECK_XPATH_NODESET(center, STR("//ancestor::*")) % 2 % 5 % 6 % 10 % 15 % 16; - CHECK_XPATH_NODESET(center, STR("//*[count(ancestor::*) >= 2]/../parent::*")) % 2 % 5 % 6 % 10 % 15; - CHECK_XPATH_NODESET(center, STR("//*[count(./*/*) > 0]")) % 2 % 5 % 6 % 10 % 15; - CHECK_XPATH_NODESET(center, STR("@*/ancestor::*")) % 
2 % 5 % 6 % 10; - CHECK_XPATH_NODESET(center, STR("@*/following::*")) % 14 % 15 % 16 % 17 % 18 % 19 % 20 % 21 % 22 % 23; - CHECK_XPATH_NODESET(center, STR("@*/preceding::*")) % 3 % 4 % 7 % 8 % 9; - CHECK_XPATH_NODESET(center, STR("preceding-sibling::*|following-sibling::*")) % 7 % 8 % 9 % 19 % 20 % 21; - CHECK_XPATH_NODESET(center, STR("(preceding-sibling::*|following-sibling::*)/ancestor::*[last()]/*[last()]")) % 23; - CHECK_XPATH_NODESET(center, STR(".//near-south/preceding-sibling::*|following-sibling::east/ancestor-or-self::*[2]")) % 6 % 14; -} - -TEST_XML_FLAGS(xpath_xalan_axes_2, " Level-1 Level-2 Level-3 Level-4
Level-5 Level-6
", parse_default | parse_comments | parse_pi) -{ - xml_node center = doc.select_single_node(STR("//center")).node(); - - CHECK_XPATH_NODESET(center, STR("@*")) % 21 % 22 % 23; - CHECK_XPATH_NODESET(center, STR("@*/child::*")); - CHECK_XPATH_NODESET(center, STR("@*/descendant::node()")); - CHECK_XPATH_NODESET(center, STR("@*/parent::node()")) % 20; - CHECK_XPATH_NODESET(center, STR("@*/ancestor::node()")) % 1 % 2 % 9 % 13 % 20; - CHECK_XPATH_NODESET(center, STR("@*/self::node()")) % 21 % 22 % 23; - CHECK_XPATH_NODESET(center, STR("@*/.")) % 21 % 22 % 23; - CHECK_XPATH_NODESET(center, STR("@*/descendant-or-self::node()")) % 21 % 22 % 23; - CHECK_XPATH_NODESET(center, STR("@*/ancestor-or-self::node()")) % 1 % 2 % 9 % 13 % 20 % 21 % 22 % 23; - CHECK_XPATH_NODESET(center, STR("@*/ancestor-or-self::*")) % 2 % 9 % 13 % 20; - CHECK_XPATH_NODESET(center, STR("@*/preceding-sibling::node()")); - CHECK_XPATH_NODESET(center, STR("@*/following-sibling::*")); - CHECK_XPATH_NODESET(center, STR("@*/ancestor::*/near-north/*[4]/@*/preceding::*")) % 4 % 5 % 14 % 15 % 16; - CHECK_XPATH_NODESET(center, STR("@*/ancestor::*/near-north/*[4]/@*/preceding::comment()")) % 6 % 10 % 17; - CHECK_XPATH_NODESET(center, STR("@*/ancestor::*/near-north/*[4]/@*/preceding::text()")) % 3 % 7 % 11 % 18; - CHECK_XPATH_NODESET(center, STR("@*/ancestor::*/near-north/*[4]/@*/preceding::processing-instruction()")) % 8 % 12 % 19; - CHECK_XPATH_NODESET(center, STR("@*/following::comment()")) % 25 % 29; - CHECK_XPATH_NODESET(center, STR("@*/following::processing-instruction()")) % 27 % 31; - CHECK_XPATH_NODESET(center, STR("@*/following::text()")) % 26 % 30; - CHECK_XPATH_NODESET(center, STR("@*/ancestor::*/near-north/*[4]/@*/preceding::node()")) % 3 % 4 % 5 % 6 % 7 % 8 % 10 % 11 % 12 % 14 % 15 % 16 % 17 % 18 % 19; - CHECK_XPATH_NODESET(center, STR("@*/ancestor::*/near-north/*[4]/@*/following::node()")) % 24 % 25 % 26 % 27 % 28 % 29 % 30 % 31 % 32 % 35 % 36 % 37 % 38 % 39 % 40 % 41; - - 
CHECK_XPATH_NODESET(center, STR("(//comment())[1]/..")) % 2; - CHECK_XPATH_NODESET(center, STR("(//attribute::*)[1]/../..")) % 13; -} - -TEST_XML(xpath_xalan_axes_3, "
") -{ - xml_node center = doc.select_single_node(STR("//center")).node(); - - CHECK_XPATH_NODESET(center, STR("ancestor-or-self::*")) % 8 % 4 % 3 % 2; - CHECK_XPATH_NODESET(center, STR("ancestor::*[3]")) % 2; - CHECK_XPATH_NODESET(center, STR("ancestor-or-self::*[1]")) % 8; - CHECK_XPATH_NODESET(center, STR("@*[2]")); - CHECK_XPATH_NODESET(center, STR("child::*[2]")); - CHECK_XPATH_NODESET(center, STR("child::near-south-west")); - CHECK_XPATH_NODESET(center, STR("descendant::*[3]")) % 11; - CHECK_XPATH_NODESET(center, STR("descendant::far-south")) % 11; - CHECK_XPATH_NODESET(center, STR("descendant-or-self::*[3]")) % 10; - CHECK_XPATH_NODESET(center, STR("descendant-or-self::far-south")) % 11; - CHECK_XPATH_NODESET(center, STR("descendant-or-self::center")) % 8; - CHECK_XPATH_NODESET(center, STR("following::*[4]")); - CHECK_XPATH_NODESET(center, STR("following::out-yonder-east")); - CHECK_XPATH_NODESET(center, STR("preceding::*[4]")); - CHECK_XPATH_NODESET(center, STR("preceding::out-yonder-west")); - CHECK_XPATH_NODESET(center, STR("following-sibling::*[2]")) % 13; - CHECK_XPATH_NODESET(center, STR("following-sibling::east")) % 13; - CHECK_XPATH_NODESET(center, STR("preceding-sibling::*[2]")) % 6; - CHECK_XPATH_NODESET(center, STR("preceding-sibling::west")) % 6; - CHECK_XPATH_NODESET(center, STR("parent::near-north")) % 4; - CHECK_XPATH_NODESET(center, STR("parent::*[1]")) % 4; - CHECK_XPATH_NODESET(center, STR("parent::foo")); - CHECK_XPATH_NODESET(center, STR("..")) % 4; - CHECK_XPATH_NODESET(center, STR("self::center")) % 8; - CHECK_XPATH_NODESET(center, STR("self::*[1]")) % 8; - CHECK_XPATH_NODESET(center, STR("self::foo")); - CHECK_XPATH_NODESET(center, STR(".")) % 8; - CHECK_XPATH_NODESET(center, STR("/far-north/north/near-north/center/ancestor-or-self::*")) % 8 % 4 % 3 % 2; -} - -TEST_XML(xpath_xalan_axes_4, "
") -{ - xml_node north = doc.select_single_node(STR("//north")).node(); - - CHECK_XPATH_STRING(north, STR("name(/descendant-or-self::north)"), STR("north")); - CHECK_XPATH_STRING(north, STR("name(/descendant::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(self::node()/descendant-or-self::north)"), STR("north")); - CHECK_XPATH_STRING(north, STR("name(self::node()/descendant::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(descendant-or-self::north/descendant-or-self::north)"), STR("north")); - CHECK_XPATH_STRING(north, STR("name(descendant-or-self::north/descendant::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(descendant-or-self::north/child::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(descendant::near-north/descendant-or-self::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(descendant::near-north/descendant::far-west)"), STR("far-west")); - - CHECK_XPATH_STRING(north, STR("name(/descendant-or-self::north/descendant-or-self::north)"), STR("north")); - CHECK_XPATH_STRING(north, STR("name(/descendant-or-self::north/child::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(/descendant::near-north/descendant-or-self::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(self::node()/descendant-or-self::north/descendant-or-self::north)"), STR("north")); - CHECK_XPATH_STRING(north, STR("name(self::node()/descendant-or-self::north/child::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(self::node()/descendant::near-north/descendant-or-self::far-west)"), STR("far-west")); - CHECK_XPATH_STRING(north, STR("name(self::node()/descendant::near-north/child::far-west)"), STR("far-west")); - CHECK_XPATH_STRING(north, STR("name(descendant-or-self::north/descendant-or-self::north/descendant-or-self::north)"), STR("north")); - CHECK_XPATH_STRING(north, 
STR("name(descendant-or-self::north/descendant-or-self::north/child::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(descendant-or-self::north/descendant::near-north/descendant-or-self::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(descendant-or-self::north/descendant::near-north/child::far-west)"), STR("far-west")); - CHECK_XPATH_STRING(north, STR("name(descendant-or-self::north/child::near-north/descendant-or-self::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(descendant-or-self::north/child::near-north/child::far-west)"), STR("far-west")); - CHECK_XPATH_STRING(north, STR("name(descendant::near-north/descendant-or-self::near-north/descendant-or-self::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(descendant::near-north/descendant-or-self::near-north/child::far-west)"), STR("far-west")); - CHECK_XPATH_STRING(north, STR("name(descendant::near-north/descendant::far-west/descendant-or-self::far-west)"), STR("far-west")); - - CHECK_XPATH_STRING(north, STR("name(descendant-or-self::node()/descendant-or-self::north)"), STR("north")); - CHECK_XPATH_STRING(north, STR("name(descendant-or-self::node()/descendant::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(descendant-or-self::node()/child::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(descendant::node()/descendant-or-self::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(descendant::node()/descendant::far-west)"), STR("far-west")); - - CHECK_XPATH_STRING(north, STR("name(/descendant-or-self::node()/descendant-or-self::north)"), STR("north")); - CHECK_XPATH_STRING(north, STR("name(/descendant-or-self::node()/child::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(/descendant::node()/descendant-or-self::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, 
STR("name(self::node()/descendant-or-self::node()/descendant-or-self::north)"), STR("north")); - CHECK_XPATH_STRING(north, STR("name(self::node()/descendant-or-self::node()/child::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(self::node()/descendant::node()/descendant-or-self::far-west)"), STR("far-west")); - CHECK_XPATH_STRING(north, STR("name(self::node()/descendant::node()/child::far-west)"), STR("far-west")); - CHECK_XPATH_STRING(north, STR("name(descendant-or-self::node()/descendant-or-self::node()/descendant-or-self::north)"), STR("north")); - CHECK_XPATH_STRING(north, STR("name(descendant-or-self::node()/descendant-or-self::node()/child::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(descendant-or-self::node()/descendant::node()/descendant-or-self::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(descendant-or-self::node()/descendant::node()/child::far-west)"), STR("far-west")); - CHECK_XPATH_STRING(north, STR("name(descendant-or-self::node()/child::node()/descendant-or-self::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(descendant-or-self::node()/child::node()/child::far-west)"), STR("far-west")); - CHECK_XPATH_STRING(north, STR("name(descendant::node()/descendant-or-self::node()/descendant-or-self::near-north)"), STR("near-north")); - CHECK_XPATH_STRING(north, STR("name(descendant::node()/descendant-or-self::node()/child::far-west)"), STR("far-west")); - CHECK_XPATH_STRING(north, STR("name(descendant::node()/descendant::node()/descendant-or-self::far-west)"), STR("far-west")); -} - -TEST_XML_FLAGS(xpath_xalan_axes_5, "text", parse_default | parse_comments | parse_pi) -{ - CHECK_XPATH_NODESET(doc, STR("text/self::text()")); - CHECK_XPATH_NODESET(doc, STR("comment/self::comment()")); - CHECK_XPATH_NODESET(doc, STR("pi/self::processing-instruction()")); -} - -TEST_XML(xpath_xalan_axes_6, "Test for source tree depthABCDEFGHIJKLMNO") -{ - 
CHECK_XPATH_NODESET(doc, STR("//T")) % 3 % 6 % 9 % 12 % 15 % 18 % 21 % 24 % 27 % 30 % 33 % 36 % 39 % 42 % 45 % 48; -} - -TEST_XML(xpath_xalan_axes_7, "
") -{ - xml_node center = doc.select_single_node(STR("//center")).node(); - - CHECK_XPATH_NODESET(center, STR("attribute::*[2]")) % 10; - CHECK_XPATH_NODESET(center, STR("@*")) % 9 % 10 % 11; - CHECK_XPATH_NODESET(center, STR("child::*/child::*")) % 13; - CHECK_XPATH_NODESET(center, STR("child::*/descendant::*")) % 13 % 14; - CHECK_XPATH_NODESET(center, STR("descendant::*/child::*")) % 13 % 14; -} - -TEST_XML(xpath_xalan_axes_8, "
") -{ - xml_node near_north = doc.select_single_node(STR("//near-north")).node(); - - CHECK_XPATH_NODESET(near_north, STR("center//child::*")) % 12 % 13 % 14 % 15 % 16; - CHECK_XPATH_NODESET(near_north, STR("center//descendant::*")) % 12 % 13 % 14 % 15 % 16; - CHECK_XPATH_NODESET(near_north, STR("center/descendant::*")) % 12 % 13 % 14 % 15 % 16; - CHECK_XPATH_NODESET(near_north, STR("center/child::*")) % 12 % 13 % 16; - CHECK_XPATH_NODESET(near_north, STR("center//*")) % 12 % 13 % 14 % 15 % 16; -} - -TEST_XML(xpath_xalan_axes_9, "") -{ - xml_node baz = doc.select_single_node(STR("//baz")).node(); - - CHECK_XPATH_NODESET(baz, STR("ancestor-or-self::*[@att1][1]/@att1")) % 8; - CHECK_XPATH_NODESET(baz, STR("(ancestor-or-self::*)[@att1][1]/@att1")) % 4; - - CHECK_XPATH_NODESET(baz, STR("ancestor::foo[1]/@att1")) % 8; - CHECK_XPATH_NODESET(baz, STR("(ancestor::foo[1])/@att1")) % 8; - CHECK_XPATH_NODESET(baz, STR("(ancestor::foo)[1]/@att1")) % 4; - CHECK_XPATH_NODESET(baz, STR("((ancestor::foo))[1]/@att1")) % 4; - CHECK_XPATH_NODESET(baz, STR("(((ancestor::foo)[1])/@att1)")) % 4; - - xml_node bar = doc.child(STR("doc")).child(STR("bar")); - - CHECK_XPATH_NODESET(bar, STR("preceding::foo[1]/@att1")) % 8; - CHECK_XPATH_NODESET(bar, STR("(preceding::foo)[1]/@att1")) % 4; -} - -TEST_XML(xpath_xalan_axes_10, "") -{ - xml_node baz = doc.child(STR("doc")).child(STR("baz")); - - CHECK_XPATH_NODESET(baz, STR("preceding-sibling::foo[1]/@att1")) % 8; - CHECK_XPATH_NODESET(baz, STR("(preceding-sibling::foo)[1]/@att1")) % 4; -} - -TEST_XML(xpath_xalan_axes_11, "
helloahoy
goodbyesayonaraadios
alohaA3b-1A3b-2shalom
") -{ - xml_node chapter = doc.child(STR("chapter")); - - CHECK_XPATH_NUMBER(doc, STR("count(//@*)"), 16); - CHECK_XPATH_NUMBER(doc, STR("count(//@title)"), 12); - CHECK_XPATH_NUMBER(doc, STR("count(//section//@*)"), 14); - CHECK_XPATH_NUMBER(doc, STR("count(//section//@title)"), 11); - - CHECK_XPATH_NUMBER(chapter, STR("count(.//@*)"), 16); - CHECK_XPATH_NUMBER(chapter, STR("count(.//@title)"), 12); - CHECK_XPATH_NUMBER(chapter, STR("count(section[1]//@*)"), 5); - CHECK_XPATH_NUMBER(chapter, STR("count(section[1]//@title)"), 3); - CHECK_XPATH_NUMBER(chapter, STR("count(section[2]//@*)"), 4); - CHECK_XPATH_NUMBER(chapter, STR("count(section[2]//@title)"), 4); - CHECK_XPATH_NUMBER(chapter, STR("count(section[3]//@*)"), 5); - CHECK_XPATH_NUMBER(chapter, STR("count(section[3]//@title)"), 4); -} - -TEST_XML_FLAGS(xpath_xalan_axes_12, "north-text1
center-text1south-textcenter-text2
north-text2
", parse_default | parse_comments) -{ - CHECK_XPATH_NODESET(doc, STR("/descendant::*")) % 2 % 3 % 5 % 6 % 7 % 9 % 10 % 12 % 13 % 15 % 17 % 18 % 20; - CHECK_XPATH_NODESET(doc, STR("far-north/..//*")) % 2 % 3 % 5 % 6 % 7 % 9 % 10 % 12 % 13 % 15 % 17 % 18 % 20; - CHECK_XPATH_NODESET(doc, STR("far-north/north/..//*")) % 3 % 5 % 6 % 7 % 9 % 10 % 12 % 13 % 15 % 17 % 18 % 20; - CHECK_XPATH_NODESET(doc, STR("far-north/north-yonder/..//*")); -} - -TEST_XML(xpath_xalan_axes_13, "") -{ - xml_node d = doc.child(STR("doc")); - xml_node baz = doc.select_single_node(STR("//baz")).node(); - - CHECK_XPATH_NUMBER(d, STR("count(descendant-or-self::*/@att1)"), 5); - CHECK_XPATH_NODESET(d, STR("descendant-or-self::*/@att1[last()]")) % 3 % 5 % 7 % 9 % 11; - CHECK_XPATH_STRING(d, STR("string(descendant-or-self::*/@att1[last()])"), STR("e")); - CHECK_XPATH_NODESET(d, STR("descendant-or-self::*[last()]/@att1")) % 11; - CHECK_XPATH_NODESET(d, STR("(descendant-or-self::*/@att1)[last()]")) % 11; - - CHECK_XPATH_NUMBER(baz, STR("count(ancestor-or-self::*/@att1)"), 5); - CHECK_XPATH_NODESET(baz, STR("ancestor-or-self::*/@att1[last()]")) % 3 % 5 % 7 % 9 % 11; - CHECK_XPATH_STRING(baz, STR("string(ancestor-or-self::*/@att1[last()])"), STR("e")); - CHECK_XPATH_NODESET(baz, STR("(ancestor-or-self::*)/@att1[last()]")) % 3 % 5 % 7 % 9 % 11; - CHECK_XPATH_STRING(baz, STR("string((ancestor-or-self::*)/@att1[last()])"), STR("e")); - CHECK_XPATH_NODESET(baz, STR("(ancestor-or-self::*/@att1)[last()]")) % 11; - CHECK_XPATH_NODESET(baz, STR("(ancestor::*|self::*)/@att1[last()]")) % 3 % 5 % 7 % 9 % 11; - CHECK_XPATH_STRING(baz, STR("string((ancestor::*|self::*)/@att1[last()])"), STR("e")); - CHECK_XPATH_NODESET(baz, STR("((ancestor::*|self::*)/@att1)[last()]")) % 11; -} - -TEST_XML_FLAGS(xpath_xalan_axes_14, "text
text", parse_default | parse_comments | parse_pi) -{ - CHECK_XPATH_NODESET(doc, STR("//center/preceding::node()")) % 7 % 6 % 5 % 3; - CHECK_XPATH_NODESET(doc, STR("//center/following::node()")) % 9 % 10 % 11 % 12; -} - -TEST_XML(xpath_xalan_axes_15, "is newxyzis new but has textis not new") -{ - CHECK_XPATH_NODESET(doc, STR("//text()[ancestor::*[@new='true'][not(text())]]")) % 6; - CHECK_XPATH_NODESET(doc, STR("//text()[ancestor::*[2][@new]]")) % 6 % 11 % 15; - - xml_node foo = doc.child(STR("doc")).child(STR("foo")).child(STR("baz")).first_child(); - - CHECK_XPATH_STRING(foo, STR("name(ancestor::*[3])"), STR("doc")); - CHECK_XPATH_STRING(foo, STR("name(ancestor::*[2])"), STR("foo")); - CHECK_XPATH_STRING(foo, STR("name(ancestor::*[1])"), STR("baz")); -} - -TEST_XML(xpath_xalan_axes_16, "") -{ - xml_node c1 = doc.child(STR("doc")).child(STR("child")), c2 = c1.next_sibling(), c3 = c2.next_sibling(), c4 = c3.next_sibling(), c5 = c4.next_sibling(), c6 = c5.next_sibling(); - - CHECK_XPATH_STRING(c1.first_child(), STR("concat(count(descendant::*), ',', count(descendant-or-self::*))"), STR("1,2")); - CHECK_XPATH_STRING(c1.last_child(), STR("concat(count(descendant::*), ',', count(descendant-or-self::*))"), STR("3,4")); - - CHECK_XPATH_STRING(c2.first_child(), STR("concat(count(descendant::*), ',', count(descendant-or-self::*))"), STR("3,4")); - - CHECK_XPATH_STRING(c3.first_child(), STR("concat(count(descendant::*), ',', count(descendant-or-self::*))"), STR("1,2")); - - CHECK_XPATH_STRING(c4.first_child(), STR("concat(count(descendant::*), ',', count(descendant-or-self::*))"), STR("0,1")); - - CHECK_XPATH_STRING(c5.first_child(), STR("concat(count(descendant::*), ',', count(descendant-or-self::*))"), STR("0,1")); - CHECK_XPATH_STRING(c5.last_child(), STR("concat(count(descendant::*), ',', count(descendant-or-self::*))"), STR("0,1")); - - CHECK_XPATH_STRING(c6, STR("concat(count(descendant::*), ',', count(descendant-or-self::*))"), STR("0,1")); - - 
CHECK_XPATH_STRING(xml_node(), STR("concat(count(descendant::*), ',', count(descendant-or-self::*))"), STR("0,0")); -} - -TEST_XML(xpath_xalan_axes_17, "here") -{ - CHECK_XPATH_NODESET(doc, STR("//xx/descendant::*")) % 10 % 20 % 24 % 26 % 27; -} - -TEST_XML(xpath_xalan_axes_18, "
") -{ - xml_node center = doc.child(STR("north")).child(STR("center")); - - CHECK_XPATH_NODESET(center, STR("@*/self::node()")) % 4; - CHECK_XPATH_NODESET(center, STR("@*/self::*")); // * tests for principal node type - CHECK_XPATH_NODESET(center, STR("@*/self::text()")); - CHECK_XPATH_NODESET(center, STR("@*/self::center-attr")); // * tests for principal node type -} - -#endif +#ifndef PUGIXML_NO_XPATH + +#include "common.hpp" + +TEST_XML(xpath_xalan_axes_1, "
") +{ + xml_node center = doc.select_single_node(STR("//center")).node(); + + CHECK_XPATH_NODESET(center, STR("self::*[near-south]")) % 10; + CHECK_XPATH_NODESET(center, STR("self::*[@center-attr-2]")) % 10; + CHECK_XPATH_NODESET(center, STR("preceding-sibling::*")) % 9 % 8 % 7; + CHECK_XPATH_NODESET(center, STR("preceding-sibling::*/following-sibling::*")) % 8 % 9 % 10 % 19 % 20 % 21; + CHECK_XPATH_NODESET(center, STR("preceding-sibling::*[2]/following-sibling::*")) % 9 % 10 % 19 % 20 % 21; + CHECK_XPATH_NODESET(center, STR("preceding-sibling::*[2]/following-sibling::*[4]")) % 20; + CHECK_XPATH_NODESET(center, STR("preceding-sibling::*[2]/following-sibling::*[4]/preceding-sibling::*[5]/following-sibling::*[4]/following-sibling::*[2]")) % 21; + CHECK_XPATH_NODESET(center, STR("following-sibling::*")) % 19 % 20 % 21; + CHECK_XPATH_NODESET(center, STR("following-sibling::*/preceding-sibling::*")) % 7 % 8 % 9 % 10 % 19 % 20; + CHECK_XPATH_NODESET(center, STR("following-sibling::*[2]/preceding-sibling::*")) % 19 % 10 % 9 % 8 % 7; + CHECK_XPATH_NODESET(center, STR("following-sibling::*[2]/preceding-sibling::*[4]")) % 8; + CHECK_XPATH_NODESET(center, STR("following-sibling::*[2]/preceding-sibling::*[4]/following-sibling::*[5]/preceding-sibling::*[4]/preceding-sibling::*[2]")) % 7; + CHECK_XPATH_NODESET(center, STR("following::*[4]/../*[2]")) % 4; + CHECK_XPATH_NODESET(center, STR("preceding::*[2]/../following::*")) % 22 % 23; + CHECK_XPATH_NODESET(center, STR("preceding::*[2]/../descendant::*[10]/following-sibling::east")) % 20; + CHECK_XPATH_NODESET(center, STR("//*")) % 2 % 3 % 4 % 5 % 6 % 7 % 8 % 9 % 10 % 14 % 15 % 16 % 17 % 18 % 19 % 20 % 21 % 22 % 23; + CHECK_XPATH_NODESET(center, STR("//ancestor::*")) % 2 % 5 % 6 % 10 % 15 % 16; + CHECK_XPATH_NODESET(center, STR("//*[count(ancestor::*) >= 2]/../parent::*")) % 2 % 5 % 6 % 10 % 15; + CHECK_XPATH_NODESET(center, STR("//*[count(./*/*) > 0]")) % 2 % 5 % 6 % 10 % 15; + CHECK_XPATH_NODESET(center, STR("@*/ancestor::*")) % 
2 % 5 % 6 % 10; + CHECK_XPATH_NODESET(center, STR("@*/following::*")) % 14 % 15 % 16 % 17 % 18 % 19 % 20 % 21 % 22 % 23; + CHECK_XPATH_NODESET(center, STR("@*/preceding::*")) % 3 % 4 % 7 % 8 % 9; + CHECK_XPATH_NODESET(center, STR("preceding-sibling::*|following-sibling::*")) % 7 % 8 % 9 % 19 % 20 % 21; + CHECK_XPATH_NODESET(center, STR("(preceding-sibling::*|following-sibling::*)/ancestor::*[last()]/*[last()]")) % 23; + CHECK_XPATH_NODESET(center, STR(".//near-south/preceding-sibling::*|following-sibling::east/ancestor-or-self::*[2]")) % 6 % 14; +} + +TEST_XML_FLAGS(xpath_xalan_axes_2, " Level-1 Level-2 Level-3 Level-4
Level-5 Level-6
", parse_default | parse_comments | parse_pi) +{ + xml_node center = doc.select_single_node(STR("//center")).node(); + + CHECK_XPATH_NODESET(center, STR("@*")) % 21 % 22 % 23; + CHECK_XPATH_NODESET(center, STR("@*/child::*")); + CHECK_XPATH_NODESET(center, STR("@*/descendant::node()")); + CHECK_XPATH_NODESET(center, STR("@*/parent::node()")) % 20; + CHECK_XPATH_NODESET(center, STR("@*/ancestor::node()")) % 1 % 2 % 9 % 13 % 20; + CHECK_XPATH_NODESET(center, STR("@*/self::node()")) % 21 % 22 % 23; + CHECK_XPATH_NODESET(center, STR("@*/.")) % 21 % 22 % 23; + CHECK_XPATH_NODESET(center, STR("@*/descendant-or-self::node()")) % 21 % 22 % 23; + CHECK_XPATH_NODESET(center, STR("@*/ancestor-or-self::node()")) % 1 % 2 % 9 % 13 % 20 % 21 % 22 % 23; + CHECK_XPATH_NODESET(center, STR("@*/ancestor-or-self::*")) % 2 % 9 % 13 % 20; + CHECK_XPATH_NODESET(center, STR("@*/preceding-sibling::node()")); + CHECK_XPATH_NODESET(center, STR("@*/following-sibling::*")); + CHECK_XPATH_NODESET(center, STR("@*/ancestor::*/near-north/*[4]/@*/preceding::*")) % 4 % 5 % 14 % 15 % 16; + CHECK_XPATH_NODESET(center, STR("@*/ancestor::*/near-north/*[4]/@*/preceding::comment()")) % 6 % 10 % 17; + CHECK_XPATH_NODESET(center, STR("@*/ancestor::*/near-north/*[4]/@*/preceding::text()")) % 3 % 7 % 11 % 18; + CHECK_XPATH_NODESET(center, STR("@*/ancestor::*/near-north/*[4]/@*/preceding::processing-instruction()")) % 8 % 12 % 19; + CHECK_XPATH_NODESET(center, STR("@*/following::comment()")) % 25 % 29; + CHECK_XPATH_NODESET(center, STR("@*/following::processing-instruction()")) % 27 % 31; + CHECK_XPATH_NODESET(center, STR("@*/following::text()")) % 26 % 30; + CHECK_XPATH_NODESET(center, STR("@*/ancestor::*/near-north/*[4]/@*/preceding::node()")) % 3 % 4 % 5 % 6 % 7 % 8 % 10 % 11 % 12 % 14 % 15 % 16 % 17 % 18 % 19; + CHECK_XPATH_NODESET(center, STR("@*/ancestor::*/near-north/*[4]/@*/following::node()")) % 24 % 25 % 26 % 27 % 28 % 29 % 30 % 31 % 32 % 35 % 36 % 37 % 38 % 39 % 40 % 41; + + 
CHECK_XPATH_NODESET(center, STR("(//comment())[1]/..")) % 2; + CHECK_XPATH_NODESET(center, STR("(//attribute::*)[1]/../..")) % 13; +} + +TEST_XML(xpath_xalan_axes_3, "
") +{ + xml_node center = doc.select_single_node(STR("//center")).node(); + + CHECK_XPATH_NODESET(center, STR("ancestor-or-self::*")) % 8 % 4 % 3 % 2; + CHECK_XPATH_NODESET(center, STR("ancestor::*[3]")) % 2; + CHECK_XPATH_NODESET(center, STR("ancestor-or-self::*[1]")) % 8; + CHECK_XPATH_NODESET(center, STR("@*[2]")); + CHECK_XPATH_NODESET(center, STR("child::*[2]")); + CHECK_XPATH_NODESET(center, STR("child::near-south-west")); + CHECK_XPATH_NODESET(center, STR("descendant::*[3]")) % 11; + CHECK_XPATH_NODESET(center, STR("descendant::far-south")) % 11; + CHECK_XPATH_NODESET(center, STR("descendant-or-self::*[3]")) % 10; + CHECK_XPATH_NODESET(center, STR("descendant-or-self::far-south")) % 11; + CHECK_XPATH_NODESET(center, STR("descendant-or-self::center")) % 8; + CHECK_XPATH_NODESET(center, STR("following::*[4]")); + CHECK_XPATH_NODESET(center, STR("following::out-yonder-east")); + CHECK_XPATH_NODESET(center, STR("preceding::*[4]")); + CHECK_XPATH_NODESET(center, STR("preceding::out-yonder-west")); + CHECK_XPATH_NODESET(center, STR("following-sibling::*[2]")) % 13; + CHECK_XPATH_NODESET(center, STR("following-sibling::east")) % 13; + CHECK_XPATH_NODESET(center, STR("preceding-sibling::*[2]")) % 6; + CHECK_XPATH_NODESET(center, STR("preceding-sibling::west")) % 6; + CHECK_XPATH_NODESET(center, STR("parent::near-north")) % 4; + CHECK_XPATH_NODESET(center, STR("parent::*[1]")) % 4; + CHECK_XPATH_NODESET(center, STR("parent::foo")); + CHECK_XPATH_NODESET(center, STR("..")) % 4; + CHECK_XPATH_NODESET(center, STR("self::center")) % 8; + CHECK_XPATH_NODESET(center, STR("self::*[1]")) % 8; + CHECK_XPATH_NODESET(center, STR("self::foo")); + CHECK_XPATH_NODESET(center, STR(".")) % 8; + CHECK_XPATH_NODESET(center, STR("/far-north/north/near-north/center/ancestor-or-self::*")) % 8 % 4 % 3 % 2; +} + +TEST_XML(xpath_xalan_axes_4, "
") +{ + xml_node north = doc.select_single_node(STR("//north")).node(); + + CHECK_XPATH_STRING(north, STR("name(/descendant-or-self::north)"), STR("north")); + CHECK_XPATH_STRING(north, STR("name(/descendant::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(self::node()/descendant-or-self::north)"), STR("north")); + CHECK_XPATH_STRING(north, STR("name(self::node()/descendant::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(descendant-or-self::north/descendant-or-self::north)"), STR("north")); + CHECK_XPATH_STRING(north, STR("name(descendant-or-self::north/descendant::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(descendant-or-self::north/child::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(descendant::near-north/descendant-or-self::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(descendant::near-north/descendant::far-west)"), STR("far-west")); + + CHECK_XPATH_STRING(north, STR("name(/descendant-or-self::north/descendant-or-self::north)"), STR("north")); + CHECK_XPATH_STRING(north, STR("name(/descendant-or-self::north/child::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(/descendant::near-north/descendant-or-self::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(self::node()/descendant-or-self::north/descendant-or-self::north)"), STR("north")); + CHECK_XPATH_STRING(north, STR("name(self::node()/descendant-or-self::north/child::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(self::node()/descendant::near-north/descendant-or-self::far-west)"), STR("far-west")); + CHECK_XPATH_STRING(north, STR("name(self::node()/descendant::near-north/child::far-west)"), STR("far-west")); + CHECK_XPATH_STRING(north, STR("name(descendant-or-self::north/descendant-or-self::north/descendant-or-self::north)"), STR("north")); + CHECK_XPATH_STRING(north, 
STR("name(descendant-or-self::north/descendant-or-self::north/child::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(descendant-or-self::north/descendant::near-north/descendant-or-self::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(descendant-or-self::north/descendant::near-north/child::far-west)"), STR("far-west")); + CHECK_XPATH_STRING(north, STR("name(descendant-or-self::north/child::near-north/descendant-or-self::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(descendant-or-self::north/child::near-north/child::far-west)"), STR("far-west")); + CHECK_XPATH_STRING(north, STR("name(descendant::near-north/descendant-or-self::near-north/descendant-or-self::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(descendant::near-north/descendant-or-self::near-north/child::far-west)"), STR("far-west")); + CHECK_XPATH_STRING(north, STR("name(descendant::near-north/descendant::far-west/descendant-or-self::far-west)"), STR("far-west")); + + CHECK_XPATH_STRING(north, STR("name(descendant-or-self::node()/descendant-or-self::north)"), STR("north")); + CHECK_XPATH_STRING(north, STR("name(descendant-or-self::node()/descendant::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(descendant-or-self::node()/child::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(descendant::node()/descendant-or-self::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(descendant::node()/descendant::far-west)"), STR("far-west")); + + CHECK_XPATH_STRING(north, STR("name(/descendant-or-self::node()/descendant-or-self::north)"), STR("north")); + CHECK_XPATH_STRING(north, STR("name(/descendant-or-self::node()/child::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(/descendant::node()/descendant-or-self::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, 
STR("name(self::node()/descendant-or-self::node()/descendant-or-self::north)"), STR("north")); + CHECK_XPATH_STRING(north, STR("name(self::node()/descendant-or-self::node()/child::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(self::node()/descendant::node()/descendant-or-self::far-west)"), STR("far-west")); + CHECK_XPATH_STRING(north, STR("name(self::node()/descendant::node()/child::far-west)"), STR("far-west")); + CHECK_XPATH_STRING(north, STR("name(descendant-or-self::node()/descendant-or-self::node()/descendant-or-self::north)"), STR("north")); + CHECK_XPATH_STRING(north, STR("name(descendant-or-self::node()/descendant-or-self::node()/child::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(descendant-or-self::node()/descendant::node()/descendant-or-self::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(descendant-or-self::node()/descendant::node()/child::far-west)"), STR("far-west")); + CHECK_XPATH_STRING(north, STR("name(descendant-or-self::node()/child::node()/descendant-or-self::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(descendant-or-self::node()/child::node()/child::far-west)"), STR("far-west")); + CHECK_XPATH_STRING(north, STR("name(descendant::node()/descendant-or-self::node()/descendant-or-self::near-north)"), STR("near-north")); + CHECK_XPATH_STRING(north, STR("name(descendant::node()/descendant-or-self::node()/child::far-west)"), STR("far-west")); + CHECK_XPATH_STRING(north, STR("name(descendant::node()/descendant::node()/descendant-or-self::far-west)"), STR("far-west")); +} + +TEST_XML_FLAGS(xpath_xalan_axes_5, "text", parse_default | parse_comments | parse_pi) +{ + CHECK_XPATH_NODESET(doc, STR("text/self::text()")); + CHECK_XPATH_NODESET(doc, STR("comment/self::comment()")); + CHECK_XPATH_NODESET(doc, STR("pi/self::processing-instruction()")); +} + +TEST_XML(xpath_xalan_axes_6, "Test for source tree depthABCDEFGHIJKLMNO") +{ + 
CHECK_XPATH_NODESET(doc, STR("//T")) % 3 % 6 % 9 % 12 % 15 % 18 % 21 % 24 % 27 % 30 % 33 % 36 % 39 % 42 % 45 % 48; +} + +TEST_XML(xpath_xalan_axes_7, "
") +{ + xml_node center = doc.select_single_node(STR("//center")).node(); + + CHECK_XPATH_NODESET(center, STR("attribute::*[2]")) % 10; + CHECK_XPATH_NODESET(center, STR("@*")) % 9 % 10 % 11; + CHECK_XPATH_NODESET(center, STR("child::*/child::*")) % 13; + CHECK_XPATH_NODESET(center, STR("child::*/descendant::*")) % 13 % 14; + CHECK_XPATH_NODESET(center, STR("descendant::*/child::*")) % 13 % 14; +} + +TEST_XML(xpath_xalan_axes_8, "
") +{ + xml_node near_north = doc.select_single_node(STR("//near-north")).node(); + + CHECK_XPATH_NODESET(near_north, STR("center//child::*")) % 12 % 13 % 14 % 15 % 16; + CHECK_XPATH_NODESET(near_north, STR("center//descendant::*")) % 12 % 13 % 14 % 15 % 16; + CHECK_XPATH_NODESET(near_north, STR("center/descendant::*")) % 12 % 13 % 14 % 15 % 16; + CHECK_XPATH_NODESET(near_north, STR("center/child::*")) % 12 % 13 % 16; + CHECK_XPATH_NODESET(near_north, STR("center//*")) % 12 % 13 % 14 % 15 % 16; +} + +TEST_XML(xpath_xalan_axes_9, "") +{ + xml_node baz = doc.select_single_node(STR("//baz")).node(); + + CHECK_XPATH_NODESET(baz, STR("ancestor-or-self::*[@att1][1]/@att1")) % 8; + CHECK_XPATH_NODESET(baz, STR("(ancestor-or-self::*)[@att1][1]/@att1")) % 4; + + CHECK_XPATH_NODESET(baz, STR("ancestor::foo[1]/@att1")) % 8; + CHECK_XPATH_NODESET(baz, STR("(ancestor::foo[1])/@att1")) % 8; + CHECK_XPATH_NODESET(baz, STR("(ancestor::foo)[1]/@att1")) % 4; + CHECK_XPATH_NODESET(baz, STR("((ancestor::foo))[1]/@att1")) % 4; + CHECK_XPATH_NODESET(baz, STR("(((ancestor::foo)[1])/@att1)")) % 4; + + xml_node bar = doc.child(STR("doc")).child(STR("bar")); + + CHECK_XPATH_NODESET(bar, STR("preceding::foo[1]/@att1")) % 8; + CHECK_XPATH_NODESET(bar, STR("(preceding::foo)[1]/@att1")) % 4; +} + +TEST_XML(xpath_xalan_axes_10, "") +{ + xml_node baz = doc.child(STR("doc")).child(STR("baz")); + + CHECK_XPATH_NODESET(baz, STR("preceding-sibling::foo[1]/@att1")) % 8; + CHECK_XPATH_NODESET(baz, STR("(preceding-sibling::foo)[1]/@att1")) % 4; +} + +TEST_XML(xpath_xalan_axes_11, "
helloahoy
goodbyesayonaraadios
alohaA3b-1A3b-2shalom
") +{ + xml_node chapter = doc.child(STR("chapter")); + + CHECK_XPATH_NUMBER(doc, STR("count(//@*)"), 16); + CHECK_XPATH_NUMBER(doc, STR("count(//@title)"), 12); + CHECK_XPATH_NUMBER(doc, STR("count(//section//@*)"), 14); + CHECK_XPATH_NUMBER(doc, STR("count(//section//@title)"), 11); + + CHECK_XPATH_NUMBER(chapter, STR("count(.//@*)"), 16); + CHECK_XPATH_NUMBER(chapter, STR("count(.//@title)"), 12); + CHECK_XPATH_NUMBER(chapter, STR("count(section[1]//@*)"), 5); + CHECK_XPATH_NUMBER(chapter, STR("count(section[1]//@title)"), 3); + CHECK_XPATH_NUMBER(chapter, STR("count(section[2]//@*)"), 4); + CHECK_XPATH_NUMBER(chapter, STR("count(section[2]//@title)"), 4); + CHECK_XPATH_NUMBER(chapter, STR("count(section[3]//@*)"), 5); + CHECK_XPATH_NUMBER(chapter, STR("count(section[3]//@title)"), 4); +} + +TEST_XML_FLAGS(xpath_xalan_axes_12, "north-text1
center-text1south-textcenter-text2
north-text2
", parse_default | parse_comments) +{ + CHECK_XPATH_NODESET(doc, STR("/descendant::*")) % 2 % 3 % 5 % 6 % 7 % 9 % 10 % 12 % 13 % 15 % 17 % 18 % 20; + CHECK_XPATH_NODESET(doc, STR("far-north/..//*")) % 2 % 3 % 5 % 6 % 7 % 9 % 10 % 12 % 13 % 15 % 17 % 18 % 20; + CHECK_XPATH_NODESET(doc, STR("far-north/north/..//*")) % 3 % 5 % 6 % 7 % 9 % 10 % 12 % 13 % 15 % 17 % 18 % 20; + CHECK_XPATH_NODESET(doc, STR("far-north/north-yonder/..//*")); +} + +TEST_XML(xpath_xalan_axes_13, "") +{ + xml_node d = doc.child(STR("doc")); + xml_node baz = doc.select_single_node(STR("//baz")).node(); + + CHECK_XPATH_NUMBER(d, STR("count(descendant-or-self::*/@att1)"), 5); + CHECK_XPATH_NODESET(d, STR("descendant-or-self::*/@att1[last()]")) % 3 % 5 % 7 % 9 % 11; + CHECK_XPATH_STRING(d, STR("string(descendant-or-self::*/@att1[last()])"), STR("e")); + CHECK_XPATH_NODESET(d, STR("descendant-or-self::*[last()]/@att1")) % 11; + CHECK_XPATH_NODESET(d, STR("(descendant-or-self::*/@att1)[last()]")) % 11; + + CHECK_XPATH_NUMBER(baz, STR("count(ancestor-or-self::*/@att1)"), 5); + CHECK_XPATH_NODESET(baz, STR("ancestor-or-self::*/@att1[last()]")) % 3 % 5 % 7 % 9 % 11; + CHECK_XPATH_STRING(baz, STR("string(ancestor-or-self::*/@att1[last()])"), STR("e")); + CHECK_XPATH_NODESET(baz, STR("(ancestor-or-self::*)/@att1[last()]")) % 3 % 5 % 7 % 9 % 11; + CHECK_XPATH_STRING(baz, STR("string((ancestor-or-self::*)/@att1[last()])"), STR("e")); + CHECK_XPATH_NODESET(baz, STR("(ancestor-or-self::*/@att1)[last()]")) % 11; + CHECK_XPATH_NODESET(baz, STR("(ancestor::*|self::*)/@att1[last()]")) % 3 % 5 % 7 % 9 % 11; + CHECK_XPATH_STRING(baz, STR("string((ancestor::*|self::*)/@att1[last()])"), STR("e")); + CHECK_XPATH_NODESET(baz, STR("((ancestor::*|self::*)/@att1)[last()]")) % 11; +} + +TEST_XML_FLAGS(xpath_xalan_axes_14, "text
text", parse_default | parse_comments | parse_pi) +{ + CHECK_XPATH_NODESET(doc, STR("//center/preceding::node()")) % 7 % 6 % 5 % 3; + CHECK_XPATH_NODESET(doc, STR("//center/following::node()")) % 9 % 10 % 11 % 12; +} + +TEST_XML(xpath_xalan_axes_15, "is newxyzis new but has textis not new") +{ + CHECK_XPATH_NODESET(doc, STR("//text()[ancestor::*[@new='true'][not(text())]]")) % 6; + CHECK_XPATH_NODESET(doc, STR("//text()[ancestor::*[2][@new]]")) % 6 % 11 % 15; + + xml_node foo = doc.child(STR("doc")).child(STR("foo")).child(STR("baz")).first_child(); + + CHECK_XPATH_STRING(foo, STR("name(ancestor::*[3])"), STR("doc")); + CHECK_XPATH_STRING(foo, STR("name(ancestor::*[2])"), STR("foo")); + CHECK_XPATH_STRING(foo, STR("name(ancestor::*[1])"), STR("baz")); +} + +TEST_XML(xpath_xalan_axes_16, "") +{ + xml_node c1 = doc.child(STR("doc")).child(STR("child")), c2 = c1.next_sibling(), c3 = c2.next_sibling(), c4 = c3.next_sibling(), c5 = c4.next_sibling(), c6 = c5.next_sibling(); + + CHECK_XPATH_STRING(c1.first_child(), STR("concat(count(descendant::*), ',', count(descendant-or-self::*))"), STR("1,2")); + CHECK_XPATH_STRING(c1.last_child(), STR("concat(count(descendant::*), ',', count(descendant-or-self::*))"), STR("3,4")); + + CHECK_XPATH_STRING(c2.first_child(), STR("concat(count(descendant::*), ',', count(descendant-or-self::*))"), STR("3,4")); + + CHECK_XPATH_STRING(c3.first_child(), STR("concat(count(descendant::*), ',', count(descendant-or-self::*))"), STR("1,2")); + + CHECK_XPATH_STRING(c4.first_child(), STR("concat(count(descendant::*), ',', count(descendant-or-self::*))"), STR("0,1")); + + CHECK_XPATH_STRING(c5.first_child(), STR("concat(count(descendant::*), ',', count(descendant-or-self::*))"), STR("0,1")); + CHECK_XPATH_STRING(c5.last_child(), STR("concat(count(descendant::*), ',', count(descendant-or-self::*))"), STR("0,1")); + + CHECK_XPATH_STRING(c6, STR("concat(count(descendant::*), ',', count(descendant-or-self::*))"), STR("0,1")); + + 
CHECK_XPATH_STRING(xml_node(), STR("concat(count(descendant::*), ',', count(descendant-or-self::*))"), STR("0,0")); +} + +TEST_XML(xpath_xalan_axes_17, "here") +{ + CHECK_XPATH_NODESET(doc, STR("//xx/descendant::*")) % 10 % 20 % 24 % 26 % 27; +} + +TEST_XML(xpath_xalan_axes_18, "
") +{ + xml_node center = doc.child(STR("north")).child(STR("center")); + + CHECK_XPATH_NODESET(center, STR("@*/self::node()")) % 4; + CHECK_XPATH_NODESET(center, STR("@*/self::*")); // * tests for principal node type + CHECK_XPATH_NODESET(center, STR("@*/self::text()")); + CHECK_XPATH_NODESET(center, STR("@*/self::center-attr")); // * tests for principal node type +} + +#endif diff --git a/tests/test_xpath_xalan_4.cpp b/tests/test_xpath_xalan_4.cpp index 10784da..c71eaf7 100644 --- a/tests/test_xpath_xalan_4.cpp +++ b/tests/test_xpath_xalan_4.cpp @@ -1,298 +1,298 @@ -#ifndef PUGIXML_NO_XPATH - -#include "common.hpp" - -TEST_XML(xpath_xalan_position_1, "1234") -{ - xml_node c = doc.child(STR("doc")); - - CHECK_XPATH_BOOLEAN(c, STR("position()=1"), true); - CHECK_XPATH_NODESET(c, STR("*[position()=4]")) % 9; -} - -TEST_XML_FLAGS(xpath_xalan_position_2, "111912632827256345", parse_default | parse_comments | parse_pi) -{ - xml_node c = doc.child(STR("doc")); - - CHECK_XPATH_NODESET(c, STR("*[@test and position()=8]")) % 27; - CHECK_XPATH_NODESET(c, STR("*[@test][position()=4]/num")) % 29; - CHECK_XPATH_NUMBER(c, STR("count(*)"), 8); - CHECK_XPATH_NODESET(c, STR("*[last()=position()]")) % 27; - CHECK_XPATH_NODESET(c, STR("a[position()=2]")) % 7; - CHECK_XPATH_NODESET(c, STR("a[3]/../a[position()=4]/num/../@test")) % 14; - CHECK_XPATH_BOOLEAN(c, STR("not(position()=last())"), false); - CHECK_XPATH_BOOLEAN(c, STR("position()=2"), false); - CHECK_XPATH_BOOLEAN(c, STR("last()=1"), true); - CHECK_XPATH_BOOLEAN(c, STR("last()+2=3"), true); - CHECK_XPATH_NODESET(c, STR("a[position()=5 mod 3]")) % 7; - CHECK_XPATH_NODESET(c, STR("a/num/text()[position()=1]")) % 6 % 9 % 12 % 16 % 19 % 22 % 26 % 30; - CHECK_XPATH_NODESET(c, STR("a/num/text()[position()=2]")) % 32; - CHECK_XPATH_NODESET(c, STR("a/num/text()[position()=last()]")) % 6 % 9 % 12 % 16 % 19 % 22 % 26 % 32; - CHECK_XPATH_NODESET(c, STR("a/num/text()[1]")) % 6 % 9 % 12 % 16 % 19 % 22 % 26 % 30; - CHECK_XPATH_NODESET(c, 
STR("a/num/text()[2]")) % 32; - CHECK_XPATH_NODESET(c, STR("a/num/text()[last()]")) % 6 % 9 % 12 % 16 % 19 % 22 % 26 % 32; - CHECK_XPATH_NODESET(c, STR("a[floor(last() div 3)]")) % 7; - CHECK_XPATH_NODESET(c, STR("a[ceiling(last() div 3)]")) % 10; - CHECK_XPATH_NODESET(c, STR("a[round(last() div 3)]")) % 10; - CHECK_XPATH_NODESET(c, STR("a[last() div 3]")); - CHECK_XPATH_NODESET(c, STR("a[last() div 2]")) % 13; - CHECK_XPATH_NODESET(c, STR("a[3]/../a[position()>=2 and position()<=4]")) % 7 % 10 % 13; - CHECK_XPATH_NUMBER(c, STR("count(a[position()>=2 and position()<=4]/num)"), 3); - CHECK_XPATH_NUMBER(c, STR("count(a/@*)"), 4); - CHECK_XPATH_NUMBER(c, STR("count(a/attribute::*)"), 4); - CHECK_XPATH_NODESET(c, STR("*[not(@test)][position()=last()]")) % 20; - CHECK_XPATH_NODESET(c, STR("*[not(@test)][last()]")) % 20; - CHECK_XPATH_NODESET(c, STR("a[3-2]")) % 3; - CHECK_XPATH_NODESET(c, STR("a[0]")); - CHECK_XPATH_NODESET(c, STR("a[9]")); - CHECK_XPATH_NODESET(c, STR("a['3']")) % 3 % 7 % 10 % 13 % 17 % 20 % 23 % 27; - CHECK_XPATH_NODESET(c, STR("a[number('3')]")) % 10; - CHECK_XPATH_NODESET(c, STR("processing-instruction()[2]")) % 34; - CHECK_XPATH_NODESET(c, STR("processing-instruction('pi')[2]")) % 34; - CHECK_XPATH_NODESET(c, STR("comment()[2]")) % 36; - CHECK_XPATH_NODESET(c, STR("a/*[last()]")) % 5 % 8 % 11 % 15 % 18 % 21 % 25 % 29; - CHECK_XPATH_NODESET(c, STR("a/child::*[last()]")) % 5 % 8 % 11 % 15 % 18 % 21 % 25 % 29; - CHECK_XPATH_NODESET(c, STR("a/descendant::*[last()]")) % 5 % 8 % 11 % 15 % 18 % 21 % 25 % 31; - CHECK_XPATH_NODESET(c, STR("a/child::node()[last()]")) % 5 % 8 % 11 % 15 % 18 % 21 % 25 % 29; - CHECK_XPATH_NODESET(c, STR("a/descendant::text()[last()]")) % 6 % 9 % 12 % 16 % 19 % 22 % 26 % 32; - CHECK_XPATH_NODESET(c, STR("child::comment()[last()]")) % 36; -} - -TEST_XML(xpath_xalan_position_3, "
AAA
BBBAbout this article
CCCThis is the section titled 'ZZZ'.DDDDon't worry.
EEEThis is the deep subsection.
") -{ - CHECK_XPATH_NODESET(doc, STR("(article//section/title|/articleinfo/title|article/section/para)[last()]")) % 28; - CHECK_XPATH_NODESET(doc, STR("(article//section/title|/articleinfo/title|article/section/para)[1]")) % 10; - CHECK_XPATH_NUMBER(doc, STR("count(article/articleinfo/section[last()])"), 1); - CHECK_XPATH_NUMBER(doc, STR("count(article/articleinfo/section[last()][title='BBB'])"), 1); -} - -TEST_XML(xpath_xalan_position_4, "
hello
goodbyesayonara
aloha
") -{ - CHECK_XPATH_NODESET(doc, STR("chapter//footnote[1]")) % 4 % 7 % 12; -} - -TEST_XML(xpath_xalan_position_5, "
helloahoy
goodbyesayonaraadios
alohashalomyociao
") -{ - CHECK_XPATH_NODESET(doc, STR("chapter//footnote[2]")) % 6 % 11 % 21 % 23; - CHECK_XPATH_NODESET(doc, STR("(chapter//footnote)[2]")) % 6; - CHECK_XPATH_NODESET(doc, STR("(child::chapter/descendant-or-self::node())/footnote[2]")) % 6 % 11 % 21 % 23; - CHECK_XPATH_NODESET(doc, STR("chapter/descendant::footnote[6]")) % 16; - CHECK_XPATH_NODESET(doc, STR("chapter/descendant::footnote[6][1][last()]")) % 16; -} - -TEST_XML_FLAGS(xpath_xalan_position_6, "pcdata", parse_default | parse_pi | parse_comments) -{ - CHECK_XPATH_NUMBER(doc, STR("count(/node/@attr/ancestor-or-self::node())"), 3); - CHECK_XPATH_NUMBER(doc, STR("count(/node/text()/ancestor-or-self::node())"), 4); - CHECK_XPATH_NUMBER(doc, STR("count(/node/processing-instruction()/ancestor-or-self::node())"), 4); - CHECK_XPATH_NUMBER(doc, STR("count(/node/processing-instruction('pi1')/ancestor-or-self::node())"), 3); - CHECK_XPATH_NUMBER(doc, STR("count(/node/comment()/ancestor-or-self::node())"), 3); -} - -TEST_XML(xpath_xalan_position_7, "
helloahoy
goodbyesayonaraadios
alohaA3b-1A3b-2shalom
") -{ - CHECK_XPATH_NODESET(doc, STR("chapter/section//@title[7]")); - CHECK_XPATH_NODESET(doc, STR("(chapter/section//@title)[7]")) % 21; -} - -TEST_XML(xpath_xalan_match_1, "") -{ - xml_node c = doc.child(STR("root")); - - CHECK_XPATH_NODESET(c, STR("x[(position() mod 2)=1][position() > 3]")) % 21 % 27 % 33; - CHECK_XPATH_NODESET(c, STR("x[(position() mod 2)=1][position() > 3][position()=2]")) % 27; - CHECK_XPATH_NODESET(c, STR("x[(position() mod 2) > 0][position() > 3][2]")) % 27; - CHECK_XPATH_NODESET(c, STR("x[(position() mod 2)=1][position() > 3][last()]")) % 33; - CHECK_XPATH_NODESET(c, STR("x[(position() mod 2)=1][@num > 5][last()]")) % 33; - CHECK_XPATH_NODESET(c, STR("x[(@num mod 3)=2][position() > 2][last()]")) % 33; - CHECK_XPATH_NODESET(c, STR("x[(position() mod 2)=1][2][@num < 10]")) % 9; - CHECK_XPATH_NODESET(c, STR("x[(((((2*10)-4)+9) div 5) mod 3)]")) % 6; -} - - -TEST_XML(xpath_xalan_match_2, "doc-l1-v2doc-l1-x2doc-l1-l2-v3doc-l1-l2-w3doc-l1-l2-x3doc-l1-l2-y3doc-l1-l2-l3-v4doc-l1-l2-l3-x4") -{ - CHECK_XPATH_STRING(doc, STR("doc/l1/v2"), STR("doc-l1-v2")); - CHECK_XPATH_STRING(doc, STR("doc/child::l1/x2"), STR("doc-l1-x2")); - CHECK_XPATH_STRING(doc, STR("doc/l1//v3"), STR("doc-l1-l2-v3")); - CHECK_XPATH_STRING(doc, STR("doc//l2/w3"), STR("doc-l1-l2-w3")); - CHECK_XPATH_STRING(doc, STR("doc/child::l1//x3"), STR("doc-l1-l2-x3")); - CHECK_XPATH_STRING(doc, STR("doc//child::l2/y3"), STR("doc-l1-l2-y3")); - CHECK_XPATH_STRING(doc, STR("doc//l2//v4"), STR("doc-l1-l2-l3-v4")); - CHECK_XPATH_STRING(doc, STR("doc//child::l2//x4"), STR("doc-l1-l2-l3-x4")); - - CHECK_XPATH_STRING(doc, STR("doc/l1/v2"), STR("doc-l1-v2")); - CHECK_XPATH_STRING(doc, STR("doc/l1/child::x2"), STR("doc-l1-x2")); - CHECK_XPATH_STRING(doc, STR("doc/l1//v3"), STR("doc-l1-l2-v3")); - CHECK_XPATH_STRING(doc, STR("doc//l2/w3"), STR("doc-l1-l2-w3")); - CHECK_XPATH_STRING(doc, STR("doc/l1//child::x3"), STR("doc-l1-l2-x3")); - CHECK_XPATH_STRING(doc, STR("doc//l2/child::y3"), 
STR("doc-l1-l2-y3")); - CHECK_XPATH_STRING(doc, STR("doc//l2//v4"), STR("doc-l1-l2-l3-v4")); - CHECK_XPATH_STRING(doc, STR("doc//l2//child::x4"), STR("doc-l1-l2-l3-x4")); - - CHECK_XPATH_STRING(doc, STR("doc/l1/v2"), STR("doc-l1-v2")); - CHECK_XPATH_STRING(doc, STR("doc/child::l1/child::x2"), STR("doc-l1-x2")); - CHECK_XPATH_STRING(doc, STR("doc/l1//v3"), STR("doc-l1-l2-v3")); - CHECK_XPATH_STRING(doc, STR("doc//l2/w3"), STR("doc-l1-l2-w3")); - CHECK_XPATH_STRING(doc, STR("doc/child::l1//child::x3"), STR("doc-l1-l2-x3")); - CHECK_XPATH_STRING(doc, STR("doc//child::l2/child::y3"), STR("doc-l1-l2-y3")); - CHECK_XPATH_STRING(doc, STR("doc//l2//v4"), STR("doc-l1-l2-l3-v4")); - CHECK_XPATH_STRING(doc, STR("doc//child::l2//child::x4"), STR("doc-l1-l2-l3-x4")); -} - -TEST_XML(xpath_xalan_match_3, "John DoeJane Doe") -{ - CHECK_XPATH_NODESET(doc, STR("doc/child/*[starts-with(name(),'child-')]//name")) % 5 % 9; - CHECK_XPATH_NODESET(doc, STR("//@*")) % 6 % 10; -} - -TEST_XML(xpath_xalan_expression_1, "en
en
ENen-us
") -{ - CHECK_XPATH_NODESET(doc, STR("doc/para[@id='1' and lang('en')]")) % 3; - CHECK_XPATH_NODESET(doc, STR("doc/para[@id='4' and lang('en')]")) % 15; - CHECK_XPATH_NODESET(doc, STR("doc/div/para[lang('en')]")) % 9; - CHECK_XPATH_NODESET(doc, STR("doc/para[@id='3' and lang('en')]")) % 11; - CHECK_XPATH_NODESET(doc, STR("//para[lang('en')]/ancestor-or-self::*[@xml:lang]/@xml:lang")) % 5 % 8 % 13 % 17; -} - -TEST_XML(xpath_xalan_predicate_1, "1234") -{ - xml_node c = doc.child(STR("doc")); - - CHECK_XPATH_NODESET(c, STR("a[true()=4]")) % 3 % 5 % 7 % 9; - CHECK_XPATH_NODESET(c, STR("a[true()='stringwithchars']")) % 3 % 5 % 7 % 9; - CHECK_XPATH_NODESET(c, STR("a[true()=following-sibling::*]")) % 3 % 5 % 7; - CHECK_XPATH_NODESET(c, STR("a[true()=preceding-sibling::*]")) % 5 % 7 % 9; - CHECK_XPATH_NODESET(c, STR("a[3=following-sibling::*]")) % 3 % 5; - CHECK_XPATH_NODESET(c, STR("a[0 < true()]")) % 3 % 5 % 7 % 9; - CHECK_XPATH_NODESET(c, STR("a['3.5' < 4]")) % 3 % 5 % 7 % 9; - CHECK_XPATH_NODESET(c, STR("a[3 < following-sibling::*]")) % 3 % 5 % 7; - CHECK_XPATH_NODESET(c, STR("a[following-sibling::*>3]")) % 3 % 5 % 7; - CHECK_XPATH_NODESET(c, STR("a[3 > following-sibling::*]")) % 3; - CHECK_XPATH_NODESET(c, STR("a[following-sibling::*<3]")) % 3; - CHECK_XPATH_NODESET(c, STR("a[1 < 2 < 3]")) % 3 % 5 % 7 % 9; - CHECK_XPATH_NODESET(c, STR("a[1 < 3 < 2]")) % 3 % 5 % 7 % 9; - CHECK_XPATH_NODESET(c, STR("a[following-sibling::*=true()]")) % 3 % 5 % 7; - CHECK_XPATH_NODESET(c, STR("a[false()!=following-sibling::*]")) % 3 % 5 % 7; - CHECK_XPATH_NODESET(c, STR("a[following-sibling::*!=false()]")) % 3 % 5 % 7; - CHECK_XPATH_NODESET(c, STR("a[following-sibling::*=3]")) % 3 % 5; - CHECK_XPATH_NODESET(c, STR("a[3=following-sibling::*]")) % 3 % 5; - CHECK_XPATH_NODESET(c, STR("a[4!=following-sibling::*]")) % 3 % 5; - CHECK_XPATH_NODESET(c, STR("a[following-sibling::*!=4]")) % 3 % 5; - CHECK_XPATH_NODESET(c, STR("a[3>=following-sibling::*]")) % 3 % 5; - CHECK_XPATH_NODESET(c, 
STR("a[3<=following-sibling::*]")) % 3 % 5 % 7; - CHECK_XPATH_NODESET(c, STR("a[following-sibling::*<=3]")) % 3 % 5; - CHECK_XPATH_NODESET(c, STR("a[following-sibling::*>=3]")) % 3 % 5 % 7; -} - -TEST_XML(xpath_xalan_predicate_2, "") -{ - xml_node c = doc.child(STR("foo")); - - CHECK_XPATH_NODESET(c, STR("bar[@a='1' and @b='1']")) % 75 % 81 % 87 % 93; - CHECK_XPATH_NODESET(c, STR("bar[(@a='1' or @b='1') and @c='1']")) % 39 % 45 % 63 % 69 % 87 % 93; - CHECK_XPATH_NODESET(c, STR("bar[@a='1' and (@b='1' or @c='1') and @d='1']")) % 69 % 81 % 93; - CHECK_XPATH_NODESET(c, STR("bar[@a='1' and @b='1' or @c='1' and @d='1']")) % 21 % 45 % 69 % 75 % 81 % 87 % 93; - CHECK_XPATH_NODESET(c, STR("bar[(@a='1' and @b='1') or (@c='1' and @d='1')]")) % 21 % 45 % 69 % 75 % 81 % 87 % 93; - CHECK_XPATH_NODESET(c, STR("bar[@a='1' or (@b='1' and @c='1') or @d='1']")) % 9 % 21 % 33 % 39 % 45 % 51 % 57 % 63 % 69 % 75 % 81 % 87 % 93; - CHECK_XPATH_NODESET(c, STR("bar[(@a='1' or @b='1') and (@c='1' or @d='1')]")) % 33 % 39 % 45 % 57 % 63 % 69 % 81 % 87 % 93; - CHECK_XPATH_NODESET(c, STR("bar[@a='1' or @b='1' and @c='1' or @d='1']")) % 9 % 21 % 33 % 39 % 45 % 51 % 57 % 63 % 69 % 75 % 81 % 87 % 93; - CHECK_XPATH_NODESET(c, STR("bar[@a='1' or @b='1' or @c='1']")) % 15 % 21 % 27 % 33 % 39 % 45 % 51 % 57 % 63 % 69 % 75 % 81 % 87 % 93; -} - -TEST_XML(xpath_xalan_predicate_3, "12345") -{ - xml_node c = doc.child(STR("doc")); - - CHECK_XPATH_NUMBER(c, STR("count(a[@ex])"), 2); - CHECK_XPATH_NUMBER(c, STR("count(a[@ex=''])"), 1); - CHECK_XPATH_NUMBER(c, STR("count(a[string-length(@ex)=0])"), 4); - CHECK_XPATH_NUMBER(c, STR("count(a[@ex!=''])"), 1); - CHECK_XPATH_NUMBER(c, STR("count(a[string-length(@ex) > 0])"), 1); - CHECK_XPATH_NUMBER(c, STR("count(a[not(@ex)])"), 3); - CHECK_XPATH_NUMBER(c, STR("count(a[not(@ex='')])"), 4); - CHECK_XPATH_NUMBER(c, STR("count(a[not(string-length(@ex)=0)])"), 1); - CHECK_XPATH_NUMBER(c, STR("count(a[@why='value'])"), 1); - CHECK_XPATH_NUMBER(c, 
STR("count(a[@why!='value'])"), 1); -} - -TEST_XML(xpath_xalan_predicate_4, "
configuration".(split /\s+/)[0]."
$define" . ($present ? "+" : " ") . "
$platform$toolset pass"; + + if ($coverage_pugixml > 0 || $coverage_pugixpath > 0) + { + print "
" . ($coverage_pugixml + 0) . "%
" . ($coverage_pugixpath + 0) . "%
"; + } + + print "
fail
text1text2text3text4
text1text2text3text4
1.11.2
2.12.22.3
3.13.23.2.1
44.14.1.1
5.15.25.35.4
6.16.2
7.17.27.3
8.18.28.38.4
") -{ - CHECK_XPATH_NUMBER(doc, STR("count(//tr)"), 8); - CHECK_XPATH_NUMBER(doc, STR("count(//tr[count(./td)=3])"), 2); -} - -TEST_XML(xpath_xalan_predicate_5, "Wrong node selected!!Test executed successfullyWrong node selected!!") -{ - CHECK_XPATH_STRING(doc, STR("doc/element1[(((((2*10)-4)+9) div 5) mod 3 )]"), STR("Test executed successfully")); - CHECK_XPATH_STRING(doc, STR("doc/element1[(((((2*10)-4)+9) div 5) mod floor(3))]"), STR("Test executed successfully")); - CHECK_XPATH_STRING(doc, STR("doc/element1[floor(2)]"), STR("Test executed successfully")); -} - -TEST_XML(xpath_xalan_predicate_6, "12target34") -{ - CHECK_XPATH_STRING(doc, STR("doc/a['target'=descendant::*]"), STR("2target")); - CHECK_XPATH_STRING(doc, STR("doc/a[descendant::*='target']"), STR("2target")); -} - -TEST_XML(xpath_xalan_predicate_7, "12target34missed") -{ - CHECK_XPATH_STRING(doc, STR("doc/a['target'!=descendant::*]"), STR("4missed")); - CHECK_XPATH_STRING(doc, STR("doc/a[descendant::*!='target']"), STR("4missed")); -} - -TEST_XML(xpath_xalan_predicate_8, "this23thisthisthathellogoodbyethisthatthishellogoodbyeother") -{ - xml_node c = doc.child(STR("doc")); - - CHECK_XPATH_NODESET(c, STR("foo[(bar[2])='this']")) % 13; - CHECK_XPATH_NODESET(c, STR("foo[(bar[(baz[2])='goodbye'])]")) % 23 % 38; - CHECK_XPATH_NODESET(c, STR("foo[(bar[2][(baz[2])='goodbye'])]")) % 38; -} - -TEST_XML(xpath_xalan_predicate_9, "f-insidef-insidef-insidef-insidef-inside") -{ - CHECK_XPATH_NODESET(doc, STR("doc/*[starts-with(name(.),'f')]")) % 23; - CHECK_XPATH_NODESET(doc, STR("//*[starts-with(name(.),'f')]")) % 8 % 15 % 23 % 24; -} - -TEST_XML(xpath_xalan_predicate_10, "Text from first elementText from child1 of first elementText from child2 of first elementText from second elementText from child1 of second elementText from child2 of second element (correct execution)") -{ - xml_node c = doc.child(STR("doc")); - - CHECK_XPATH_STRING(c, STR("//child2[ancestor::element2]"), STR("Text from child2 of second 
element (correct execution)")); - CHECK_XPATH_STRING(c, STR("//child2[ancestor-or-self::element2]"), STR("Text from child2 of second element (correct execution)")); - CHECK_XPATH_STRING(c, STR("//child2[attribute::attr1]"), STR("Text from child2 of second element (correct execution)")); -} - -TEST_XML(xpath_xalan_predicate_11, "123456") -{ - xml_node c = doc.child(STR("doc")); - - CHECK_XPATH_NODESET(c, STR("a[@squeesh or (@squish and @squash)]")) % 3 % 7 % 11 % 18; - CHECK_XPATH_NODESET(c, STR("a[(@squeesh or @squish) and @squash]")) % 3 % 11; - CHECK_XPATH_NODESET(c, STR("a[@squeesh or @squish and @squash]")) % 3 % 7 % 11 % 18; -} - -TEST_XML(xpath_xalan_predicate_12, "12target3target") -{ - CHECK_XPATH_STRING(doc, STR("doc/a[following-sibling::*=descendant::*]"), STR("2target")); -} - -TEST_XML(xpath_xalan_predicate_13, "12target3") -{ - CHECK_XPATH_NODESET(doc, STR("doc/a[('target'=descendant::*) or @squish]")) % 3 % 6; - CHECK_XPATH_NODESET(doc, STR("doc/a[not(('target'=descendant::*) or @squish)]")) % 10; -} - -TEST_XML(xpath_xalan_predicate_14, "12child234child4") -{ - CHECK_XPATH_NODESET(doc, STR("doc/a[not(@*)]")) % 6 % 11; -} - -TEST_XML(xpath_xalan_predicate_15, "xinside") -{ - CHECK_XPATH_NODESET(doc, STR("doc/descendant::*[string-length(name(.))=1]")) % 3 % 6 % 9 % 11 % 13; -} - -#endif +#ifndef PUGIXML_NO_XPATH + +#include "common.hpp" + +TEST_XML(xpath_xalan_position_1, "1234") +{ + xml_node c = doc.child(STR("doc")); + + CHECK_XPATH_BOOLEAN(c, STR("position()=1"), true); + CHECK_XPATH_NODESET(c, STR("*[position()=4]")) % 9; +} + +TEST_XML_FLAGS(xpath_xalan_position_2, "111912632827256345", parse_default | parse_comments | parse_pi) +{ + xml_node c = doc.child(STR("doc")); + + CHECK_XPATH_NODESET(c, STR("*[@test and position()=8]")) % 27; + CHECK_XPATH_NODESET(c, STR("*[@test][position()=4]/num")) % 29; + CHECK_XPATH_NUMBER(c, STR("count(*)"), 8); + CHECK_XPATH_NODESET(c, STR("*[last()=position()]")) % 27; + CHECK_XPATH_NODESET(c, 
STR("a[position()=2]")) % 7; + CHECK_XPATH_NODESET(c, STR("a[3]/../a[position()=4]/num/../@test")) % 14; + CHECK_XPATH_BOOLEAN(c, STR("not(position()=last())"), false); + CHECK_XPATH_BOOLEAN(c, STR("position()=2"), false); + CHECK_XPATH_BOOLEAN(c, STR("last()=1"), true); + CHECK_XPATH_BOOLEAN(c, STR("last()+2=3"), true); + CHECK_XPATH_NODESET(c, STR("a[position()=5 mod 3]")) % 7; + CHECK_XPATH_NODESET(c, STR("a/num/text()[position()=1]")) % 6 % 9 % 12 % 16 % 19 % 22 % 26 % 30; + CHECK_XPATH_NODESET(c, STR("a/num/text()[position()=2]")) % 32; + CHECK_XPATH_NODESET(c, STR("a/num/text()[position()=last()]")) % 6 % 9 % 12 % 16 % 19 % 22 % 26 % 32; + CHECK_XPATH_NODESET(c, STR("a/num/text()[1]")) % 6 % 9 % 12 % 16 % 19 % 22 % 26 % 30; + CHECK_XPATH_NODESET(c, STR("a/num/text()[2]")) % 32; + CHECK_XPATH_NODESET(c, STR("a/num/text()[last()]")) % 6 % 9 % 12 % 16 % 19 % 22 % 26 % 32; + CHECK_XPATH_NODESET(c, STR("a[floor(last() div 3)]")) % 7; + CHECK_XPATH_NODESET(c, STR("a[ceiling(last() div 3)]")) % 10; + CHECK_XPATH_NODESET(c, STR("a[round(last() div 3)]")) % 10; + CHECK_XPATH_NODESET(c, STR("a[last() div 3]")); + CHECK_XPATH_NODESET(c, STR("a[last() div 2]")) % 13; + CHECK_XPATH_NODESET(c, STR("a[3]/../a[position()>=2 and position()<=4]")) % 7 % 10 % 13; + CHECK_XPATH_NUMBER(c, STR("count(a[position()>=2 and position()<=4]/num)"), 3); + CHECK_XPATH_NUMBER(c, STR("count(a/@*)"), 4); + CHECK_XPATH_NUMBER(c, STR("count(a/attribute::*)"), 4); + CHECK_XPATH_NODESET(c, STR("*[not(@test)][position()=last()]")) % 20; + CHECK_XPATH_NODESET(c, STR("*[not(@test)][last()]")) % 20; + CHECK_XPATH_NODESET(c, STR("a[3-2]")) % 3; + CHECK_XPATH_NODESET(c, STR("a[0]")); + CHECK_XPATH_NODESET(c, STR("a[9]")); + CHECK_XPATH_NODESET(c, STR("a['3']")) % 3 % 7 % 10 % 13 % 17 % 20 % 23 % 27; + CHECK_XPATH_NODESET(c, STR("a[number('3')]")) % 10; + CHECK_XPATH_NODESET(c, STR("processing-instruction()[2]")) % 34; + CHECK_XPATH_NODESET(c, STR("processing-instruction('pi')[2]")) % 34; + 
CHECK_XPATH_NODESET(c, STR("comment()[2]")) % 36; + CHECK_XPATH_NODESET(c, STR("a/*[last()]")) % 5 % 8 % 11 % 15 % 18 % 21 % 25 % 29; + CHECK_XPATH_NODESET(c, STR("a/child::*[last()]")) % 5 % 8 % 11 % 15 % 18 % 21 % 25 % 29; + CHECK_XPATH_NODESET(c, STR("a/descendant::*[last()]")) % 5 % 8 % 11 % 15 % 18 % 21 % 25 % 31; + CHECK_XPATH_NODESET(c, STR("a/child::node()[last()]")) % 5 % 8 % 11 % 15 % 18 % 21 % 25 % 29; + CHECK_XPATH_NODESET(c, STR("a/descendant::text()[last()]")) % 6 % 9 % 12 % 16 % 19 % 22 % 26 % 32; + CHECK_XPATH_NODESET(c, STR("child::comment()[last()]")) % 36; +} + +TEST_XML(xpath_xalan_position_3, "
AAA
BBBAbout this article
CCCThis is the section titled 'ZZZ'.DDDDon't worry.
EEEThis is the deep subsection.
") +{ + CHECK_XPATH_NODESET(doc, STR("(article//section/title|/articleinfo/title|article/section/para)[last()]")) % 28; + CHECK_XPATH_NODESET(doc, STR("(article//section/title|/articleinfo/title|article/section/para)[1]")) % 10; + CHECK_XPATH_NUMBER(doc, STR("count(article/articleinfo/section[last()])"), 1); + CHECK_XPATH_NUMBER(doc, STR("count(article/articleinfo/section[last()][title='BBB'])"), 1); +} + +TEST_XML(xpath_xalan_position_4, "
hello
goodbyesayonara
aloha
") +{ + CHECK_XPATH_NODESET(doc, STR("chapter//footnote[1]")) % 4 % 7 % 12; +} + +TEST_XML(xpath_xalan_position_5, "
helloahoy
goodbyesayonaraadios
alohashalomyociao
") +{ + CHECK_XPATH_NODESET(doc, STR("chapter//footnote[2]")) % 6 % 11 % 21 % 23; + CHECK_XPATH_NODESET(doc, STR("(chapter//footnote)[2]")) % 6; + CHECK_XPATH_NODESET(doc, STR("(child::chapter/descendant-or-self::node())/footnote[2]")) % 6 % 11 % 21 % 23; + CHECK_XPATH_NODESET(doc, STR("chapter/descendant::footnote[6]")) % 16; + CHECK_XPATH_NODESET(doc, STR("chapter/descendant::footnote[6][1][last()]")) % 16; +} + +TEST_XML_FLAGS(xpath_xalan_position_6, "pcdata", parse_default | parse_pi | parse_comments) +{ + CHECK_XPATH_NUMBER(doc, STR("count(/node/@attr/ancestor-or-self::node())"), 3); + CHECK_XPATH_NUMBER(doc, STR("count(/node/text()/ancestor-or-self::node())"), 4); + CHECK_XPATH_NUMBER(doc, STR("count(/node/processing-instruction()/ancestor-or-self::node())"), 4); + CHECK_XPATH_NUMBER(doc, STR("count(/node/processing-instruction('pi1')/ancestor-or-self::node())"), 3); + CHECK_XPATH_NUMBER(doc, STR("count(/node/comment()/ancestor-or-self::node())"), 3); +} + +TEST_XML(xpath_xalan_position_7, "
helloahoy
goodbyesayonaraadios
alohaA3b-1A3b-2shalom
") +{ + CHECK_XPATH_NODESET(doc, STR("chapter/section//@title[7]")); + CHECK_XPATH_NODESET(doc, STR("(chapter/section//@title)[7]")) % 21; +} + +TEST_XML(xpath_xalan_match_1, "") +{ + xml_node c = doc.child(STR("root")); + + CHECK_XPATH_NODESET(c, STR("x[(position() mod 2)=1][position() > 3]")) % 21 % 27 % 33; + CHECK_XPATH_NODESET(c, STR("x[(position() mod 2)=1][position() > 3][position()=2]")) % 27; + CHECK_XPATH_NODESET(c, STR("x[(position() mod 2) > 0][position() > 3][2]")) % 27; + CHECK_XPATH_NODESET(c, STR("x[(position() mod 2)=1][position() > 3][last()]")) % 33; + CHECK_XPATH_NODESET(c, STR("x[(position() mod 2)=1][@num > 5][last()]")) % 33; + CHECK_XPATH_NODESET(c, STR("x[(@num mod 3)=2][position() > 2][last()]")) % 33; + CHECK_XPATH_NODESET(c, STR("x[(position() mod 2)=1][2][@num < 10]")) % 9; + CHECK_XPATH_NODESET(c, STR("x[(((((2*10)-4)+9) div 5) mod 3)]")) % 6; +} + + +TEST_XML(xpath_xalan_match_2, "doc-l1-v2doc-l1-x2doc-l1-l2-v3doc-l1-l2-w3doc-l1-l2-x3doc-l1-l2-y3doc-l1-l2-l3-v4doc-l1-l2-l3-x4") +{ + CHECK_XPATH_STRING(doc, STR("doc/l1/v2"), STR("doc-l1-v2")); + CHECK_XPATH_STRING(doc, STR("doc/child::l1/x2"), STR("doc-l1-x2")); + CHECK_XPATH_STRING(doc, STR("doc/l1//v3"), STR("doc-l1-l2-v3")); + CHECK_XPATH_STRING(doc, STR("doc//l2/w3"), STR("doc-l1-l2-w3")); + CHECK_XPATH_STRING(doc, STR("doc/child::l1//x3"), STR("doc-l1-l2-x3")); + CHECK_XPATH_STRING(doc, STR("doc//child::l2/y3"), STR("doc-l1-l2-y3")); + CHECK_XPATH_STRING(doc, STR("doc//l2//v4"), STR("doc-l1-l2-l3-v4")); + CHECK_XPATH_STRING(doc, STR("doc//child::l2//x4"), STR("doc-l1-l2-l3-x4")); + + CHECK_XPATH_STRING(doc, STR("doc/l1/v2"), STR("doc-l1-v2")); + CHECK_XPATH_STRING(doc, STR("doc/l1/child::x2"), STR("doc-l1-x2")); + CHECK_XPATH_STRING(doc, STR("doc/l1//v3"), STR("doc-l1-l2-v3")); + CHECK_XPATH_STRING(doc, STR("doc//l2/w3"), STR("doc-l1-l2-w3")); + CHECK_XPATH_STRING(doc, STR("doc/l1//child::x3"), STR("doc-l1-l2-x3")); + CHECK_XPATH_STRING(doc, STR("doc//l2/child::y3"), 
STR("doc-l1-l2-y3")); + CHECK_XPATH_STRING(doc, STR("doc//l2//v4"), STR("doc-l1-l2-l3-v4")); + CHECK_XPATH_STRING(doc, STR("doc//l2//child::x4"), STR("doc-l1-l2-l3-x4")); + + CHECK_XPATH_STRING(doc, STR("doc/l1/v2"), STR("doc-l1-v2")); + CHECK_XPATH_STRING(doc, STR("doc/child::l1/child::x2"), STR("doc-l1-x2")); + CHECK_XPATH_STRING(doc, STR("doc/l1//v3"), STR("doc-l1-l2-v3")); + CHECK_XPATH_STRING(doc, STR("doc//l2/w3"), STR("doc-l1-l2-w3")); + CHECK_XPATH_STRING(doc, STR("doc/child::l1//child::x3"), STR("doc-l1-l2-x3")); + CHECK_XPATH_STRING(doc, STR("doc//child::l2/child::y3"), STR("doc-l1-l2-y3")); + CHECK_XPATH_STRING(doc, STR("doc//l2//v4"), STR("doc-l1-l2-l3-v4")); + CHECK_XPATH_STRING(doc, STR("doc//child::l2//child::x4"), STR("doc-l1-l2-l3-x4")); +} + +TEST_XML(xpath_xalan_match_3, "John DoeJane Doe") +{ + CHECK_XPATH_NODESET(doc, STR("doc/child/*[starts-with(name(),'child-')]//name")) % 5 % 9; + CHECK_XPATH_NODESET(doc, STR("//@*")) % 6 % 10; +} + +TEST_XML(xpath_xalan_expression_1, "en
en
ENen-us
") +{ + CHECK_XPATH_NODESET(doc, STR("doc/para[@id='1' and lang('en')]")) % 3; + CHECK_XPATH_NODESET(doc, STR("doc/para[@id='4' and lang('en')]")) % 15; + CHECK_XPATH_NODESET(doc, STR("doc/div/para[lang('en')]")) % 9; + CHECK_XPATH_NODESET(doc, STR("doc/para[@id='3' and lang('en')]")) % 11; + CHECK_XPATH_NODESET(doc, STR("//para[lang('en')]/ancestor-or-self::*[@xml:lang]/@xml:lang")) % 5 % 8 % 13 % 17; +} + +TEST_XML(xpath_xalan_predicate_1, "1234") +{ + xml_node c = doc.child(STR("doc")); + + CHECK_XPATH_NODESET(c, STR("a[true()=4]")) % 3 % 5 % 7 % 9; + CHECK_XPATH_NODESET(c, STR("a[true()='stringwithchars']")) % 3 % 5 % 7 % 9; + CHECK_XPATH_NODESET(c, STR("a[true()=following-sibling::*]")) % 3 % 5 % 7; + CHECK_XPATH_NODESET(c, STR("a[true()=preceding-sibling::*]")) % 5 % 7 % 9; + CHECK_XPATH_NODESET(c, STR("a[3=following-sibling::*]")) % 3 % 5; + CHECK_XPATH_NODESET(c, STR("a[0 < true()]")) % 3 % 5 % 7 % 9; + CHECK_XPATH_NODESET(c, STR("a['3.5' < 4]")) % 3 % 5 % 7 % 9; + CHECK_XPATH_NODESET(c, STR("a[3 < following-sibling::*]")) % 3 % 5 % 7; + CHECK_XPATH_NODESET(c, STR("a[following-sibling::*>3]")) % 3 % 5 % 7; + CHECK_XPATH_NODESET(c, STR("a[3 > following-sibling::*]")) % 3; + CHECK_XPATH_NODESET(c, STR("a[following-sibling::*<3]")) % 3; + CHECK_XPATH_NODESET(c, STR("a[1 < 2 < 3]")) % 3 % 5 % 7 % 9; + CHECK_XPATH_NODESET(c, STR("a[1 < 3 < 2]")) % 3 % 5 % 7 % 9; + CHECK_XPATH_NODESET(c, STR("a[following-sibling::*=true()]")) % 3 % 5 % 7; + CHECK_XPATH_NODESET(c, STR("a[false()!=following-sibling::*]")) % 3 % 5 % 7; + CHECK_XPATH_NODESET(c, STR("a[following-sibling::*!=false()]")) % 3 % 5 % 7; + CHECK_XPATH_NODESET(c, STR("a[following-sibling::*=3]")) % 3 % 5; + CHECK_XPATH_NODESET(c, STR("a[3=following-sibling::*]")) % 3 % 5; + CHECK_XPATH_NODESET(c, STR("a[4!=following-sibling::*]")) % 3 % 5; + CHECK_XPATH_NODESET(c, STR("a[following-sibling::*!=4]")) % 3 % 5; + CHECK_XPATH_NODESET(c, STR("a[3>=following-sibling::*]")) % 3 % 5; + CHECK_XPATH_NODESET(c, 
STR("a[3<=following-sibling::*]")) % 3 % 5 % 7; + CHECK_XPATH_NODESET(c, STR("a[following-sibling::*<=3]")) % 3 % 5; + CHECK_XPATH_NODESET(c, STR("a[following-sibling::*>=3]")) % 3 % 5 % 7; +} + +TEST_XML(xpath_xalan_predicate_2, "") +{ + xml_node c = doc.child(STR("foo")); + + CHECK_XPATH_NODESET(c, STR("bar[@a='1' and @b='1']")) % 75 % 81 % 87 % 93; + CHECK_XPATH_NODESET(c, STR("bar[(@a='1' or @b='1') and @c='1']")) % 39 % 45 % 63 % 69 % 87 % 93; + CHECK_XPATH_NODESET(c, STR("bar[@a='1' and (@b='1' or @c='1') and @d='1']")) % 69 % 81 % 93; + CHECK_XPATH_NODESET(c, STR("bar[@a='1' and @b='1' or @c='1' and @d='1']")) % 21 % 45 % 69 % 75 % 81 % 87 % 93; + CHECK_XPATH_NODESET(c, STR("bar[(@a='1' and @b='1') or (@c='1' and @d='1')]")) % 21 % 45 % 69 % 75 % 81 % 87 % 93; + CHECK_XPATH_NODESET(c, STR("bar[@a='1' or (@b='1' and @c='1') or @d='1']")) % 9 % 21 % 33 % 39 % 45 % 51 % 57 % 63 % 69 % 75 % 81 % 87 % 93; + CHECK_XPATH_NODESET(c, STR("bar[(@a='1' or @b='1') and (@c='1' or @d='1')]")) % 33 % 39 % 45 % 57 % 63 % 69 % 81 % 87 % 93; + CHECK_XPATH_NODESET(c, STR("bar[@a='1' or @b='1' and @c='1' or @d='1']")) % 9 % 21 % 33 % 39 % 45 % 51 % 57 % 63 % 69 % 75 % 81 % 87 % 93; + CHECK_XPATH_NODESET(c, STR("bar[@a='1' or @b='1' or @c='1']")) % 15 % 21 % 27 % 33 % 39 % 45 % 51 % 57 % 63 % 69 % 75 % 81 % 87 % 93; +} + +TEST_XML(xpath_xalan_predicate_3, "12345") +{ + xml_node c = doc.child(STR("doc")); + + CHECK_XPATH_NUMBER(c, STR("count(a[@ex])"), 2); + CHECK_XPATH_NUMBER(c, STR("count(a[@ex=''])"), 1); + CHECK_XPATH_NUMBER(c, STR("count(a[string-length(@ex)=0])"), 4); + CHECK_XPATH_NUMBER(c, STR("count(a[@ex!=''])"), 1); + CHECK_XPATH_NUMBER(c, STR("count(a[string-length(@ex) > 0])"), 1); + CHECK_XPATH_NUMBER(c, STR("count(a[not(@ex)])"), 3); + CHECK_XPATH_NUMBER(c, STR("count(a[not(@ex='')])"), 4); + CHECK_XPATH_NUMBER(c, STR("count(a[not(string-length(@ex)=0)])"), 1); + CHECK_XPATH_NUMBER(c, STR("count(a[@why='value'])"), 1); + CHECK_XPATH_NUMBER(c, 
STR("count(a[@why!='value'])"), 1); +} + +TEST_XML(xpath_xalan_predicate_4, "
1.11.2
2.12.22.3
3.13.23.2.1
44.14.1.1
5.15.25.35.4
6.16.2
7.17.27.3
8.18.28.38.4
") +{ + CHECK_XPATH_NUMBER(doc, STR("count(//tr)"), 8); + CHECK_XPATH_NUMBER(doc, STR("count(//tr[count(./td)=3])"), 2); +} + +TEST_XML(xpath_xalan_predicate_5, "Wrong node selected!!Test executed successfullyWrong node selected!!") +{ + CHECK_XPATH_STRING(doc, STR("doc/element1[(((((2*10)-4)+9) div 5) mod 3 )]"), STR("Test executed successfully")); + CHECK_XPATH_STRING(doc, STR("doc/element1[(((((2*10)-4)+9) div 5) mod floor(3))]"), STR("Test executed successfully")); + CHECK_XPATH_STRING(doc, STR("doc/element1[floor(2)]"), STR("Test executed successfully")); +} + +TEST_XML(xpath_xalan_predicate_6, "12target34") +{ + CHECK_XPATH_STRING(doc, STR("doc/a['target'=descendant::*]"), STR("2target")); + CHECK_XPATH_STRING(doc, STR("doc/a[descendant::*='target']"), STR("2target")); +} + +TEST_XML(xpath_xalan_predicate_7, "12target34missed") +{ + CHECK_XPATH_STRING(doc, STR("doc/a['target'!=descendant::*]"), STR("4missed")); + CHECK_XPATH_STRING(doc, STR("doc/a[descendant::*!='target']"), STR("4missed")); +} + +TEST_XML(xpath_xalan_predicate_8, "this23thisthisthathellogoodbyethisthatthishellogoodbyeother") +{ + xml_node c = doc.child(STR("doc")); + + CHECK_XPATH_NODESET(c, STR("foo[(bar[2])='this']")) % 13; + CHECK_XPATH_NODESET(c, STR("foo[(bar[(baz[2])='goodbye'])]")) % 23 % 38; + CHECK_XPATH_NODESET(c, STR("foo[(bar[2][(baz[2])='goodbye'])]")) % 38; +} + +TEST_XML(xpath_xalan_predicate_9, "f-insidef-insidef-insidef-insidef-inside") +{ + CHECK_XPATH_NODESET(doc, STR("doc/*[starts-with(name(.),'f')]")) % 23; + CHECK_XPATH_NODESET(doc, STR("//*[starts-with(name(.),'f')]")) % 8 % 15 % 23 % 24; +} + +TEST_XML(xpath_xalan_predicate_10, "Text from first elementText from child1 of first elementText from child2 of first elementText from second elementText from child1 of second elementText from child2 of second element (correct execution)") +{ + xml_node c = doc.child(STR("doc")); + + CHECK_XPATH_STRING(c, STR("//child2[ancestor::element2]"), STR("Text from child2 of second 
element (correct execution)")); + CHECK_XPATH_STRING(c, STR("//child2[ancestor-or-self::element2]"), STR("Text from child2 of second element (correct execution)")); + CHECK_XPATH_STRING(c, STR("//child2[attribute::attr1]"), STR("Text from child2 of second element (correct execution)")); +} + +TEST_XML(xpath_xalan_predicate_11, "123456") +{ + xml_node c = doc.child(STR("doc")); + + CHECK_XPATH_NODESET(c, STR("a[@squeesh or (@squish and @squash)]")) % 3 % 7 % 11 % 18; + CHECK_XPATH_NODESET(c, STR("a[(@squeesh or @squish) and @squash]")) % 3 % 11; + CHECK_XPATH_NODESET(c, STR("a[@squeesh or @squish and @squash]")) % 3 % 7 % 11 % 18; +} + +TEST_XML(xpath_xalan_predicate_12, "12target3target") +{ + CHECK_XPATH_STRING(doc, STR("doc/a[following-sibling::*=descendant::*]"), STR("2target")); +} + +TEST_XML(xpath_xalan_predicate_13, "12target3") +{ + CHECK_XPATH_NODESET(doc, STR("doc/a[('target'=descendant::*) or @squish]")) % 3 % 6; + CHECK_XPATH_NODESET(doc, STR("doc/a[not(('target'=descendant::*) or @squish)]")) % 10; +} + +TEST_XML(xpath_xalan_predicate_14, "12child234child4") +{ + CHECK_XPATH_NODESET(doc, STR("doc/a[not(@*)]")) % 6 % 11; +} + +TEST_XML(xpath_xalan_predicate_15, "xinside") +{ + CHECK_XPATH_NODESET(doc, STR("doc/descendant::*[string-length(name(.))=1]")) % 3 % 6 % 9 % 11 % 13; +} + +#endif diff --git a/tests/test_xpath_xalan_5.cpp b/tests/test_xpath_xalan_5.cpp index 3a71bc2..e6a4fb9 100644 --- a/tests/test_xpath_xalan_5.cpp +++ b/tests/test_xpath_xalan_5.cpp @@ -1,293 +1,293 @@ -#ifndef PUGIXML_NO_XPATH - -#include "common.hpp" - -TEST_XML(xpath_xalan_select_1, "") -{ - CHECK_XPATH_STRING(doc, STR("/doc/a/b/@attr"), STR("test")); -} - -TEST_XML(xpath_xalan_select_2, "doremifasolatidoG#AAbBbCC#D") -{ - xml_node c = doc.child(STR("doc")); - - // This should come out fasolatido: - CHECK_XPATH_NODESET(c, STR("fa")) % 12; - // This should come out doremifasolatido: - CHECK_XPATH_NODESET(c, STR("mi | do | fa | re")) % 3 % 6 % 8 % 12; - // This should come out 
do-do-remi-mi1-mi2fasolatido-fa--so-: - CHECK_XPATH_NODESET(c, STR("mi[@mi2='mi2'] | do | fa/so/@so | fa | mi/@* | re | fa/@fa | do/@do")) % 3 % 4 % 6 % 8 % 9 % 10 % 12 % 13 % 16; - // This should come out solatidoG#: - CHECK_XPATH_NODESET(c, STR(".//*[@so]")) % 15 % 23; - // This should come out relatidoABb: - CHECK_XPATH_NODESET(c, STR("*//la | //Bflat | re")) % 6 % 18 % 28 % 31; - // This should come out domitiACD: - CHECK_XPATH_NODESET(c, STR("fa/../mi | Aflat/natural/la | Csharp//* | /doc/do | *//ti")) % 3 % 8 % 20 % 28 % 34 % 37; -} - -TEST_XML(xpath_xalan_select_3, "preceding sibling number 1current nodefollowing sibling number 3cousin 1cousin 2cousin 3") -{ - CHECK_XPATH_NODESET(doc.child(STR("doc")).child(STR("sub1")).child(STR("child2")), STR("preceding-sibling::child1|//child3")) % 4 % 8 % 15; -} - -TEST_XML(xpath_xalan_select_4, "bad1bad2bad3bad4OKbad5") -{ - xml_node c = doc.child(STR("doc")); - - CHECK_XPATH_NODESET(c, STR("child::sub")) % 11; - CHECK_XPATH_NODESET(c, STR("child ::sub")) % 11; - CHECK_XPATH_NODESET(c, STR("child:: sub")) % 11; - CHECK_XPATH_NODESET(c, STR("child :: sub")) % 11; -} - -TEST_XML_FLAGS(xpath_xalan_select_5, "bad0bad1bad2", parse_default | parse_comments) -{ - xml_node c = doc.child(STR("doc")); - - CHECK_XPATH_NODESET(c, STR("comment()")) % 4; - CHECK_XPATH_NODESET(c, STR("comment ()")) % 4; - CHECK_XPATH_NODESET(c, STR("comment ( ) ")) % 4; - CHECK_XPATH_NUMBER(c, STR("string-length()"), 12); - CHECK_XPATH_NUMBER(c, STR("string-length ()"), 12); - CHECK_XPATH_NUMBER(c, STR("string-length ( ) "), 12); -} - -TEST_XML(xpath_xalan_select_6, "
9
") -{ - xml_node c = doc.child(STR("div")); - - CHECK_XPATH_NUMBER(doc, STR("div +3"), 12); - CHECK_XPATH_NUMBER(doc, STR("* +3"), 12); - CHECK_XPATH_NUMBER(c, STR("@div - 5"), 15); - CHECK_XPATH_NUMBER(c, STR("@div -5"), 15); - CHECK_XPATH_NUMBER(c, STR("@div-5"), 12); - CHECK_XPATH_NUMBER(c, STR("@*-5"), 15); - CHECK_XPATH_NUMBER(doc, STR("16-div"), 7); - CHECK_XPATH_NUMBER(doc, STR("25-*"), 16); - CHECK_XPATH_NUMBER(doc, STR("54 div*"), 6); - CHECK_XPATH_NUMBER(doc, STR("(* - 4) div 2"), 2.5); - CHECK_XPATH_NUMBER(doc, STR("' 6 ' div 2"), 3); - CHECK_XPATH_NUMBER(doc, STR("' 6 '*div"), 54); - CHECK_XPATH_NUMBER(doc, STR("5.*."), 45); - CHECK_XPATH_NUMBER(doc, STR("5.+."), 14); -} - -TEST_XML(xpath_xalan_select_7, "
9
8
") -{ - xml_node c = doc.child(STR("doc")); - - CHECK_XPATH_NUMBER(c, STR("attribute :: div"), 20); - CHECK_XPATH_NUMBER(c, STR("attribute :: *"), 20); - CHECK_XPATH_NUMBER(c, STR("attribute*(div - 4)"), 40); - CHECK_XPATH_NUMBER(c, STR("(* - 4)**"), 45); -} - -TEST_XML(xpath_xalan_select_8, "x
7
y
9
z
5
") -{ - CHECK_XPATH_NODESET(doc, STR("doc/a[div=9]")) % 7; -} - -TEST_XML(xpath_xalan_select_9, "7379929920") -{ - CHECK_XPATH_NODESET(doc, STR("doc/a[*=9]")) % 9 % 15 % 21; -} - -TEST_XML(xpath_xalan_select_10, "child1child2") -{ - CHECK_XPATH_NODESET(doc, STR("/doc/sub1/child1|/doc/sub2/child2")) % 4 % 7; - CHECK_XPATH_NODESET(doc.child(STR("doc")), STR("sub1/child1|/doc/sub2/child2")) % 4 % 7; - CHECK_XPATH_NODESET(doc.child(STR("doc")), STR("sub1/child1|sub2/child2")) % 4 % 7; - CHECK_XPATH_NODESET(doc, STR("//self::child1|//self::child2")) % 4 % 7; - CHECK_XPATH_NODESET(doc, STR("//child1|//child2")) % 4 % 7; - CHECK_XPATH_NODESET(doc, STR("//child1|//child2|//child3")) % 4 % 7 % 10; -} - -TEST_XML(xpath_xalan_select_11, "descendant number 1descendant number 2") -{ - CHECK_XPATH_NODESET(doc, STR("//child1/ancestor::sub1|//child1/ancestor::sub2")) % 3 % 7; -} - -TEST_XML(xpath_xalan_select_12, "child number 1grandchild number 1child number 2grandchild number 2child number 3grandchild number 3child number 4grandchild number 4") -{ - CHECK_XPATH_NODESET(doc, STR("//child/ancestor-or-self::sub | //child/ancestor-or-self::sub-sub")) % 3 % 7 % 15 % 19 % 31; -} - -TEST_XML(xpath_xalan_select_13, "Carmelo MontanezNineDavid MarstonSevenMary BradyTenLynne RosenthalFive") -{ - CHECK_XPATH_NODESET(doc, STR("doc/book/author[name/@real='no']|doc/book/author[name/@real='yes']")) % 4 % 20; - CHECK_XPATH_NODESET(doc, STR("doc/book/author[(name/@real='no' and position()=1)]|doc/book/author[(name/@real='yes' and position()=last())]")) % 4 % 20; - CHECK_XPATH_NODESET(doc, STR("doc/book/author[name='Mary Brady']|doc/book/author[name/@real='no']")) % 4 % 20; - CHECK_XPATH_NODESET(doc, STR("doc/book/author/name|doc/book/author/bibliography/author/name")) % 5 % 13 % 21 % 28; - CHECK_XPATH_NODESET(doc, STR("doc/book/author/name|doc/book/author/bibliography/author/chapters")) % 5 % 13 % 21 % 30; - CHECK_XPATH_NODESET(doc, STR("doc/book/author/name|doc/book/author/noElement")) % 5 % 13 
% 21; - CHECK_XPATH_NODESET(doc, STR("//noChild1|//noChild2")); -} - -TEST_XML(xpath_xalan_select_14, "child number 1child number 2") -{ - xml_node c = doc.child(STR("doc")); - - CHECK_XPATH_NODESET(c, STR("child::sub1|child::sub2")) % 3 % 7; - CHECK_XPATH_NODESET(c, STR("descendant::child1|descendant::child2")) % 5 % 9; - CHECK_XPATH_NODESET(c, STR("descendant-or-self::sub1|descendant-or-self::sub2")) % 3 % 7; - CHECK_XPATH_NODESET(c.child(STR("sub2")), STR("preceding-sibling::sub1|following-sibling::sub3")) % 3 % 11; -} - -TEST_XML(xpath_xalan_select_15, "Selection of this child is an error.Selection of this child is an error.Selection of this child is an error.Selection of this child is an error.EFGSelection of this child is an error.Selection of this child is an error.JKSelection of this child is an error.Selection of this child is an error.NOP") -{ - xml_node c = doc.child(STR("doc")); - - CHECK_XPATH_NODESET(c, STR("child[@wide='3']|child[@deep='3']")) % 15 % 18 % 22 % 35 % 39 % 51 % 54 % 58; - CHECK_XPATH_NODESET(c, STR("child[@deep='3']|child[@wide='3']")) % 15 % 18 % 22 % 35 % 39 % 51 % 54 % 58; -} - -TEST_XML(xpath_xalan_select_16, "1234") -{ - CHECK_XPATH_NUMBER(doc, STR("count(doc/a/attribute::*)"), 9); - CHECK_XPATH_NUMBER(doc, STR("count(//@*)"), 9); - CHECK_XPATH_NUMBER(doc, STR("count(//@squish)"), 3); -} - -TEST_XML(xpath_xalan_select_17, "") -{ - xml_node c = doc.child(STR("directions")); - - CHECK_XPATH_NODESET(c, STR("north/* | north/dup1 | north/dup2")) % 4 % 5 % 6 % 7 % 8; - CHECK_XPATH_NODESET(c, STR("north/dup2 | north/dup1 | north/*")) % 4 % 5 % 6 % 7 % 8; - CHECK_XPATH_NODESET(c, STR("//north/dup2 | south/preceding-sibling::*[4]/* | north/dup1 | north/*")) % 4 % 5 % 6 % 7 % 8; - CHECK_XPATH_NODESET(c, STR("north/dup2 | south/preceding-sibling::*[4]/* | north/*")) % 4 % 5 % 6 % 7 % 8; -} - -TEST_XML(xpath_xalan_select_18, "HelloThereWorld") -{ - CHECK_XPATH_NODESET(doc, STR("/para/font[@color='green']")) % 6; - 
CHECK_XPATH_NODESET(doc.child(STR("para")), STR("/para/font[@color='green']")) % 6; - CHECK_XPATH_NODESET(doc.child(STR("para")).last_child(), STR("/para/font[@color='green']")) % 6; -} - -TEST_XML_FLAGS(xpath_xalan_select_19, "1in-a2345678in-c9", parse_default | parse_comments | parse_pi) -{ - CHECK_XPATH_NODESET(doc, STR("//*")) % 2 % 4 % 8 % 10 % 12 % 18; - CHECK_XPATH_NODESET(doc, STR("//node()")) % 2 % 3 % 4 % 5 % 6 % 7 % 8 % 9 % 10 % 11 % 12 % 13 % 14 % 15 % 16 % 17 % 18 % 19 % 20 % 21; - CHECK_XPATH_NODESET(doc, STR("//text()")) % 3 % 5 % 6 % 9 % 11 % 13 % 14 % 15 % 17 % 19 % 20; - CHECK_XPATH_NODESET(doc, STR("//comment()")) % 7 % 16; - CHECK_XPATH_NODESET(doc, STR("//processing-instruction()")) % 21; -} - -TEST_XML(xpath_xalan_bugzilla_1, "15152252") -{ - CHECK_XPATH_NODESET(doc, STR("/report/colData[@colId='F' and not(.=preceding::colData)]")) % 3; -} - -TEST(xpath_xalan_error_boolean) -{ - CHECK_XPATH_FAIL(STR("nt(true())")); - CHECK_XPATH_FAIL(STR("not(troo())")); - CHECK_XPATH_FAIL(STR("troo() and (2 = 2)")); - CHECK_XPATH_FAIL(STR("troo() or (2 = 2)")); - CHECK_XPATH_FAIL(STR("2 = troo()")); - CHECK_XPATH_FAIL(STR("boolean(troo())")); - CHECK_XPATH_FAIL(STR("true(doc)")); - CHECK_XPATH_FAIL(STR("false(doc)")); - CHECK_XPATH_FAIL(STR("not()")); - CHECK_XPATH_FAIL(STR("not(false(), doc)")); - CHECK_XPATH_FAIL(STR("boolean()")); - CHECK_XPATH_FAIL(STR("boolean(false(), doc)")); - CHECK_XPATH_FAIL(STR("lang()")); - CHECK_XPATH_FAIL(STR("lang('en','us')")); -} - -TEST(xpath_xalan_error_conditional) -{ - CHECK_XPATH_FAIL(STR("")); - CHECK_XPATH_FAIL(STR("@name='John' | @name='Joe'")); - CHECK_XPATH_FAIL(STR("\x95not(name(.)='')")); -} - -TEST(xpath_xalan_error_match) -{ - CHECK_XPATH_FAIL(STR("//")); - CHECK_XPATH_FAIL(STR("section1|")); - CHECK_XPATH_FAIL(STR("|section1")); -} - -TEST(xpath_xalan_error_math) -{ - CHECK_XPATH_FAIL(STR("6 quo 4")); - CHECK_XPATH_FAIL(STR("-troo()")); - CHECK_XPATH_FAIL(STR("number(troo())")); - CHECK_XPATH_FAIL(STR("5 * 
troo()")); - CHECK_XPATH_FAIL(STR("12 div troo()")); - CHECK_XPATH_FAIL(STR("number(8,doc)")); - CHECK_XPATH_FAIL(STR("sum(doc, 8)")); - CHECK_XPATH_FAIL(STR("sum()")); - CHECK_XPATH_FAIL(STR("floor(8,7)")); - CHECK_XPATH_FAIL(STR("floor()")); - CHECK_XPATH_FAIL(STR("ceiling(8,7)")); - CHECK_XPATH_FAIL(STR("ceiling()")); - CHECK_XPATH_FAIL(STR("round(8,7)")); - CHECK_XPATH_FAIL(STR("round()")); -} - -TEST(xpath_xalan_error_namespace) -{ - CHECK_XPATH_FAIL(STR("local-name(baz2:b,..)")); - CHECK_XPATH_FAIL(STR("namespace-uri(baz2:b,..)")); - CHECK_XPATH_FAIL(STR("name(a,b)")); - CHECK_XPATH_FAIL(STR(":foo")); - CHECK_XPATH_FAIL(STR("*:foo")); -} - -TEST(xpath_xalan_error_position) -{ - CHECK_XPATH_FAIL(STR("*[last(*,2)]")); - CHECK_XPATH_FAIL(STR("position(b)=1")); - CHECK_XPATH_FAIL(STR("count()")); - CHECK_XPATH_FAIL(STR("count(*,4)")); - CHECK_XPATH_FAIL(STR("position()=last(a)")); -} - -TEST(xpath_xalan_error_select) -{ - CHECK_XPATH_FAIL(STR("")); - CHECK_XPATH_FAIL(STR("count(troo())")); - CHECK_XPATH_FAIL(STR("c::sub")); - CHECK_XPATH_FAIL(STR("c()")); - CHECK_XPATH_FAIL(STR("(* - 4) foo 2")); - CHECK_XPATH_FAIL(STR("5 . 
+ *")); - CHECK_XPATH_FAIL(STR("4/.")); - CHECK_XPATH_FAIL(STR("true()/.")); - CHECK_XPATH_FAIL(STR("item//[@type='x']")); - CHECK_XPATH_FAIL(STR("//")); - CHECK_XPATH_FAIL(STR("item//")); - CHECK_XPATH_FAIL(STR("count(//)")); - CHECK_XPATH_FAIL(STR("substring-after(//,'0')")); - CHECK_XPATH_FAIL(STR("//+17")); - CHECK_XPATH_FAIL(STR("//|subitem")); - CHECK_XPATH_FAIL(STR("..[near-north]")); -} - -TEST(xpath_xalan_error_string) -{ - CHECK_XPATH_FAIL(STR("string(troo())")); - CHECK_XPATH_FAIL(STR("string-length(troo())")); - CHECK_XPATH_FAIL(STR("normalize-space(a,'\t\r\n ab cd ')")); - CHECK_XPATH_FAIL(STR("contains('ENCYCLOPEDIA')")); - CHECK_XPATH_FAIL(STR("contains('ENCYCLOPEDIA','LOPE',doc)")); - CHECK_XPATH_FAIL(STR("starts-with('ENCYCLOPEDIA')")); - CHECK_XPATH_FAIL(STR("starts-with('ENCYCLOPEDIA','LOPE',doc)")); - CHECK_XPATH_FAIL(STR("substring-before('ENCYCLOPEDIA')")); - CHECK_XPATH_FAIL(STR("substring-before('ENCYCLOPEDIA','LOPE',doc)")); - CHECK_XPATH_FAIL(STR("substring-after('ENCYCLOPEDIA')")); - CHECK_XPATH_FAIL(STR("substring-after('ENCYCLOPEDIA','LOPE',doc)")); - CHECK_XPATH_FAIL(STR("substring('ENCYCLOPEDIA')")); - CHECK_XPATH_FAIL(STR("substring('ENCYCLOPEDIA',4,5,2)")); - CHECK_XPATH_FAIL(STR("concat('x')")); - CHECK_XPATH_FAIL(STR("string-length('ENCYCLOPEDIA','PEDI')")); - CHECK_XPATH_FAIL(STR("translate('bar','abc')")); - CHECK_XPATH_FAIL(STR("translate('bar','abc','ABC','output')")); - CHECK_XPATH_FAIL(STR("string(22,44)")); - CHECK_XPATH_FAIL(STR("concat(/*)")); -} - -#endif +#ifndef PUGIXML_NO_XPATH + +#include "common.hpp" + +TEST_XML(xpath_xalan_select_1, "") +{ + CHECK_XPATH_STRING(doc, STR("/doc/a/b/@attr"), STR("test")); +} + +TEST_XML(xpath_xalan_select_2, "doremifasolatidoG#AAbBbCC#D") +{ + xml_node c = doc.child(STR("doc")); + + // This should come out fasolatido: + CHECK_XPATH_NODESET(c, STR("fa")) % 12; + // This should come out doremifasolatido: + CHECK_XPATH_NODESET(c, STR("mi | do | fa | re")) % 3 % 6 % 8 % 12; + // This 
should come out do-do-remi-mi1-mi2fasolatido-fa--so-: + CHECK_XPATH_NODESET(c, STR("mi[@mi2='mi2'] | do | fa/so/@so | fa | mi/@* | re | fa/@fa | do/@do")) % 3 % 4 % 6 % 8 % 9 % 10 % 12 % 13 % 16; + // This should come out solatidoG#: + CHECK_XPATH_NODESET(c, STR(".//*[@so]")) % 15 % 23; + // This should come out relatidoABb: + CHECK_XPATH_NODESET(c, STR("*//la | //Bflat | re")) % 6 % 18 % 28 % 31; + // This should come out domitiACD: + CHECK_XPATH_NODESET(c, STR("fa/../mi | Aflat/natural/la | Csharp//* | /doc/do | *//ti")) % 3 % 8 % 20 % 28 % 34 % 37; +} + +TEST_XML(xpath_xalan_select_3, "preceding sibling number 1current nodefollowing sibling number 3cousin 1cousin 2cousin 3") +{ + CHECK_XPATH_NODESET(doc.child(STR("doc")).child(STR("sub1")).child(STR("child2")), STR("preceding-sibling::child1|//child3")) % 4 % 8 % 15; +} + +TEST_XML(xpath_xalan_select_4, "bad1bad2bad3bad4OKbad5") +{ + xml_node c = doc.child(STR("doc")); + + CHECK_XPATH_NODESET(c, STR("child::sub")) % 11; + CHECK_XPATH_NODESET(c, STR("child ::sub")) % 11; + CHECK_XPATH_NODESET(c, STR("child:: sub")) % 11; + CHECK_XPATH_NODESET(c, STR("child :: sub")) % 11; +} + +TEST_XML_FLAGS(xpath_xalan_select_5, "bad0bad1bad2", parse_default | parse_comments) +{ + xml_node c = doc.child(STR("doc")); + + CHECK_XPATH_NODESET(c, STR("comment()")) % 4; + CHECK_XPATH_NODESET(c, STR("comment ()")) % 4; + CHECK_XPATH_NODESET(c, STR("comment ( ) ")) % 4; + CHECK_XPATH_NUMBER(c, STR("string-length()"), 12); + CHECK_XPATH_NUMBER(c, STR("string-length ()"), 12); + CHECK_XPATH_NUMBER(c, STR("string-length ( ) "), 12); +} + +TEST_XML(xpath_xalan_select_6, "
9
") +{ + xml_node c = doc.child(STR("div")); + + CHECK_XPATH_NUMBER(doc, STR("div +3"), 12); + CHECK_XPATH_NUMBER(doc, STR("* +3"), 12); + CHECK_XPATH_NUMBER(c, STR("@div - 5"), 15); + CHECK_XPATH_NUMBER(c, STR("@div -5"), 15); + CHECK_XPATH_NUMBER(c, STR("@div-5"), 12); + CHECK_XPATH_NUMBER(c, STR("@*-5"), 15); + CHECK_XPATH_NUMBER(doc, STR("16-div"), 7); + CHECK_XPATH_NUMBER(doc, STR("25-*"), 16); + CHECK_XPATH_NUMBER(doc, STR("54 div*"), 6); + CHECK_XPATH_NUMBER(doc, STR("(* - 4) div 2"), 2.5); + CHECK_XPATH_NUMBER(doc, STR("' 6 ' div 2"), 3); + CHECK_XPATH_NUMBER(doc, STR("' 6 '*div"), 54); + CHECK_XPATH_NUMBER(doc, STR("5.*."), 45); + CHECK_XPATH_NUMBER(doc, STR("5.+."), 14); +} + +TEST_XML(xpath_xalan_select_7, "
9
8
") +{ + xml_node c = doc.child(STR("doc")); + + CHECK_XPATH_NUMBER(c, STR("attribute :: div"), 20); + CHECK_XPATH_NUMBER(c, STR("attribute :: *"), 20); + CHECK_XPATH_NUMBER(c, STR("attribute*(div - 4)"), 40); + CHECK_XPATH_NUMBER(c, STR("(* - 4)**"), 45); +} + +TEST_XML(xpath_xalan_select_8, "x
7
y
9
z
5
") +{ + CHECK_XPATH_NODESET(doc, STR("doc/a[div=9]")) % 7; +} + +TEST_XML(xpath_xalan_select_9, "7379929920") +{ + CHECK_XPATH_NODESET(doc, STR("doc/a[*=9]")) % 9 % 15 % 21; +} + +TEST_XML(xpath_xalan_select_10, "child1child2") +{ + CHECK_XPATH_NODESET(doc, STR("/doc/sub1/child1|/doc/sub2/child2")) % 4 % 7; + CHECK_XPATH_NODESET(doc.child(STR("doc")), STR("sub1/child1|/doc/sub2/child2")) % 4 % 7; + CHECK_XPATH_NODESET(doc.child(STR("doc")), STR("sub1/child1|sub2/child2")) % 4 % 7; + CHECK_XPATH_NODESET(doc, STR("//self::child1|//self::child2")) % 4 % 7; + CHECK_XPATH_NODESET(doc, STR("//child1|//child2")) % 4 % 7; + CHECK_XPATH_NODESET(doc, STR("//child1|//child2|//child3")) % 4 % 7 % 10; +} + +TEST_XML(xpath_xalan_select_11, "descendant number 1descendant number 2") +{ + CHECK_XPATH_NODESET(doc, STR("//child1/ancestor::sub1|//child1/ancestor::sub2")) % 3 % 7; +} + +TEST_XML(xpath_xalan_select_12, "child number 1grandchild number 1child number 2grandchild number 2child number 3grandchild number 3child number 4grandchild number 4") +{ + CHECK_XPATH_NODESET(doc, STR("//child/ancestor-or-self::sub | //child/ancestor-or-self::sub-sub")) % 3 % 7 % 15 % 19 % 31; +} + +TEST_XML(xpath_xalan_select_13, "Carmelo MontanezNineDavid MarstonSevenMary BradyTenLynne RosenthalFive") +{ + CHECK_XPATH_NODESET(doc, STR("doc/book/author[name/@real='no']|doc/book/author[name/@real='yes']")) % 4 % 20; + CHECK_XPATH_NODESET(doc, STR("doc/book/author[(name/@real='no' and position()=1)]|doc/book/author[(name/@real='yes' and position()=last())]")) % 4 % 20; + CHECK_XPATH_NODESET(doc, STR("doc/book/author[name='Mary Brady']|doc/book/author[name/@real='no']")) % 4 % 20; + CHECK_XPATH_NODESET(doc, STR("doc/book/author/name|doc/book/author/bibliography/author/name")) % 5 % 13 % 21 % 28; + CHECK_XPATH_NODESET(doc, STR("doc/book/author/name|doc/book/author/bibliography/author/chapters")) % 5 % 13 % 21 % 30; + CHECK_XPATH_NODESET(doc, STR("doc/book/author/name|doc/book/author/noElement")) % 5 % 13 
% 21; + CHECK_XPATH_NODESET(doc, STR("//noChild1|//noChild2")); +} + +TEST_XML(xpath_xalan_select_14, "child number 1child number 2") +{ + xml_node c = doc.child(STR("doc")); + + CHECK_XPATH_NODESET(c, STR("child::sub1|child::sub2")) % 3 % 7; + CHECK_XPATH_NODESET(c, STR("descendant::child1|descendant::child2")) % 5 % 9; + CHECK_XPATH_NODESET(c, STR("descendant-or-self::sub1|descendant-or-self::sub2")) % 3 % 7; + CHECK_XPATH_NODESET(c.child(STR("sub2")), STR("preceding-sibling::sub1|following-sibling::sub3")) % 3 % 11; +} + +TEST_XML(xpath_xalan_select_15, "Selection of this child is an error.Selection of this child is an error.Selection of this child is an error.Selection of this child is an error.EFGSelection of this child is an error.Selection of this child is an error.JKSelection of this child is an error.Selection of this child is an error.NOP") +{ + xml_node c = doc.child(STR("doc")); + + CHECK_XPATH_NODESET(c, STR("child[@wide='3']|child[@deep='3']")) % 15 % 18 % 22 % 35 % 39 % 51 % 54 % 58; + CHECK_XPATH_NODESET(c, STR("child[@deep='3']|child[@wide='3']")) % 15 % 18 % 22 % 35 % 39 % 51 % 54 % 58; +} + +TEST_XML(xpath_xalan_select_16, "1234") +{ + CHECK_XPATH_NUMBER(doc, STR("count(doc/a/attribute::*)"), 9); + CHECK_XPATH_NUMBER(doc, STR("count(//@*)"), 9); + CHECK_XPATH_NUMBER(doc, STR("count(//@squish)"), 3); +} + +TEST_XML(xpath_xalan_select_17, "") +{ + xml_node c = doc.child(STR("directions")); + + CHECK_XPATH_NODESET(c, STR("north/* | north/dup1 | north/dup2")) % 4 % 5 % 6 % 7 % 8; + CHECK_XPATH_NODESET(c, STR("north/dup2 | north/dup1 | north/*")) % 4 % 5 % 6 % 7 % 8; + CHECK_XPATH_NODESET(c, STR("//north/dup2 | south/preceding-sibling::*[4]/* | north/dup1 | north/*")) % 4 % 5 % 6 % 7 % 8; + CHECK_XPATH_NODESET(c, STR("north/dup2 | south/preceding-sibling::*[4]/* | north/*")) % 4 % 5 % 6 % 7 % 8; +} + +TEST_XML(xpath_xalan_select_18, "HelloThereWorld") +{ + CHECK_XPATH_NODESET(doc, STR("/para/font[@color='green']")) % 6; + 
CHECK_XPATH_NODESET(doc.child(STR("para")), STR("/para/font[@color='green']")) % 6; + CHECK_XPATH_NODESET(doc.child(STR("para")).last_child(), STR("/para/font[@color='green']")) % 6; +} + +TEST_XML_FLAGS(xpath_xalan_select_19, "1in-a2345678in-c9", parse_default | parse_comments | parse_pi) +{ + CHECK_XPATH_NODESET(doc, STR("//*")) % 2 % 4 % 8 % 10 % 12 % 18; + CHECK_XPATH_NODESET(doc, STR("//node()")) % 2 % 3 % 4 % 5 % 6 % 7 % 8 % 9 % 10 % 11 % 12 % 13 % 14 % 15 % 16 % 17 % 18 % 19 % 20 % 21; + CHECK_XPATH_NODESET(doc, STR("//text()")) % 3 % 5 % 6 % 9 % 11 % 13 % 14 % 15 % 17 % 19 % 20; + CHECK_XPATH_NODESET(doc, STR("//comment()")) % 7 % 16; + CHECK_XPATH_NODESET(doc, STR("//processing-instruction()")) % 21; +} + +TEST_XML(xpath_xalan_bugzilla_1, "15152252") +{ + CHECK_XPATH_NODESET(doc, STR("/report/colData[@colId='F' and not(.=preceding::colData)]")) % 3; +} + +TEST(xpath_xalan_error_boolean) +{ + CHECK_XPATH_FAIL(STR("nt(true())")); + CHECK_XPATH_FAIL(STR("not(troo())")); + CHECK_XPATH_FAIL(STR("troo() and (2 = 2)")); + CHECK_XPATH_FAIL(STR("troo() or (2 = 2)")); + CHECK_XPATH_FAIL(STR("2 = troo()")); + CHECK_XPATH_FAIL(STR("boolean(troo())")); + CHECK_XPATH_FAIL(STR("true(doc)")); + CHECK_XPATH_FAIL(STR("false(doc)")); + CHECK_XPATH_FAIL(STR("not()")); + CHECK_XPATH_FAIL(STR("not(false(), doc)")); + CHECK_XPATH_FAIL(STR("boolean()")); + CHECK_XPATH_FAIL(STR("boolean(false(), doc)")); + CHECK_XPATH_FAIL(STR("lang()")); + CHECK_XPATH_FAIL(STR("lang('en','us')")); +} + +TEST(xpath_xalan_error_conditional) +{ + CHECK_XPATH_FAIL(STR("")); + CHECK_XPATH_FAIL(STR("@name='John' | @name='Joe'")); + CHECK_XPATH_FAIL(STR("\x95not(name(.)='')")); +} + +TEST(xpath_xalan_error_match) +{ + CHECK_XPATH_FAIL(STR("//")); + CHECK_XPATH_FAIL(STR("section1|")); + CHECK_XPATH_FAIL(STR("|section1")); +} + +TEST(xpath_xalan_error_math) +{ + CHECK_XPATH_FAIL(STR("6 quo 4")); + CHECK_XPATH_FAIL(STR("-troo()")); + CHECK_XPATH_FAIL(STR("number(troo())")); + CHECK_XPATH_FAIL(STR("5 * 
troo()")); + CHECK_XPATH_FAIL(STR("12 div troo()")); + CHECK_XPATH_FAIL(STR("number(8,doc)")); + CHECK_XPATH_FAIL(STR("sum(doc, 8)")); + CHECK_XPATH_FAIL(STR("sum()")); + CHECK_XPATH_FAIL(STR("floor(8,7)")); + CHECK_XPATH_FAIL(STR("floor()")); + CHECK_XPATH_FAIL(STR("ceiling(8,7)")); + CHECK_XPATH_FAIL(STR("ceiling()")); + CHECK_XPATH_FAIL(STR("round(8,7)")); + CHECK_XPATH_FAIL(STR("round()")); +} + +TEST(xpath_xalan_error_namespace) +{ + CHECK_XPATH_FAIL(STR("local-name(baz2:b,..)")); + CHECK_XPATH_FAIL(STR("namespace-uri(baz2:b,..)")); + CHECK_XPATH_FAIL(STR("name(a,b)")); + CHECK_XPATH_FAIL(STR(":foo")); + CHECK_XPATH_FAIL(STR("*:foo")); +} + +TEST(xpath_xalan_error_position) +{ + CHECK_XPATH_FAIL(STR("*[last(*,2)]")); + CHECK_XPATH_FAIL(STR("position(b)=1")); + CHECK_XPATH_FAIL(STR("count()")); + CHECK_XPATH_FAIL(STR("count(*,4)")); + CHECK_XPATH_FAIL(STR("position()=last(a)")); +} + +TEST(xpath_xalan_error_select) +{ + CHECK_XPATH_FAIL(STR("")); + CHECK_XPATH_FAIL(STR("count(troo())")); + CHECK_XPATH_FAIL(STR("c::sub")); + CHECK_XPATH_FAIL(STR("c()")); + CHECK_XPATH_FAIL(STR("(* - 4) foo 2")); + CHECK_XPATH_FAIL(STR("5 . 
+ *")); + CHECK_XPATH_FAIL(STR("4/.")); + CHECK_XPATH_FAIL(STR("true()/.")); + CHECK_XPATH_FAIL(STR("item//[@type='x']")); + CHECK_XPATH_FAIL(STR("//")); + CHECK_XPATH_FAIL(STR("item//")); + CHECK_XPATH_FAIL(STR("count(//)")); + CHECK_XPATH_FAIL(STR("substring-after(//,'0')")); + CHECK_XPATH_FAIL(STR("//+17")); + CHECK_XPATH_FAIL(STR("//|subitem")); + CHECK_XPATH_FAIL(STR("..[near-north]")); +} + +TEST(xpath_xalan_error_string) +{ + CHECK_XPATH_FAIL(STR("string(troo())")); + CHECK_XPATH_FAIL(STR("string-length(troo())")); + CHECK_XPATH_FAIL(STR("normalize-space(a,'\t\r\n ab cd ')")); + CHECK_XPATH_FAIL(STR("contains('ENCYCLOPEDIA')")); + CHECK_XPATH_FAIL(STR("contains('ENCYCLOPEDIA','LOPE',doc)")); + CHECK_XPATH_FAIL(STR("starts-with('ENCYCLOPEDIA')")); + CHECK_XPATH_FAIL(STR("starts-with('ENCYCLOPEDIA','LOPE',doc)")); + CHECK_XPATH_FAIL(STR("substring-before('ENCYCLOPEDIA')")); + CHECK_XPATH_FAIL(STR("substring-before('ENCYCLOPEDIA','LOPE',doc)")); + CHECK_XPATH_FAIL(STR("substring-after('ENCYCLOPEDIA')")); + CHECK_XPATH_FAIL(STR("substring-after('ENCYCLOPEDIA','LOPE',doc)")); + CHECK_XPATH_FAIL(STR("substring('ENCYCLOPEDIA')")); + CHECK_XPATH_FAIL(STR("substring('ENCYCLOPEDIA',4,5,2)")); + CHECK_XPATH_FAIL(STR("concat('x')")); + CHECK_XPATH_FAIL(STR("string-length('ENCYCLOPEDIA','PEDI')")); + CHECK_XPATH_FAIL(STR("translate('bar','abc')")); + CHECK_XPATH_FAIL(STR("translate('bar','abc','ABC','output')")); + CHECK_XPATH_FAIL(STR("string(22,44)")); + CHECK_XPATH_FAIL(STR("concat(/*)")); +} + +#endif diff --git a/tests/writer_string.cpp b/tests/writer_string.cpp index 878a103..f35b461 100644 --- a/tests/writer_string.cpp +++ b/tests/writer_string.cpp @@ -1,77 +1,77 @@ -#include "writer_string.hpp" - -#include "test.hpp" - -static bool test_narrow(const std::string& result, const char* expected, size_t length) -{ - // check result - if (result != std::string(expected, expected + length)) return false; - - // check comparison operator (incorrect implementation can 
theoretically early-out on zero terminators...) - if (length > 0 && result == std::string(expected, expected + length - 1) + "?") return false; - - return true; -} - -void xml_writer_string::write(const void* data, size_t size) -{ - contents += std::string(static_cast(data), size); -} - -std::string xml_writer_string::as_narrow() const -{ - return contents; -} - -std::wstring xml_writer_string::as_wide() const -{ - CHECK(contents.size() % sizeof(wchar_t) == 0); - - return std::wstring(reinterpret_cast(contents.data()), contents.size() / sizeof(wchar_t)); -} - -std::basic_string xml_writer_string::as_string() const -{ -#ifdef PUGIXML_WCHAR_MODE // to avoid "condition is always true" warning in BCC - CHECK(contents.size() % sizeof(pugi::char_t) == 0); -#endif - - return std::basic_string(reinterpret_cast(contents.data()), contents.size() / sizeof(pugi::char_t)); -} - -std::string save_narrow(const pugi::xml_document& doc, unsigned int flags, pugi::xml_encoding encoding) -{ - xml_writer_string writer; - - doc.save(writer, STR(""), flags, encoding); - - return writer.as_narrow(); -} - -bool test_save_narrow(const pugi::xml_document& doc, unsigned int flags, pugi::xml_encoding encoding, const char* expected, size_t length) -{ - return test_narrow(save_narrow(doc, flags, encoding), expected, length); -} - -std::string write_narrow(pugi::xml_node node, unsigned int flags, pugi::xml_encoding encoding) -{ - xml_writer_string writer; - - node.print(writer, STR(""), flags, encoding); - - return writer.as_narrow(); -} - -bool test_write_narrow(pugi::xml_node node, unsigned int flags, pugi::xml_encoding encoding, const char* expected, size_t length) -{ - return test_narrow(write_narrow(node, flags, encoding), expected, length); -} - -std::wstring write_wide(pugi::xml_node node, unsigned int flags, pugi::xml_encoding encoding) -{ - xml_writer_string writer; - - node.print(writer, STR(""), flags, encoding); - - return writer.as_wide(); -} +#include "writer_string.hpp" + +#include 
"test.hpp" + +static bool test_narrow(const std::string& result, const char* expected, size_t length) +{ + // check result + if (result != std::string(expected, expected + length)) return false; + + // check comparison operator (incorrect implementation can theoretically early-out on zero terminators...) + if (length > 0 && result == std::string(expected, expected + length - 1) + "?") return false; + + return true; +} + +void xml_writer_string::write(const void* data, size_t size) +{ + contents += std::string(static_cast(data), size); +} + +std::string xml_writer_string::as_narrow() const +{ + return contents; +} + +std::wstring xml_writer_string::as_wide() const +{ + CHECK(contents.size() % sizeof(wchar_t) == 0); + + return std::wstring(reinterpret_cast(contents.data()), contents.size() / sizeof(wchar_t)); +} + +std::basic_string xml_writer_string::as_string() const +{ +#ifdef PUGIXML_WCHAR_MODE // to avoid "condition is always true" warning in BCC + CHECK(contents.size() % sizeof(pugi::char_t) == 0); +#endif + + return std::basic_string(reinterpret_cast(contents.data()), contents.size() / sizeof(pugi::char_t)); +} + +std::string save_narrow(const pugi::xml_document& doc, unsigned int flags, pugi::xml_encoding encoding) +{ + xml_writer_string writer; + + doc.save(writer, STR(""), flags, encoding); + + return writer.as_narrow(); +} + +bool test_save_narrow(const pugi::xml_document& doc, unsigned int flags, pugi::xml_encoding encoding, const char* expected, size_t length) +{ + return test_narrow(save_narrow(doc, flags, encoding), expected, length); +} + +std::string write_narrow(pugi::xml_node node, unsigned int flags, pugi::xml_encoding encoding) +{ + xml_writer_string writer; + + node.print(writer, STR(""), flags, encoding); + + return writer.as_narrow(); +} + +bool test_write_narrow(pugi::xml_node node, unsigned int flags, pugi::xml_encoding encoding, const char* expected, size_t length) +{ + return test_narrow(write_narrow(node, flags, encoding), expected, 
length); +} + +std::wstring write_wide(pugi::xml_node node, unsigned int flags, pugi::xml_encoding encoding) +{ + xml_writer_string writer; + + node.print(writer, STR(""), flags, encoding); + + return writer.as_wide(); +} diff --git a/tests/writer_string.hpp b/tests/writer_string.hpp index acf6318..390f93b 100644 --- a/tests/writer_string.hpp +++ b/tests/writer_string.hpp @@ -1,27 +1,27 @@ -#ifndef HEADER_TEST_WRITER_STRING_HPP -#define HEADER_TEST_WRITER_STRING_HPP - -#include "../src/pugixml.hpp" - -#include - -struct xml_writer_string: public pugi::xml_writer -{ - std::string contents; - - virtual void write(const void* data, size_t size); - - std::string as_narrow() const; - std::wstring as_wide() const; - std::basic_string as_string() const; -}; - -std::string save_narrow(const pugi::xml_document& doc, unsigned int flags, pugi::xml_encoding encoding); -bool test_save_narrow(const pugi::xml_document& doc, unsigned int flags, pugi::xml_encoding encoding, const char* expected, size_t length); - -std::string write_narrow(pugi::xml_node node, unsigned int flags, pugi::xml_encoding encoding); -bool test_write_narrow(pugi::xml_node node, unsigned int flags, pugi::xml_encoding encoding, const char* expected, size_t length); - -std::wstring write_wide(pugi::xml_node node, unsigned int flags, pugi::xml_encoding encoding); - -#endif +#ifndef HEADER_TEST_WRITER_STRING_HPP +#define HEADER_TEST_WRITER_STRING_HPP + +#include "../src/pugixml.hpp" + +#include + +struct xml_writer_string: public pugi::xml_writer +{ + std::string contents; + + virtual void write(const void* data, size_t size); + + std::string as_narrow() const; + std::wstring as_wide() const; + std::basic_string as_string() const; +}; + +std::string save_narrow(const pugi::xml_document& doc, unsigned int flags, pugi::xml_encoding encoding); +bool test_save_narrow(const pugi::xml_document& doc, unsigned int flags, pugi::xml_encoding encoding, const char* expected, size_t length); + +std::string 
write_narrow(pugi::xml_node node, unsigned int flags, pugi::xml_encoding encoding); +bool test_write_narrow(pugi::xml_node node, unsigned int flags, pugi::xml_encoding encoding, const char* expected, size_t length); + +std::wstring write_wide(pugi::xml_node node, unsigned int flags, pugi::xml_encoding encoding); + +#endif -- cgit v1.2.3