From 7d24b9b5655d584b6dc8b89df7cbd58d2e940a81 Mon Sep 17 00:00:00 2001 From: "arseny.kapoulkine" Date: Mon, 19 Jul 2010 09:57:32 +0000 Subject: Set svn:eol-style to native for all text files git-svn-id: http://pugixml.googlecode.com/svn/trunk@607 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 9206 ++++++++++++++++++++++++++--------------------------- src/pugixml.hpp | 4756 +++++++++++++-------------- src/pugixpath.cpp | 7000 ++++++++++++++++++++-------------------- 3 files changed, 10481 insertions(+), 10481 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index ca3f009..7f36ada 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -1,4603 +1,4603 @@ -/** - * pugixml parser - version 0.9 - * -------------------------------------------------------- - * Copyright (C) 2006-2010, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) - * Report bugs and download new versions at http://code.google.com/p/pugixml/ - * - * This library is distributed under the MIT License. See notice at the end - * of this file. - * - * This work is based on the pugxml parser, which is: - * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) - */ - -#include "pugixml.hpp" - -#include -#include -#include -#include -#include -#include - -#ifndef PUGIXML_NO_STL -# include -# include -# include -#endif - -// For placement new -#include - -#ifdef _MSC_VER -# pragma warning(disable: 4127) // conditional expression is constant -# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable -# pragma warning(disable: 4324) // structure was padded due to __declspec(align()) -# pragma warning(disable: 4996) // this function or variable may be unsafe -#endif - -#ifdef __INTEL_COMPILER -# pragma warning(disable: 177) // function was declared but never referenced -# pragma warning(disable: 1478 1786) // function was declared "deprecated" -#endif - -#ifdef __BORLANDC__ -# pragma warn -8008 // condition is always false -# pragma warn -8066 // unreachable code -#endif - -#ifdef __SNC__ -# pragma diag_suppress=178 // function waS declared but never referenced -#endif - -// uintptr_t -#if !defined(_MSC_VER) || _MSC_VER >= 1600 -# include -#else -# if _MSC_VER < 1300 -// No native uintptr_t in MSVC6 -typedef size_t uintptr_t; -# endif -typedef unsigned __int8 uint8_t; -typedef unsigned __int16 uint16_t; -typedef unsigned __int32 uint32_t; -#endif - -// Inlining controls -#if defined(_MSC_VER) && _MSC_VER >= 1300 -# define PUGIXML_NO_INLINE __declspec(noinline) -#elif defined(__GNUC__) -# define PUGIXML_NO_INLINE __attribute__((noinline)) -#else -# define PUGIXML_NO_INLINE -#endif - -// Simple static assertion -#define STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; } - -// Memory allocation -namespace -{ - void* default_allocate(size_t size) - { - return malloc(size); - } - - void default_deallocate(void* ptr) - { - free(ptr); - } - - pugi::allocation_function global_allocate = default_allocate; - pugi::deallocation_function global_deallocate = default_deallocate; -} - -// String utilities prototypes -namespace pugi -{ - namespace impl - { - size_t strlen(const char_t* s); - bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count); - void widen_ascii(wchar_t* dest, const char* source); - } -} - -// String utilities -namespace pugi -{ - namespace impl - { - // Get string length - size_t strlen(const char_t* s) - { - #ifdef PUGIXML_WCHAR_MODE - return wcslen(s); - #else - return ::strlen(s); - #endif - } - - // Compare two strings - bool PUGIXML_FUNCTION strequal(const char_t* src, const char_t* dst) - { - #ifdef PUGIXML_WCHAR_MODE - return wcscmp(src, dst) == 0; - #else - return strcmp(src, dst) == 0; - #endif - } - - // Compare lhs with [rhs_begin, rhs_end) - bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count) - { - for (size_t i = 0; i < count; ++i) - if (lhs[i] != rhs[i]) - return false; - - return lhs[count] == 0; - } - - // Character set pattern match. - static bool strequalwild_cset(const char_t** src, const char_t** dst) - { - int find = 0, excl = 0, star = 0; - - if (**src == '!') - { - excl = 1; - ++(*src); - } - - while (**src != ']' || star == 1) - { - if (find == 0) - { - if (**src == '-' && *(*src-1) < *(*src+1) && *(*src+1) != ']' && star == 0) - { - if (**dst >= *(*src-1) && **dst <= *(*src+1)) - { - find = 1; - ++(*src); - } - } - else if (**src == **dst) find = 1; - } - ++(*src); - star = 0; - } - - if (excl == 1) find = (1 - find); - if (find == 1) ++(*dst); - - return find == 0; - } - - // Wildcard pattern match. - static bool strequalwild_astr(const char_t** src, const char_t** dst) - { - int find = 1; - ++(*src); - while ((**dst != 0 && **src == '?') || **src == '*') - { - if(**src == '?') ++(*dst); - ++(*src); - } - while (**src == '*') ++(*src); - if (**dst == 0 && **src != 0) return 0; - if (**dst == 0 && **src == 0) return 1; - else - { - if (!impl::strequalwild(*src,*dst)) - { - do - { - ++(*dst); - while(**src != **dst && **src != '[' && **dst != 0) - ++(*dst); - } - while ((**dst != 0) ? !impl::strequalwild(*src,*dst) : 0 != (find=0)); - } - if (**dst == 0 && **src == 0) find = 1; - return find == 0; - } - } - - // Compare two strings, with globbing, and character sets. - bool PUGIXML_FUNCTION strequalwild(const char_t* src, const char_t* dst) - { - int find = 1; - for(; *src != 0 && find == 1 && *dst != 0; ++src) - { - switch (*src) - { - case '?': ++dst; break; - case '[': ++src; find = !strequalwild_cset(&src,&dst); break; - case '*': find = !strequalwild_astr(&src,&dst); --src; break; - default : find = (int) (*src == *dst); ++dst; - } - } - while (*src == '*' && find == 1) ++src; - return (find == 1 && *dst == 0 && *src == 0); - } - -#ifdef PUGIXML_WCHAR_MODE - // Convert string to wide string, assuming all symbols are ASCII - void widen_ascii(wchar_t* dest, const char* source) - { - for (const char* i = source; *i; ++i) *dest++ = *i; - *dest = 0; - } -#endif - } -} - -namespace pugi -{ - static const size_t xml_memory_page_size = 32768; - - static const uintptr_t xml_memory_page_alignment = 32; - static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1); - static const uintptr_t xml_memory_page_name_allocated_mask = 16; - static const uintptr_t xml_memory_page_value_allocated_mask = 8; - static const uintptr_t xml_memory_page_type_mask = 7; - - struct xml_allocator; - - struct xml_memory_page - { - static xml_memory_page* construct(void* memory) - { - if (!memory) return 0; - - xml_memory_page* result = static_cast(memory); - - result->allocator = 0; - result->memory = 0; - result->prev = 0; - result->next = 0; - result->busy_size = 0; - result->freed_size = 0; - - return result; - } - - xml_allocator* allocator; - - void* memory; - - xml_memory_page* prev; - xml_memory_page* next; - - size_t busy_size; - size_t freed_size; - - char data[1]; - }; - - struct xml_memory_string_header - { - xml_memory_page* page; - size_t full_size; - }; - - struct xml_allocator - { - xml_allocator(xml_memory_page* root): _root(root), _busy_size(root ? root->busy_size : 0) - { - } - - xml_memory_page* allocate_page(size_t data_size) - { - size_t size = offsetof(xml_memory_page, data) + data_size; - - // allocate block with some alignment, leaving memory for worst-case padding - void* memory = global_allocate(size + xml_memory_page_alignment); - if (!memory) return 0; - - // align upwards to page boundary - void* page_memory = reinterpret_cast((reinterpret_cast(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1)); - - // prepare page structure - xml_memory_page* page = xml_memory_page::construct(page_memory); - - page->memory = memory; - page->allocator = _root->allocator; - - return page; - } - - static void deallocate_page(xml_memory_page* page) - { - global_deallocate(page->memory); - } - - void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); - - void* allocate_memory(size_t size, xml_memory_page*& out_page) - { - if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page); - - void* buf = _root->data + _busy_size; - - _busy_size += size; - - out_page = _root; - - return buf; - } - - void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) - { - assert(ptr >= page->data && ptr < page->data + xml_memory_page_size); - (void)!ptr; - - if (page == _root) page->busy_size = _busy_size; - - page->freed_size += size; - assert(page->freed_size <= page->busy_size); - - if (page->freed_size == page->busy_size) - { - if (page->next == 0) - { - assert(_root == page); - - // top page freed, just reset sizes - page->busy_size = page->freed_size = 0; - _busy_size = 0; - } - else - { - assert(_root != page); - assert(page->prev); - - // remove from the list - page->prev->next = page->next; - page->next->prev = page->prev; - - // deallocate - deallocate_page(page); - } - } - } - - char_t* allocate_string(size_t length) - { - // get actual size, rounded up to pointer alignment boundary - size_t size = ((length * sizeof(char_t)) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1); - - // allocate memory for string and header block - size_t full_size = sizeof(xml_memory_string_header) + size; - - xml_memory_page* page; - xml_memory_string_header* header = static_cast(allocate_memory(full_size, page)); - - if (!header) return 0; - - // setup header - header->page = page; - header->full_size = full_size; - - return reinterpret_cast(header + 1); - } - - void deallocate_string(char_t* string) - { - // get header - xml_memory_string_header* header = reinterpret_cast(string) - 1; - - // deallocate - deallocate_memory(header, header->full_size, header->page); - } - - xml_memory_page* _root; - size_t _busy_size; - }; - - PUGIXML_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) - { - const size_t large_allocation_threshold = xml_memory_page_size / 4; - - xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size); - if (!page) return 0; - - if (size <= large_allocation_threshold) - { - _root->busy_size = _busy_size; - - // insert page at the end of linked list - page->prev = _root; - _root->next = page; - _root = page; - - _busy_size = size; - } - else - { - // insert page before the end of linked list - assert(_root->prev); - - page->prev = _root->prev; - page->next = _root; - - _root->prev->next = page; - _root->prev = page; - } - - // allocate inside page - page->busy_size = size; - - out_page = page; - return page->data; - } - - /// A 'name=value' XML attribute structure. - struct xml_attribute_struct - { - /// Default ctor - xml_attribute_struct(xml_memory_page* page): header(reinterpret_cast(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0) - { - } - - uintptr_t header; - - char_t* name; ///< Pointer to attribute name. - char_t* value; ///< Pointer to attribute value. - - xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list) - xml_attribute_struct* next_attribute; ///< Next attribute - }; - - /// An XML document tree node. - struct xml_node_struct - { - /// Default ctor - /// \param type - node type - xml_node_struct(xml_memory_page* page, xml_node_type type): header(reinterpret_cast(page) | type), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) - { - } - - uintptr_t header; - - xml_node_struct* parent; ///< Pointer to parent - - char_t* name; ///< Pointer to element name. - char_t* value; ///< Pointer to any associated string data. - - xml_node_struct* first_child; ///< First child - - xml_node_struct* prev_sibling_c; ///< Left brother (cyclic list) - xml_node_struct* next_sibling; ///< Right brother - - xml_attribute_struct* first_attribute; ///< First attribute - }; - - struct xml_document_struct: public xml_node_struct - { - xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), allocator(0), buffer(0) - { - } - - xml_allocator allocator; - const char_t* buffer; - }; - - inline xml_allocator& get_allocator(const xml_node_struct* node) - { - assert(node); - - return *reinterpret_cast(node->header & xml_memory_page_pointer_mask)->allocator; - } -} - -// Low-level DOM operations -namespace -{ - using namespace pugi; - - inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) - { - xml_memory_page* page; - void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page); - - return new (memory) xml_attribute_struct(page); - } - - inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type) - { - xml_memory_page* page; - void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page); - - return new (memory) xml_node_struct(page, type); - } - - inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) - { - uintptr_t header = a->header; - - if (header & xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name); - if (header & xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value); - - alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast(header & xml_memory_page_pointer_mask)); - } - - inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) - { - uintptr_t header = n->header; - - if (header & xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name); - if (header & xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value); - - for (xml_attribute_struct* attr = n->first_attribute; attr; ) - { - xml_attribute_struct* next = attr->next_attribute; - - destroy_attribute(attr, alloc); - - attr = next; - } - - for (xml_node_struct* child = n->first_child; child; ) - { - xml_node_struct* next = child->next_sibling; - - destroy_node(child, alloc); - - child = next; - } - - alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast(header & xml_memory_page_pointer_mask)); - } - - PUGIXML_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) - { - xml_node_struct* child = allocate_node(alloc, type); - if (!child) return 0; - - child->parent = node; - - xml_node_struct* first_child = node->first_child; - - if (first_child) - { - xml_node_struct* last_child = first_child->prev_sibling_c; - - last_child->next_sibling = child; - child->prev_sibling_c = last_child; - first_child->prev_sibling_c = child; - } - else - { - node->first_child = child; - child->prev_sibling_c = child; - } - - return child; - } - - PUGIXML_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc) - { - xml_attribute_struct* a = allocate_attribute(alloc); - if (!a) return 0; - - xml_attribute_struct* first_attribute = node->first_attribute; - - if (first_attribute) - { - xml_attribute_struct* last_attribute = first_attribute->prev_attribute_c; - - last_attribute->next_attribute = a; - a->prev_attribute_c = last_attribute; - first_attribute->prev_attribute_c = a; - } - else - { - node->first_attribute = a; - a->prev_attribute_c = a; - } - - return a; - } -} - -// Helper classes for code generation -namespace -{ - struct opt_false - { - enum { value = 0 }; - }; - - struct opt_true - { - enum { value = 1 }; - }; -} - -// Unicode utilities -namespace -{ - inline uint16_t endian_swap(uint16_t value) - { - return static_cast(((value & 0xff) << 8) | (value >> 8)); - } - - inline uint32_t endian_swap(uint32_t value) - { - return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24); - } - - struct utf8_counter - { - typedef size_t value_type; - - static value_type low(value_type result, uint32_t ch) - { - // U+0000..U+007F - if (ch < 0x80) return result + 1; - // U+0080..U+07FF - else if (ch < 0x800) return result + 2; - // U+0800..U+FFFF - else return result + 3; - } - - static value_type high(value_type result, uint32_t) - { - // U+10000..U+10FFFF - return result + 4; - } - }; - - struct utf8_writer - { - typedef uint8_t* value_type; - - static value_type low(value_type result, uint32_t ch) - { - // U+0000..U+007F - if (ch < 0x80) - { - *result = static_cast(ch); - return result + 1; - } - // U+0080..U+07FF - else if (ch < 0x800) - { - result[0] = static_cast(0xC0 | (ch >> 6)); - result[1] = static_cast(0x80 | (ch & 0x3F)); - return result + 2; - } - // U+0800..U+FFFF - else - { - result[0] = static_cast(0xE0 | (ch >> 12)); - result[1] = static_cast(0x80 | ((ch >> 6) & 0x3F)); - result[2] = static_cast(0x80 | (ch & 0x3F)); - return result + 3; - } - } - - static value_type high(value_type result, uint32_t ch) - { - // U+10000..U+10FFFF - result[0] = static_cast(0xF0 | (ch >> 18)); - result[1] = static_cast(0x80 | ((ch >> 12) & 0x3F)); - result[2] = static_cast(0x80 | ((ch >> 6) & 0x3F)); - result[3] = static_cast(0x80 | (ch & 0x3F)); - return result + 4; - } - - static value_type any(value_type result, uint32_t ch) - { - return (ch < 0x10000) ? low(result, ch) : high(result, ch); - } - }; - - struct utf16_counter - { - typedef size_t value_type; - - static value_type low(value_type result, uint32_t) - { - return result + 1; - } - - static value_type high(value_type result, uint32_t) - { - return result + 2; - } - }; - - struct utf16_writer - { - typedef uint16_t* value_type; - - static value_type low(value_type result, uint32_t ch) - { - *result = static_cast(ch); - - return result + 1; - } - - static value_type high(value_type result, uint32_t ch) - { - uint32_t msh = (uint32_t)(ch - 0x10000) >> 10; - uint32_t lsh = (uint32_t)(ch - 0x10000) & 0x3ff; - - result[0] = static_cast(0xD800 + msh); - result[1] = static_cast(0xDC00 + lsh); - - return result + 2; - } - - static value_type any(value_type result, uint32_t ch) - { - return (ch < 0x10000) ? low(result, ch) : high(result, ch); - } - }; - - struct utf32_counter - { - typedef size_t value_type; - - static value_type low(value_type result, uint32_t) - { - return result + 1; - } - - static value_type high(value_type result, uint32_t) - { - return result + 1; - } - }; - - struct utf32_writer - { - typedef uint32_t* value_type; - - static value_type low(value_type result, uint32_t ch) - { - *result = ch; - - return result + 1; - } - - static value_type high(value_type result, uint32_t ch) - { - *result = ch; - - return result + 1; - } - - static value_type any(value_type result, uint32_t ch) - { - *result = ch; - - return result + 1; - } - }; - - template struct wchar_selector; - - template <> struct wchar_selector<2> - { - typedef uint16_t type; - typedef utf16_counter counter; - typedef utf16_writer writer; - }; - - template <> struct wchar_selector<4> - { - typedef uint32_t type; - typedef utf32_counter counter; - typedef utf32_writer writer; - }; - - typedef wchar_selector::counter wchar_counter; - typedef wchar_selector::writer wchar_writer; - - template struct utf_decoder - { - static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result) - { - const uint8_t utf8_byte_mask = 0x3f; - - while (size) - { - uint8_t lead = *data; - - // 0xxxxxxx -> U+0000..U+007F - if (lead < 0x80) - { - result = Traits::low(result, lead); - data += 1; - size -= 1; - - // process aligned single-byte (ascii) blocks - if ((reinterpret_cast(data) & 3) == 0) - { - while (size >= 4 && (*reinterpret_cast(data) & 0x80808080) == 0) - { - result = Traits::low(result, data[0]); - result = Traits::low(result, data[1]); - result = Traits::low(result, data[2]); - result = Traits::low(result, data[3]); - data += 4; - size -= 4; - } - } - } - // 110xxxxx -> U+0080..U+07FF - else if ((unsigned)(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80) - { - result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask)); - data += 2; - size -= 2; - } - // 1110xxxx -> U+0800-U+FFFF - else if ((unsigned)(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80) - { - result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask)); - data += 3; - size -= 3; - } - // 11110xxx -> U+10000..U+10FFFF - else if ((unsigned)(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80) - { - result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask)); - data += 4; - size -= 4; - } - // 10xxxxxx or 11111xxx -> invalid - else - { - data += 1; - size -= 1; - } - } - - return result; - } - - static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result) - { - const uint16_t* end = data + size; - - while (data < end) - { - uint16_t lead = opt_swap::value ? endian_swap(*data) : *data; - - // U+0000..U+D7FF - if (lead < 0xD800) - { - result = Traits::low(result, lead); - data += 1; - } - // U+E000..U+FFFF - else if ((unsigned)(lead - 0xE000) < 0x2000) - { - result = Traits::low(result, lead); - data += 1; - } - // surrogate pair lead - else if ((unsigned)(lead - 0xD800) < 0x400 && data + 1 < end) - { - uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1]; - - if ((unsigned)(next - 0xDC00) < 0x400) - { - result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff)); - data += 2; - } - else - { - data += 1; - } - } - else - { - data += 1; - } - } - - return result; - } - - static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result) - { - const uint32_t* end = data + size; - - while (data < end) - { - uint32_t lead = opt_swap::value ? endian_swap(*data) : *data; - - // U+0000..U+FFFF - if (lead < 0x10000) - { - result = Traits::low(result, lead); - data += 1; - } - // U+10000..U+10FFFF - else - { - result = Traits::high(result, lead); - data += 1; - } - } - - return result; - } - }; - - template inline void convert_utf_endian_swap(T* result, const T* data, size_t length) - { - for (size_t i = 0; i < length; ++i) result[i] = endian_swap(data[i]); - } - - inline void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) - { - for (size_t i = 0; i < length; ++i) result[i] = static_cast(endian_swap(static_cast::type>(data[i]))); - } -} - -namespace -{ - using namespace pugi; - - enum chartype_t - { - ct_parse_pcdata = 1, // \0, &, \r, < - ct_parse_attr = 2, // \0, &, \r, ', " - ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab - ct_space = 8, // \r, \n, space, tab - ct_parse_cdata = 16, // \0, ], >, \r - ct_parse_comment = 32, // \0, -, >, \r - ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, . - ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, : - }; - - const unsigned char chartype_table[256] = - { - 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 - 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47 - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63 - 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79 - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95 - 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111 - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127 - - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+ - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 - }; - -#ifdef PUGIXML_WCHAR_MODE - #define IS_CHARTYPE(c, ct) ((static_cast(c) < 128 ? chartype_table[static_cast(c)] : chartype_table[128]) & (ct)) -#else - #define IS_CHARTYPE(c, ct) (chartype_table[static_cast(c)] & (ct)) -#endif - - enum output_chartype_t - { - oct_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, > - oct_special_attr = 2 // Any symbol >= 0 and < 32 (except \t), &, <, >, " - }; - - const unsigned char output_chartype_table[256] = - { - 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15 - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31 - 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32-47 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, // 48-63 - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 64-128 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 128+ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }; - -#ifdef PUGIXML_WCHAR_MODE - #define IS_OUTPUT_CHARTYPE(c, ct) ((static_cast(c) < 128 ? output_chartype_table[static_cast(c)] : output_chartype_table[128]) & (ct)) -#else - #define IS_OUTPUT_CHARTYPE(c, ct) (output_chartype_table[static_cast(c)] & (ct)) -#endif - - bool is_little_endian() - { - unsigned int ui = 1; - - return *reinterpret_cast(&ui) == 1; - } - - xml_encoding get_wchar_encoding() - { - STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); - - if (sizeof(wchar_t) == 2) - return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - else - return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - } - - xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size) - { - // replace wchar encoding with utf implementation - if (encoding == encoding_wchar) return get_wchar_encoding(); - - // replace utf16 encoding with utf16 with specific endianness - if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - // replace utf32 encoding with utf32 with specific endianness - if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - // only do autodetection if no explicit encoding is requested - if (encoding != encoding_auto) return encoding; - - // try to guess encoding (based on XML specification, Appendix F.1) - const uint8_t* data = static_cast(contents); - - // look for BOM in first few bytes - if (size > 4 && data[0] == 0 && data[1] == 0 && data[2] == 0xfe && data[3] == 0xff) return encoding_utf32_be; - if (size > 4 && data[0] == 0xff && data[1] == 0xfe && data[2] == 0 && data[3] == 0) return encoding_utf32_le; - if (size > 2 && data[0] == 0xfe && data[1] == 0xff) return encoding_utf16_be; - if (size > 2 && data[0] == 0xff && data[1] == 0xfe) return encoding_utf16_le; - if (size > 3 && data[0] == 0xef && data[1] == 0xbb && data[2] == 0xbf) return encoding_utf8; - - // look for <, 4 && data[0] == 0 && data[1] == 0 && data[2] == 0 && data[3] == 0x3c) return encoding_utf32_be; - if (size > 4 && data[0] == 0x3c && data[1] == 0 && data[2] == 0 && data[3] == 0) return encoding_utf32_le; - if (size > 4 && data[0] == 0 && data[1] == 0x3c && data[2] == 0 && data[3] == 0x3f) return encoding_utf16_be; - if (size > 4 && data[0] == 0x3c && data[1] == 0 && data[2] == 0x3f && data[3] == 0) return encoding_utf16_le; - if (size > 4 && data[0] == 0x3c && data[1] == 0x3f && data[2] == 0x78 && data[3] == 0x6d) return encoding_utf8; - - // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early) - if (size > 2 && data[0] == 0 && data[1] == 0x3c) return encoding_utf16_be; - if (size > 2 && data[0] == 0x3c && data[1] == 0) return encoding_utf16_le; - - // no known BOM detected, assume utf8 - return encoding_utf8; - } - - bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) - { - if (is_mutable) - { - out_buffer = static_cast(const_cast(contents)); - } - else - { - void* buffer = global_allocate(size > 0 ? size : 1); - if (!buffer) return false; - - memcpy(buffer, contents, size); - - out_buffer = static_cast(buffer); - } - - out_length = size / sizeof(char_t); - - return true; - } - -#ifdef PUGIXML_WCHAR_MODE - inline bool need_endian_swap_utf(xml_encoding le, xml_encoding re) - { - return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) || - (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be); - } - - bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) - { - const char_t* data = static_cast(contents); - - if (is_mutable) - { - out_buffer = const_cast(data); - } - else - { - out_buffer = static_cast(global_allocate(size > 0 ? size : 1)); - if (!out_buffer) return false; - } - - out_length = size / sizeof(char_t); - - convert_wchar_endian_swap(out_buffer, data, out_length); - - return true; - } - - bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size) - { - const uint8_t* data = static_cast(contents); - - // first pass: get length in wchar_t units - out_length = utf_decoder::decode_utf8_block(data, size, 0); - - // allocate buffer of suitable length - out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); - if (!out_buffer) return false; - - // second pass: convert utf8 input to wchar_t - wchar_writer::value_type out_begin = reinterpret_cast(out_buffer); - wchar_writer::value_type out_end = utf_decoder::decode_utf8_block(data, size, out_begin); - - assert(out_end == out_begin + out_length); - (void)!out_end; - - return true; - } - - template bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) - { - const uint16_t* data = static_cast(contents); - size_t length = size / sizeof(uint16_t); - - // first pass: get length in wchar_t units - out_length = utf_decoder::decode_utf16_block(data, length, 0); - - // allocate buffer of suitable length - out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); - if (!out_buffer) return false; - - // second pass: convert utf16 input to wchar_t - wchar_writer::value_type out_begin = reinterpret_cast(out_buffer); - wchar_writer::value_type out_end = utf_decoder::decode_utf16_block(data, length, out_begin); - - assert(out_end == out_begin + out_length); - (void)!out_end; - - return true; - } - - template bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) - { - const uint32_t* data = static_cast(contents); - size_t length = size / sizeof(uint32_t); - - // first pass: get length in wchar_t units - out_length = utf_decoder::decode_utf32_block(data, length, 0); - - // allocate buffer of suitable length - out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); - if (!out_buffer) return false; - - // second pass: convert utf32 input to wchar_t - wchar_writer::value_type out_begin = reinterpret_cast(out_buffer); - wchar_writer::value_type out_end = utf_decoder::decode_utf32_block(data, length, out_begin); - - assert(out_end == out_begin + out_length); - (void)!out_end; - - return true; - } - - bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) - { - // get native encoding - xml_encoding wchar_encoding = get_wchar_encoding(); - - // fast path: no conversion required - if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); - - // only endian-swapping is required - if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable); - - // source encoding is utf8 - if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size); - - // source encoding is utf16 - if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - return (native_encoding == encoding) ? - convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) : - convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true()); - } - - // source encoding is utf32 - if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - return (native_encoding == encoding) ? - convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) : - convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true()); - } - - // invalid encoding combination (this can't happen) - assert(false); - - return false; - } -#else - template bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) - { - const uint16_t* data = static_cast(contents); - size_t length = size / sizeof(uint16_t); - - // first pass: get length in utf8 units - out_length = utf_decoder::decode_utf16_block(data, length, 0); - - // allocate buffer of suitable length - out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); - if (!out_buffer) return false; - - // second pass: convert utf16 input to utf8 - uint8_t* out_begin = reinterpret_cast(out_buffer); - uint8_t* out_end = utf_decoder::decode_utf16_block(data, length, out_begin); - - assert(out_end == out_begin + out_length); - (void)!out_end; - - return true; - } - - template bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) - { - const uint32_t* data = static_cast(contents); - size_t length = size / sizeof(uint32_t); - - // first pass: get length in utf8 units - out_length = utf_decoder::decode_utf32_block(data, length, 0); - - // allocate buffer of suitable length - out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); - if (!out_buffer) return false; - - // second pass: convert utf32 input to utf8 - uint8_t* out_begin = reinterpret_cast(out_buffer); - uint8_t* out_end = utf_decoder::decode_utf32_block(data, length, out_begin); - - assert(out_end == out_begin + out_length); - (void)!out_end; - - return true; - } - - bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) - { - // fast path: no conversion required - if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); - - // source encoding is utf16 - if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - return (native_encoding == encoding) ? - convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) : - convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true()); - } - - // source encoding is utf32 - if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - return (native_encoding == encoding) ? - convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) : - convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true()); - } - - // invalid encoding combination (this can't happen) - assert(false); - - return false; - } -#endif - - bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source) - { - size_t source_length = impl::strlen(source); - - if (dest && impl::strlen(dest) >= source_length) - { - memcpy(dest, source, (source_length + 1) * sizeof(char_t)); - - return true; - } - else - { - xml_allocator* alloc = reinterpret_cast(header & xml_memory_page_pointer_mask)->allocator; - - char_t* buf = alloc->allocate_string(source_length + 1); - if (!buf) return false; - - memcpy(buf, source, (source_length + 1) * sizeof(char_t)); - - if (header & header_mask) alloc->deallocate_string(dest); - - dest = buf; - header |= header_mask; - - return true; - } - } - - struct gap - { - char_t* end; - size_t size; - - gap(): end(0), size(0) - { - } - - // Push new gap, move s count bytes further (skipping the gap). - // Collapse previous gap. - void push(char_t*& s, size_t count) - { - if (end) // there was a gap already; collapse it - { - // Move [old_gap_end, new_gap_start) to [old_gap_start, ...) - memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end)); - } - - s += count; // end of current gap - - // "merge" two gaps - end = s; - size += count; - } - - // Collapse all gaps, return past-the-end pointer - char_t* flush(char_t* s) - { - if (end) - { - // Move [old_gap_end, current_pos) to [old_gap_start, ...) - memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end)); - - return s - size; - } - else return s; - } - }; - - char_t* strconv_escape(char_t* s, gap& g) - { - char_t* stre = s + 1; - - switch (*stre) - { - case '#': // &#... - { - unsigned int ucsc = 0; - - if (stre[1] == 'x') // &#x... (hex code) - { - stre += 2; - - char_t ch = *stre; - - if (ch == ';') return stre; - - for (;;) - { - if (static_cast(ch - '0') <= 9) - ucsc = 16 * ucsc + (ch - '0'); - else if (static_cast((ch | ' ') - 'a') <= 5) - ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10); - else if (ch == ';') - break; - else // cancel - return stre; - - ch = *++stre; - } - - ++stre; - } - else // &#... (dec code) - { - char_t ch = *++stre; - - if (ch == ';') return stre; - - for (;;) - { - if (static_cast(ch - '0') <= 9) - ucsc = 10 * ucsc + (ch - '0'); - else if (ch == ';') - break; - else // cancel - return stre; - - ch = *++stre; - } - - ++stre; - } - - #ifdef PUGIXML_WCHAR_MODE - s = reinterpret_cast(wchar_writer::any(reinterpret_cast(s), ucsc)); - #else - s = reinterpret_cast(utf8_writer::any(reinterpret_cast(s), ucsc)); - #endif - - g.push(s, stre - s); - return stre; - } - case 'a': // &a - { - ++stre; - - if (*stre == 'm') // &am - { - if (*++stre == 'p' && *++stre == ';') // & - { - *s++ = '&'; - ++stre; - - g.push(s, stre - s); - return stre; - } - } - else if (*stre == 'p') // &ap - { - if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // ' - { - *s++ = '\''; - ++stre; - - g.push(s, stre - s); - return stre; - } - } - break; - } - case 'g': // &g - { - if (*++stre == 't' && *++stre == ';') // > - { - *s++ = '>'; - ++stre; - - g.push(s, stre - s); - return stre; - } - break; - } - case 'l': // &l - { - if (*++stre == 't' && *++stre == ';') // < - { - *s++ = '<'; - ++stre; - - g.push(s, stre - s); - return stre; - } - break; - } - case 'q': // &q - { - if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // " - { - *s++ = '"'; - ++stre; - - g.push(s, stre - s); - return stre; - } - break; - } - } - - return stre; - } - - // Utility macro for last character handling - #define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e))) - - char_t* strconv_comment(char_t* s, char_t endch) - { - if (!*s) return 0; - - gap g; - - while (true) - { - while (!IS_CHARTYPE(*s, ct_parse_comment)) ++s; - - if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair - { - *s++ = '\n'; // replace first one with 0x0a - - if (*s == '\n') g.push(s, 1); - } - else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) // comment ends here - { - *g.flush(s) = 0; - - return s + (s[2] == '>' ? 3 : 2); - } - else if (*s == 0) - { - return 0; - } - else ++s; - } - } - - char_t* strconv_cdata(char_t* s, char_t endch) - { - if (!*s) return 0; - - gap g; - - while (true) - { - while (!IS_CHARTYPE(*s, ct_parse_cdata)) ++s; - - if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair - { - *s++ = '\n'; // replace first one with 0x0a - - if (*s == '\n') g.push(s, 1); - } - else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) // CDATA ends here - { - *g.flush(s) = 0; - - return s + 1; - } - else if (*s == 0) - { - return 0; - } - else ++s; - } - } - - typedef char_t* (*strconv_pcdata_t)(char_t*); - - template struct strconv_pcdata_impl - { - static char_t* parse(char_t* s) - { - gap g; - - while (true) - { - while (!IS_CHARTYPE(*s, ct_parse_pcdata)) ++s; - - if (*s == '<') // PCDATA ends here - { - *g.flush(s) = 0; - - return s + 1; - } - else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair - { - *s++ = '\n'; // replace first one with 0x0a - - if (*s == '\n') g.push(s, 1); - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (*s == 0) - { - return s; - } - else ++s; - } - } - }; - - strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) - { - STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20); - - switch ((optmask >> 4) & 3) // get bitmask for flags (eol escapes) - { - case 0: return strconv_pcdata_impl::parse; - case 1: return strconv_pcdata_impl::parse; - case 2: return strconv_pcdata_impl::parse; - case 3: return strconv_pcdata_impl::parse; - default: return 0; // should not get here - } - } - - typedef char_t* (*strconv_attribute_t)(char_t*, char_t); - - template struct strconv_attribute_impl - { - static char_t* parse_wnorm(char_t* s, char_t end_quote) - { - gap g; - - // trim leading whitespaces - if (IS_CHARTYPE(*s, ct_space)) - { - char_t* str = s; - - do ++str; - while (IS_CHARTYPE(*str, ct_space)); - - g.push(s, str - s); - } - - while (true) - { - while (!IS_CHARTYPE(*s, ct_parse_attr_ws | ct_space)) ++s; - - if (*s == end_quote) - { - char_t* str = g.flush(s); - - do *str-- = 0; - while (IS_CHARTYPE(*str, ct_space)); - - return s + 1; - } - else if (IS_CHARTYPE(*s, ct_space)) - { - *s++ = ' '; - - if (IS_CHARTYPE(*s, ct_space)) - { - char_t* str = s + 1; - while (IS_CHARTYPE(*str, ct_space)) ++str; - - g.push(s, str - s); - } - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (!*s) - { - return 0; - } - else ++s; - } - } - - static char_t* parse_wconv(char_t* s, char_t end_quote) - { - gap g; - - while (true) - { - while (!IS_CHARTYPE(*s, ct_parse_attr_ws)) ++s; - - if (*s == end_quote) - { - *g.flush(s) = 0; - - return s + 1; - } - else if (IS_CHARTYPE(*s, ct_space)) - { - if (*s == '\r') - { - *s++ = ' '; - - if (*s == '\n') g.push(s, 1); - } - else *s++ = ' '; - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (!*s) - { - return 0; - } - else ++s; - } - } - - static char_t* parse_eol(char_t* s, char_t end_quote) - { - gap g; - - while (true) - { - while (!IS_CHARTYPE(*s, ct_parse_attr)) ++s; - - if (*s == end_quote) - { - *g.flush(s) = 0; - - return s + 1; - } - else if (*s == '\r') - { - *s++ = '\n'; - - if (*s == '\n') g.push(s, 1); - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (!*s) - { - return 0; - } - else ++s; - } - } - - static char_t* parse_simple(char_t* s, char_t end_quote) - { - gap g; - - while (true) - { - while (!IS_CHARTYPE(*s, ct_parse_attr)) ++s; - - if (*s == end_quote) - { - *g.flush(s) = 0; - - return s + 1; - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (!*s) - { - return 0; - } - else ++s; - } - } - }; - - strconv_attribute_t get_strconv_attribute(unsigned int optmask) - { - STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80); - - switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes) - { - case 0: return strconv_attribute_impl::parse_simple; - case 1: return strconv_attribute_impl::parse_simple; - case 2: return strconv_attribute_impl::parse_eol; - case 3: return strconv_attribute_impl::parse_eol; - case 4: return strconv_attribute_impl::parse_wconv; - case 5: return strconv_attribute_impl::parse_wconv; - case 6: return strconv_attribute_impl::parse_wconv; - case 7: return strconv_attribute_impl::parse_wconv; - case 8: return strconv_attribute_impl::parse_wnorm; - case 9: return strconv_attribute_impl::parse_wnorm; - case 10: return strconv_attribute_impl::parse_wnorm; - case 11: return strconv_attribute_impl::parse_wnorm; - case 12: return strconv_attribute_impl::parse_wnorm; - case 13: return strconv_attribute_impl::parse_wnorm; - case 14: return strconv_attribute_impl::parse_wnorm; - case 15: return strconv_attribute_impl::parse_wnorm; - default: return 0; // should not get here - } - } - - inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0) - { - xml_parse_result result = {status, offset, encoding_auto}; - return result; - } - - struct xml_parser - { - xml_allocator alloc; - char_t* error_offset; - jmp_buf error_handler; - - // Parser utilities. - #define SKIPWS() { while (IS_CHARTYPE(*s, ct_space)) ++s; } - #define OPTSET(OPT) ( optmsk & OPT ) - #define PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); if (!cursor) longjmp(error_handler, status_out_of_memory); } - #define POPNODE() { cursor = cursor->parent; } - #define SCANFOR(X) { while (*s != 0 && !(X)) ++s; } - #define SCANWHILE(X) { while ((X)) ++s; } - #define ENDSEG() { ch = *s; *s = 0; ++s; } - #define THROW_ERROR(err, m) error_offset = m, longjmp(error_handler, err) - #define CHECK_ERROR(err, m) { if (*s == 0) THROW_ERROR(err, m); } - - xml_parser(const xml_allocator& alloc): alloc(alloc), error_offset(0) - { - } - - // DOCTYPE consists of nested sections of the following possible types: - // , , "...", '...' - // - // - // First group can not contain nested groups - // Second group can contain nested groups of the same type - // Third group can contain all other groups - void parse_doctype_primitive(char_t*& s) - { - if (*s == '"' || *s == '\'') - { - // quoted string - char_t ch = *s++; - SCANFOR(*s == ch); - if (!*s) THROW_ERROR(status_bad_doctype, s); - - s++; - } - else if (s[0] == '<' && s[1] == '?') - { - // - s += 2; - SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype - if (!*s) THROW_ERROR(status_bad_doctype, s); - - s += 2; - } - else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') - { - s += 4; - SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype - if (!*s) THROW_ERROR(status_bad_doctype, s); - - s += 4; - } - else THROW_ERROR(status_bad_doctype, s); - } - - void parse_doctype_ignore(char_t*& s) - { - assert(s[0] == '<' && s[1] == '!' && s[2] == '['); - s++; - - while (*s) - { - if (s[0] == '<' && s[1] == '!' && s[2] == '[') - { - // nested ignore section - parse_doctype_ignore(s); - } - else if (s[0] == ']' && s[1] == ']' && s[2] == '>') - { - // ignore section end - s += 3; - - return; - } - else s++; - } - - THROW_ERROR(status_bad_doctype, s); - } - - void parse_doctype(char_t*& s, char_t endch, bool toplevel) - { - assert(s[0] == '<' && s[1] == '!'); - s++; - - while (*s) - { - if (s[0] == '<' && s[1] == '!' && s[2] != '-') - { - if (s[2] == '[') - { - // ignore - parse_doctype_ignore(s); - } - else - { - // some control group - parse_doctype(s, endch, false); - } - } - else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') - { - // unknown tag (forbidden), or some primitive group - parse_doctype_primitive(s); - } - else if (*s == '>') - { - s++; - - return; - } - else s++; - } - - if (!toplevel || endch != '>') THROW_ERROR(status_bad_doctype, s); - } - - void parse_exclamation(char_t*& ref_s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) - { - // load into registers - char_t* s = ref_s; - - // parse node contents, starting with exclamation mark - ++s; - - if (*s == '-') // 'value = s; // Save the offset. - } - - if (OPTSET(parse_eol) && OPTSET(parse_comments)) - { - s = strconv_comment(s, endch); - - if (!s) THROW_ERROR(status_bad_comment, cursor->value); - } - else - { - // Scan for terminating '-->'. - SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')); - CHECK_ERROR(status_bad_comment, s); - - if (OPTSET(parse_comments)) - *s = 0; // Zero-terminate this segment at the first terminating '-'. - - s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'. - } - - if (OPTSET(parse_comments)) - { - POPNODE(); // Pop since this is a standalone. - } - } - else THROW_ERROR(status_bad_comment, s); - } - else if (*s == '[') - { - // 'value = s; // Save the offset. - - if (OPTSET(parse_eol)) - { - s = strconv_cdata(s, endch); - - if (!s) THROW_ERROR(status_bad_cdata, cursor->value); - } - else - { - // Scan for terminating ']]>'. - SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')); - CHECK_ERROR(status_bad_cdata, s); - - *s++ = 0; // Zero-terminate this segment. - } - - POPNODE(); // Pop since this is a standalone. - } - else // Flagged for discard, but we still have to scan for the terminator. - { - // Scan for terminating ']]>'. - SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')); - CHECK_ERROR(status_bad_cdata, s); - - ++s; - } - - s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'. - } - else THROW_ERROR(status_bad_cdata, s); - } - else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E')) - { - if (s[6] != 'E') THROW_ERROR(status_bad_doctype, s); - - s -= 2; - - parse_doctype(s, endch, true); - } - else if (*s == 0 && endch == '-') THROW_ERROR(status_bad_comment, s); - else if (*s == 0 && endch == '[') THROW_ERROR(status_bad_cdata, s); - else THROW_ERROR(status_unrecognized_tag, s); - - // store from registers - ref_s = s; - } - - void parse_question(char_t*& ref_s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) - { - // load into registers - char_t* s = ref_s; - xml_node_struct* cursor = ref_cursor; - char_t ch = 0; - - // parse node contents, starting with question mark - ++s; - - // read PI target - char_t* target = s; - - if (!IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_pi, s); - - SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); - CHECK_ERROR(status_bad_pi, s); - - // determine node type; stricmp / strcasecmp is not portable - bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s; - - if (declaration ? OPTSET(parse_declaration) : OPTSET(parse_pi)) - { - if (declaration) - { - // disallow non top-level declarations - if ((cursor->header & xml_memory_page_type_mask) != node_document) THROW_ERROR(status_bad_pi, s); - - PUSHNODE(node_declaration); - } - else - { - PUSHNODE(node_pi); - } - - cursor->name = target; - - ENDSEG(); - - // parse value/attributes - if (ch == '?') - { - // empty node - if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_pi, s); - s += (*s == '>'); - - POPNODE(); - } - else if (IS_CHARTYPE(ch, ct_space)) - { - SKIPWS(); - - // scan for tag end - char_t* value = s; - - SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); - CHECK_ERROR(status_bad_pi, s); - - if (declaration) - { - // replace ending ? with / so that 'element' terminates properly - *s = '/'; - - // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES - s = value; - } - else - { - // store value and step over > - cursor->value = value; - POPNODE(); - - ENDSEG(); - - s += (*s == '>'); - } - } - else THROW_ERROR(status_bad_pi, s); - } - else - { - // scan for tag end - SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); - CHECK_ERROR(status_bad_pi, s); - - s += (s[1] == '>' ? 2 : 1); - } - - // store from registers - ref_s = s; - ref_cursor = cursor; - } - - void parse(char_t* s, xml_node_struct* xmldoc, unsigned int optmsk, char_t endch) - { - strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk); - strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk); - - char_t ch = 0; - xml_node_struct* cursor = xmldoc; - char_t* mark = s; - - while (*s != 0) - { - if (*s == '<') - { - ++s; - - LOC_TAG: - if (IS_CHARTYPE(*s, ct_start_symbol)) // '<#...' - { - PUSHNODE(node_element); // Append a new node to the tree. - - cursor->name = s; - - SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator. - ENDSEG(); // Save char in 'ch', terminate & step over. - - if (ch == '>') - { - // end of tag - } - else if (IS_CHARTYPE(ch, ct_space)) - { - LOC_ATTRIBUTES: - while (true) - { - SKIPWS(); // Eat any whitespace. - - if (IS_CHARTYPE(*s, ct_start_symbol)) // <... #... - { - xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute. - if (!a) THROW_ERROR(status_out_of_memory, 0); - - a->name = s; // Save the offset. - - SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator. - CHECK_ERROR(status_bad_attribute, s); - - ENDSEG(); // Save char in 'ch', terminate & step over. - CHECK_ERROR(status_bad_attribute, s); - - if (IS_CHARTYPE(ch, ct_space)) - { - SKIPWS(); // Eat any whitespace. - CHECK_ERROR(status_bad_attribute, s); - - ch = *s; - ++s; - } - - if (ch == '=') // '<... #=...' - { - SKIPWS(); // Eat any whitespace. - - if (*s == '"' || *s == '\'') // '<... #="...' - { - ch = *s; // Save quote char to avoid breaking on "''" -or- '""'. - ++s; // Step over the quote. - a->value = s; // Save the offset. - - s = strconv_attribute(s, ch); - - if (!s) THROW_ERROR(status_bad_attribute, a->value); - - // After this line the loop continues from the start; - // Whitespaces, / and > are ok, symbols and EOF are wrong, - // everything else will be detected - if (IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_attribute, s); - } - else THROW_ERROR(status_bad_attribute, s); - } - else THROW_ERROR(status_bad_attribute, s); - } - else if (*s == '/') - { - ++s; - - if (*s == '>') - { - POPNODE(); - s++; - break; - } - else if (*s == 0 && endch == '>') - { - POPNODE(); - break; - } - else THROW_ERROR(status_bad_start_element, s); - } - else if (*s == '>') - { - ++s; - - break; - } - else if (*s == 0 && endch == '>') - { - break; - } - else THROW_ERROR(status_bad_start_element, s); - } - - // !!! - } - else if (ch == '/') // '<#.../' - { - if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_start_element, s); - - POPNODE(); // Pop. - - s += (*s == '>'); - } - else if (ch == 0) - { - // we stepped over null terminator, backtrack & handle closing tag - --s; - - if (endch != '>') THROW_ERROR(status_bad_start_element, s); - } - else THROW_ERROR(status_bad_start_element, s); - } - else if (*s == '/') - { - ++s; - - char_t* name = cursor->name; - if (!name) THROW_ERROR(status_end_element_mismatch, s); - - while (IS_CHARTYPE(*s, ct_symbol)) - { - if (*s++ != *name++) THROW_ERROR(status_end_element_mismatch, s); - } - - if (*name) - { - if (*s == 0 && name[0] == endch && name[1] == 0) THROW_ERROR(status_bad_end_element, s); - else THROW_ERROR(status_end_element_mismatch, s); - } - - POPNODE(); // Pop. - - SKIPWS(); - - if (*s == 0) - { - if (endch != '>') THROW_ERROR(status_bad_end_element, s); - } - else - { - if (*s != '>') THROW_ERROR(status_bad_end_element, s); - ++s; - } - } - else if (*s == '?') // 'header & xml_memory_page_type_mask) == node_declaration) goto LOC_ATTRIBUTES; - } - else if (*s == '!') // 'parent) - { - PUSHNODE(node_pcdata); // Append a new node on the tree. - cursor->value = s; // Save the offset. - - s = strconv_pcdata(s); - - POPNODE(); // Pop since this is a standalone. - - if (!*s) break; - } - else - { - SCANFOR(*s == '<'); // '...<' - if (!*s) break; - - ++s; - } - - // We're after '<' - goto LOC_TAG; - } - } - - // check that last tag is closed - if (cursor != xmldoc) THROW_ERROR(status_end_element_mismatch, s); - } - - static xml_parse_result parse(char_t* buffer, size_t length, xml_node_struct* xmldoc, unsigned int optmsk) - { - // store buffer for offset_debug - static_cast(xmldoc)->buffer = buffer; - - // early-out for empty documents - if (length == 0) return make_parse_result(status_ok); - - // create parser on stack - xml_allocator& alloc = static_cast(xmldoc)->allocator; - - xml_parser parser(alloc); - - // save last character and make buffer zero-terminated (speeds up parsing) - char_t endch = buffer[length - 1]; - buffer[length - 1] = 0; - - // perform actual parsing - int error = setjmp(parser.error_handler); - - if (error == 0) - { - parser.parse(buffer, xmldoc, optmsk, endch); - } - - xml_parse_result result = make_parse_result(static_cast(error), parser.error_offset ? parser.error_offset - buffer : 0); - - // update allocator state - alloc = parser.alloc; - - // since we removed last character, we have to handle the only possible false positive - if (result && endch == '<') - { - // there's no possible well-formed document with < at the end - return make_parse_result(status_unrecognized_tag, length); - } - - return result; - } - }; - - // Output facilities - xml_encoding get_write_native_encoding() - { - #ifdef PUGIXML_WCHAR_MODE - return get_wchar_encoding(); - #else - return encoding_utf8; - #endif - } - - xml_encoding get_write_encoding(xml_encoding encoding) - { - // replace wchar encoding with utf implementation - if (encoding == encoding_wchar) return get_wchar_encoding(); - - // replace utf16 encoding with utf16 with specific endianness - if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - // replace utf32 encoding with utf32 with specific endianness - if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - // only do autodetection if no explicit encoding is requested - if (encoding != encoding_auto) return encoding; - - // assume utf8 encoding - return encoding_utf8; - } - -#ifdef PUGIXML_WCHAR_MODE - size_t get_valid_length(const char_t* data, size_t length) - { - assert(length > 0); - - // discard last character if it's the lead of a surrogate pair - return (sizeof(wchar_t) == 2 && (unsigned)(static_cast(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length; - } - - size_t convert_buffer(char* result, const char_t* data, size_t length, xml_encoding encoding) - { - // only endian-swapping is required - if (need_endian_swap_utf(encoding, get_wchar_encoding())) - { - convert_wchar_endian_swap(reinterpret_cast(result), data, length); - - return length * sizeof(char_t); - } - - // convert to utf8 - if (encoding == encoding_utf8) - { - uint8_t* dest = reinterpret_cast(result); - - uint8_t* end = sizeof(wchar_t) == 2 ? - utf_decoder::decode_utf16_block(reinterpret_cast(data), length, dest) : - utf_decoder::decode_utf32_block(reinterpret_cast(data), length, dest); - - return static_cast(end - dest); - } - - // convert to utf16 - if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) - { - uint16_t* dest = reinterpret_cast(result); - - // convert to native utf16 - uint16_t* end = utf_decoder::decode_utf32_block(reinterpret_cast(data), length, dest); - - // swap if necessary - xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); - - return static_cast(end - dest) * sizeof(uint16_t); - } - - // convert to utf32 - if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) - { - uint32_t* dest = reinterpret_cast(result); - - // convert to native utf32 - uint32_t* end = utf_decoder::decode_utf16_block(reinterpret_cast(data), length, dest); - - // swap if necessary - xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); - - return static_cast(end - dest) * sizeof(uint32_t); - } - - // invalid encoding combination (this can't happen) - assert(false); - - return 0; - } -#else - size_t get_valid_length(const char_t* data, size_t length) - { - assert(length > 4); - - for (size_t i = 1; i <= 4; ++i) - { - uint8_t ch = static_cast(data[length - i]); - - // either a standalone character or a leading one - if ((ch & 0xc0) != 0x80) return length - i; - } - - // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk - return length; - } - - size_t convert_buffer(char* result, const char_t* data, size_t length, xml_encoding encoding) - { - if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) - { - uint16_t* dest = reinterpret_cast(result); - - // convert to native utf16 - uint16_t* end = utf_decoder::decode_utf8_block(reinterpret_cast(data), length, dest); - - // swap if necessary - xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); - - return static_cast(end - dest) * sizeof(uint16_t); - } - - if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) - { - uint32_t* dest = reinterpret_cast(result); - - // convert to native utf32 - uint32_t* end = utf_decoder::decode_utf8_block(reinterpret_cast(data), length, dest); - - // swap if necessary - xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); - - return static_cast(end - dest) * sizeof(uint32_t); - } - - // invalid encoding combination (this can't happen) - assert(false); - - return 0; - } -#endif - - class xml_buffered_writer - { - xml_buffered_writer(const xml_buffered_writer&); - xml_buffered_writer& operator=(const xml_buffered_writer&); - - public: - xml_buffered_writer(xml_writer& writer, xml_encoding user_encoding): writer(writer), bufsize(0), encoding(get_write_encoding(user_encoding)) - { - } - - ~xml_buffered_writer() - { - flush(); - } - - void flush() - { - flush(buffer, bufsize); - bufsize = 0; - } - - void flush(const char_t* data, size_t size) - { - if (size == 0) return; - - // fast path, just write data - if (encoding == get_write_native_encoding()) - writer.write(data, size * sizeof(char_t)); - else - { - // convert chunk - size_t result = convert_buffer(scratch, data, size, encoding); - assert(result <= sizeof(scratch)); - - // write data - writer.write(scratch, result); - } - } - - void write(const char_t* data, size_t length) - { - if (bufsize + length > bufcapacity) - { - // flush the remaining buffer contents - flush(); - - // handle large chunks - if (length > bufcapacity) - { - if (encoding == get_write_native_encoding()) - { - // fast path, can just write data chunk - writer.write(data, length * sizeof(char_t)); - return; - } - - // need to convert in suitable chunks - while (length > bufcapacity) - { - // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer - // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary) - size_t chunk_size = get_valid_length(data, bufcapacity); - - // convert chunk and write - flush(data, chunk_size); - - // iterate - data += chunk_size; - length -= chunk_size; - } - - // small tail is copied below - bufsize = 0; - } - } - - memcpy(buffer + bufsize, data, length * sizeof(char_t)); - bufsize += length; - } - - void write(const char_t* data) - { - write(data, impl::strlen(data)); - } - - void write(char_t d0) - { - if (bufsize + 1 > bufcapacity) flush(); - - buffer[bufsize + 0] = d0; - bufsize += 1; - } - - void write(char_t d0, char_t d1) - { - if (bufsize + 2 > bufcapacity) flush(); - - buffer[bufsize + 0] = d0; - buffer[bufsize + 1] = d1; - bufsize += 2; - } - - void write(char_t d0, char_t d1, char_t d2) - { - if (bufsize + 3 > bufcapacity) flush(); - - buffer[bufsize + 0] = d0; - buffer[bufsize + 1] = d1; - buffer[bufsize + 2] = d2; - bufsize += 3; - } - - void write(char_t d0, char_t d1, char_t d2, char_t d3) - { - if (bufsize + 4 > bufcapacity) flush(); - - buffer[bufsize + 0] = d0; - buffer[bufsize + 1] = d1; - buffer[bufsize + 2] = d2; - buffer[bufsize + 3] = d3; - bufsize += 4; - } - - void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) - { - if (bufsize + 5 > bufcapacity) flush(); - - buffer[bufsize + 0] = d0; - buffer[bufsize + 1] = d1; - buffer[bufsize + 2] = d2; - buffer[bufsize + 3] = d3; - buffer[bufsize + 4] = d4; - bufsize += 5; - } - - void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) - { - if (bufsize + 6 > bufcapacity) flush(); - - buffer[bufsize + 0] = d0; - buffer[bufsize + 1] = d1; - buffer[bufsize + 2] = d2; - buffer[bufsize + 3] = d3; - buffer[bufsize + 4] = d4; - buffer[bufsize + 5] = d5; - bufsize += 6; - } - - // utf8 maximum expansion: x4 (-> utf32) - // utf16 maximum expansion: x2 (-> utf32) - // utf32 maximum expansion: x1 - enum { bufcapacity = 2048 }; - - char_t buffer[bufcapacity]; - char scratch[4 * bufcapacity]; - - xml_writer& writer; - size_t bufsize; - xml_encoding encoding; - }; - - void write_bom(xml_writer& writer, xml_encoding encoding) - { - switch (encoding) - { - case encoding_utf8: - writer.write("\xef\xbb\xbf", 3); - break; - - case encoding_utf16_be: - writer.write("\xfe\xff", 2); - break; - - case encoding_utf16_le: - writer.write("\xff\xfe", 2); - break; - - case encoding_utf32_be: - writer.write("\x00\x00\xfe\xff", 4); - break; - - case encoding_utf32_le: - writer.write("\xff\xfe\x00\x00", 4); - break; - - default: - // invalid encoding (this should not happen) - assert(false); - } - } - - void text_output_escaped(xml_buffered_writer& writer, const char_t* s, output_chartype_t type) - { - while (*s) - { - const char_t* prev = s; - - // While *s is a usual symbol - while (!IS_OUTPUT_CHARTYPE(*s, type)) ++s; - - writer.write(prev, static_cast(s - prev)); - - switch (*s) - { - case 0: break; - case '&': - writer.write('&', 'a', 'm', 'p', ';'); - ++s; - break; - case '<': - writer.write('&', 'l', 't', ';'); - ++s; - break; - case '>': - writer.write('&', 'g', 't', ';'); - ++s; - break; - case '"': - writer.write('&', 'q', 'u', 'o', 't', ';'); - ++s; - break; - default: // s is not a usual symbol - { - unsigned int ch = static_cast(*s++); - assert(ch < 32); - - writer.write('&', '#', static_cast((ch / 10) + '0'), static_cast((ch % 10) + '0'), ';'); - } - } - } - } - - void node_output_attributes(xml_buffered_writer& writer, const xml_node& node) - { - const char_t* default_name = PUGIXML_TEXT(":anonymous"); - - for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute()) - { - writer.write(' '); - writer.write(a.name()[0] ? a.name() : default_name); - writer.write('=', '"'); - - text_output_escaped(writer, a.value(), oct_special_attr); - - writer.write('"'); - } - } - - void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth) - { - const char_t* default_name = PUGIXML_TEXT(":anonymous"); - - if ((flags & format_indent) != 0 && (flags & format_raw) == 0) - for (unsigned int i = 0; i < depth; ++i) writer.write(indent); - - switch (node.type()) - { - case node_document: - { - for (xml_node n = node.first_child(); n; n = n.next_sibling()) - node_output(writer, n, indent, flags, depth); - break; - } - - case node_element: - { - const char_t* name = node.name()[0] ? node.name() : default_name; - - writer.write('<'); - writer.write(name); - - node_output_attributes(writer, node); - - if (flags & format_raw) - { - if (!node.first_child()) - writer.write(' ', '/', '>'); - else - { - writer.write('>'); - - for (xml_node n = node.first_child(); n; n = n.next_sibling()) - node_output(writer, n, indent, flags, depth + 1); - - writer.write('<', '/'); - writer.write(name); - writer.write('>'); - } - } - else if (!node.first_child()) - writer.write(' ', '/', '>', '\n'); - else if (node.first_child() == node.last_child() && node.first_child().type() == node_pcdata) - { - writer.write('>'); - - text_output_escaped(writer, node.first_child().value(), oct_special_pcdata); - - writer.write('<', '/'); - writer.write(name); - writer.write('>', '\n'); - } - else - { - writer.write('>', '\n'); - - for (xml_node n = node.first_child(); n; n = n.next_sibling()) - node_output(writer, n, indent, flags, depth + 1); - - if ((flags & format_indent) != 0 && (flags & format_raw) == 0) - for (unsigned int i = 0; i < depth; ++i) writer.write(indent); - - writer.write('<', '/'); - writer.write(name); - writer.write('>', '\n'); - } - - break; - } - - case node_pcdata: - text_output_escaped(writer, node.value(), oct_special_pcdata); - if ((flags & format_raw) == 0) writer.write('\n'); - break; - - case node_cdata: - writer.write('<', '!', '[', 'C', 'D'); - writer.write('A', 'T', 'A', '['); - writer.write(node.value()); - writer.write(']', ']', '>'); - if ((flags & format_raw) == 0) writer.write('\n'); - break; - - case node_comment: - writer.write('<', '!', '-', '-'); - writer.write(node.value()); - writer.write('-', '-', '>'); - if ((flags & format_raw) == 0) writer.write('\n'); - break; - - case node_pi: - case node_declaration: - writer.write('<', '?'); - writer.write(node.name()[0] ? node.name() : default_name); - - if (node.type() == node_declaration) - { - node_output_attributes(writer, node); - } - else if (node.value()[0]) - { - writer.write(' '); - writer.write(node.value()); - } - - writer.write('?', '>'); - if ((flags & format_raw) == 0) writer.write('\n'); - break; - - default: - assert(false); - } - } - - inline bool has_declaration(const xml_node& node) - { - for (xml_node child = node.first_child(); child; child = child.next_sibling()) - { - xml_node_type type = child.type(); - - if (type == node_declaration) return true; - if (type == node_element) return false; - } - - return false; - } - - inline bool allow_insert_child(xml_node_type parent, xml_node_type child) - { - if (parent != node_document && parent != node_element) return false; - if (child == node_document || child == node_null) return false; - if (parent != node_document && child == node_declaration) return false; - - return true; - } - - void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip) - { - assert(dest.type() == source.type()); - - switch (source.type()) - { - case node_element: - { - dest.set_name(source.name()); - - for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute()) - dest.append_attribute(a.name()).set_value(a.value()); - - for (xml_node c = source.first_child(); c; c = c.next_sibling()) - { - if (c == skip) continue; - - xml_node cc = dest.append_child(c.type()); - assert(cc); - - recursive_copy_skip(cc, c, skip); - } - - break; - } - - case node_pcdata: - case node_cdata: - case node_comment: - dest.set_value(source.value()); - break; - - case node_pi: - dest.set_name(source.name()); - dest.set_value(source.value()); - break; - - case node_declaration: - { - dest.set_name(source.name()); - - for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute()) - dest.append_attribute(a.name()).set_value(a.value()); - - break; - } - - default: - assert(false); - } - } - -#ifndef PUGIXML_NO_STL - template xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream >& stream, unsigned int options, xml_encoding encoding) - { - if (!stream.good()) return make_parse_result(status_io_error); - - // get length of remaining data in stream - std::streamoff pos = stream.tellg(); - stream.seekg(0, std::ios::end); - std::streamoff length = stream.tellg() - pos; - stream.seekg(pos, std::ios::beg); - - if (!stream.good() || pos < 0 || length < 0) return make_parse_result(status_io_error); - - // read stream data into memory - size_t read_length = static_cast(length); - - T* s = static_cast(global_allocate((read_length > 0 ? read_length : 1) * sizeof(T))); - if (!s) return make_parse_result(status_out_of_memory); - - stream.read(s, static_cast(read_length)); - - // check for errors - size_t actual_length = static_cast(stream.gcount()); - assert(actual_length <= read_length); - - if (read_length > 0 && actual_length == 0) - { - global_deallocate(s); - return make_parse_result(status_io_error); - } - - // load data from buffer - return doc.load_buffer_inplace_own(s, actual_length * sizeof(T), options, encoding); - } -#endif -} - -namespace pugi -{ - xml_writer_file::xml_writer_file(void* file): file(file) - { - } - - void xml_writer_file::write(const void* data, size_t size) - { - fwrite(data, size, 1, static_cast(file)); - } - -#ifndef PUGIXML_NO_STL - xml_writer_stream::xml_writer_stream(std::basic_ostream >& stream): narrow_stream(&stream), wide_stream(0) - { - } - - xml_writer_stream::xml_writer_stream(std::basic_ostream >& stream): narrow_stream(0), wide_stream(&stream) - { - } - - void xml_writer_stream::write(const void* data, size_t size) - { - if (narrow_stream) - { - assert(!wide_stream); - narrow_stream->write(reinterpret_cast(data), static_cast(size)); - } - else - { - assert(wide_stream); - assert(size % sizeof(wchar_t) == 0); - - wide_stream->write(reinterpret_cast(data), static_cast(size / sizeof(wchar_t))); - } - } -#endif - - xml_tree_walker::xml_tree_walker(): _depth(0) - { - } - - xml_tree_walker::~xml_tree_walker() - { - } - - int xml_tree_walker::depth() const - { - return _depth; - } - - bool xml_tree_walker::begin(xml_node&) - { - return true; - } - - bool xml_tree_walker::end(xml_node&) - { - return true; - } - - xml_attribute::xml_attribute(): _attr(0) - { - } - - xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr) - { - } - - xml_attribute::operator xml_attribute::unspecified_bool_type() const - { -#ifdef __MWERKS__ - return _attr ? &xml_attribute::empty : 0; -#else - return _attr ? &xml_attribute::_attr : 0; -#endif - } - - bool xml_attribute::operator!() const - { - return !_attr; - } - - bool xml_attribute::operator==(const xml_attribute& r) const - { - return (_attr == r._attr); - } - - bool xml_attribute::operator!=(const xml_attribute& r) const - { - return (_attr != r._attr); - } - - bool xml_attribute::operator<(const xml_attribute& r) const - { - return (_attr < r._attr); - } - - bool xml_attribute::operator>(const xml_attribute& r) const - { - return (_attr > r._attr); - } - - bool xml_attribute::operator<=(const xml_attribute& r) const - { - return (_attr <= r._attr); - } - - bool xml_attribute::operator>=(const xml_attribute& r) const - { - return (_attr >= r._attr); - } - - xml_attribute xml_attribute::next_attribute() const - { - return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute(); - } - - xml_attribute xml_attribute::previous_attribute() const - { - return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute(); - } - - int xml_attribute::as_int() const - { - if (!_attr || !_attr->value) return 0; - - #ifdef PUGIXML_WCHAR_MODE - return (int)wcstol(_attr->value, 0, 10); - #else - return (int)strtol(_attr->value, 0, 10); - #endif - } - - unsigned int xml_attribute::as_uint() const - { - if (!_attr || !_attr->value) return 0; - - #ifdef PUGIXML_WCHAR_MODE - return (unsigned int)wcstoul(_attr->value, 0, 10); - #else - return (unsigned int)strtoul(_attr->value, 0, 10); - #endif - } - - double xml_attribute::as_double() const - { - if (!_attr || !_attr->value) return 0; - - #ifdef PUGIXML_WCHAR_MODE - return wcstod(_attr->value, 0); - #else - return strtod(_attr->value, 0); - #endif - } - - float xml_attribute::as_float() const - { - if (!_attr || !_attr->value) return 0; - - #ifdef PUGIXML_WCHAR_MODE - return (float)wcstod(_attr->value, 0); - #else - return (float)strtod(_attr->value, 0); - #endif - } - - bool xml_attribute::as_bool() const - { - if (!_attr || !_attr->value) return false; - - // only look at first char - char_t first = *_attr->value; - - // 1*, t* (true), T* (True), y* (yes), Y* (YES) - return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y'); - } - - bool xml_attribute::empty() const - { - return !_attr; - } - - const char_t* xml_attribute::name() const - { - return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT(""); - } - - const char_t* xml_attribute::value() const - { - return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT(""); - } - - unsigned int xml_attribute::document_order() const - { - return 0; - } - - xml_attribute& xml_attribute::operator=(const char_t* rhs) - { - set_value(rhs); - return *this; - } - - xml_attribute& xml_attribute::operator=(int rhs) - { - set_value(rhs); - return *this; - } - - xml_attribute& xml_attribute::operator=(unsigned int rhs) - { - set_value(rhs); - return *this; - } - - xml_attribute& xml_attribute::operator=(double rhs) - { - set_value(rhs); - return *this; - } - - xml_attribute& xml_attribute::operator=(bool rhs) - { - set_value(rhs); - return *this; - } - - bool xml_attribute::set_name(const char_t* rhs) - { - if (!_attr) return false; - - return strcpy_insitu(_attr->name, _attr->header, xml_memory_page_name_allocated_mask, rhs); - } - - bool xml_attribute::set_value(const char_t* rhs) - { - if (!_attr) return false; - - return strcpy_insitu(_attr->value, _attr->header, xml_memory_page_value_allocated_mask, rhs); - } - - bool xml_attribute::set_value(int rhs) - { - char buf[128]; - sprintf(buf, "%d", rhs); - - #ifdef PUGIXML_WCHAR_MODE - char_t wbuf[128]; - impl::widen_ascii(wbuf, buf); - - return set_value(wbuf); - #else - return set_value(buf); - #endif - } - - bool xml_attribute::set_value(unsigned int rhs) - { - char buf[128]; - sprintf(buf, "%u", rhs); - - #ifdef PUGIXML_WCHAR_MODE - char_t wbuf[128]; - impl::widen_ascii(wbuf, buf); - - return set_value(wbuf); - #else - return set_value(buf); - #endif - } - - bool xml_attribute::set_value(double rhs) - { - char buf[128]; - sprintf(buf, "%g", rhs); - - #ifdef PUGIXML_WCHAR_MODE - char_t wbuf[128]; - impl::widen_ascii(wbuf, buf); - - return set_value(wbuf); - #else - return set_value(buf); - #endif - } - - bool xml_attribute::set_value(bool rhs) - { - return set_value(rhs ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); - } - -#ifdef __BORLANDC__ - bool operator&&(const xml_attribute& lhs, bool rhs) - { - return (bool)lhs && rhs; - } - - bool operator||(const xml_attribute& lhs, bool rhs) - { - return (bool)lhs || rhs; - } -#endif - - xml_node::xml_node(): _root(0) - { - } - - xml_node::xml_node(xml_node_struct* p): _root(p) - { - } - - xml_node::operator xml_node::unspecified_bool_type() const - { -#ifdef __MWERKS__ - return _root ? &xml_node::empty : 0; -#else - return _root ? &xml_node::_root : 0; -#endif - } - - bool xml_node::operator!() const - { - return !_root; - } - - xml_node::iterator xml_node::begin() const - { - return iterator(_root ? _root->first_child : 0, _root); - } - - xml_node::iterator xml_node::end() const - { - return iterator(0, _root); - } - - xml_node::attribute_iterator xml_node::attributes_begin() const - { - return attribute_iterator(_root ? _root->first_attribute : 0, _root); - } - - xml_node::attribute_iterator xml_node::attributes_end() const - { - return attribute_iterator(0, _root); - } - - bool xml_node::operator==(const xml_node& r) const - { - return (_root == r._root); - } - - bool xml_node::operator!=(const xml_node& r) const - { - return (_root != r._root); - } - - bool xml_node::operator<(const xml_node& r) const - { - return (_root < r._root); - } - - bool xml_node::operator>(const xml_node& r) const - { - return (_root > r._root); - } - - bool xml_node::operator<=(const xml_node& r) const - { - return (_root <= r._root); - } - - bool xml_node::operator>=(const xml_node& r) const - { - return (_root >= r._root); - } - - bool xml_node::empty() const - { - return !_root; - } - - const char_t* xml_node::name() const - { - return (_root && _root->name) ? _root->name : PUGIXML_TEXT(""); - } - - xml_node_type xml_node::type() const - { - return _root ? static_cast(_root->header & xml_memory_page_type_mask) : node_null; - } - - const char_t* xml_node::value() const - { - return (_root && _root->value) ? _root->value : PUGIXML_TEXT(""); - } - - xml_node xml_node::child(const char_t* name) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequal(name, i->name)) return xml_node(i); - - return xml_node(); - } - - xml_node xml_node::child_w(const char_t* name) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequalwild(name, i->name)) return xml_node(i); - - return xml_node(); - } - - xml_attribute xml_node::attribute(const char_t* name) const - { - if (!_root) return xml_attribute(); - - for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) - if (i->name && impl::strequal(name, i->name)) - return xml_attribute(i); - - return xml_attribute(); - } - - xml_attribute xml_node::attribute_w(const char_t* name) const - { - if (!_root) return xml_attribute(); - - for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) - if (i->name && impl::strequalwild(name, i->name)) - return xml_attribute(i); - - return xml_attribute(); - } - - xml_node xml_node::next_sibling(const char_t* name) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) - if (i->name && impl::strequal(name, i->name)) return xml_node(i); - - return xml_node(); - } - - xml_node xml_node::next_sibling_w(const char_t* name) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) - if (i->name && impl::strequalwild(name, i->name)) return xml_node(i); - - return xml_node(); - } - - xml_node xml_node::next_sibling() const - { - if (!_root) return xml_node(); - - if (_root->next_sibling) return xml_node(_root->next_sibling); - else return xml_node(); - } - - xml_node xml_node::previous_sibling(const char_t* name) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) - if (i->name && impl::strequal(name, i->name)) return xml_node(i); - - return xml_node(); - } - - xml_node xml_node::previous_sibling_w(const char_t* name) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) - if (i->name && impl::strequalwild(name, i->name)) return xml_node(i); - - return xml_node(); - } - - xml_node xml_node::previous_sibling() const - { - if (!_root) return xml_node(); - - if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c); - else return xml_node(); - } - - xml_node xml_node::parent() const - { - return _root ? xml_node(_root->parent) : xml_node(); - } - - xml_node xml_node::root() const - { - xml_node_struct* r = _root; - - while (r && r->parent) r = r->parent; - - return xml_node(r); - } - - const char_t* xml_node::child_value() const - { - if (!_root) return PUGIXML_TEXT(""); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - { - xml_node_type type = static_cast(i->header & xml_memory_page_type_mask); - - if (i->value && (type == node_pcdata || type == node_cdata)) - return i->value; - } - - return PUGIXML_TEXT(""); - } - - const char_t* xml_node::child_value(const char_t* name) const - { - return child(name).child_value(); - } - - const char_t* xml_node::child_value_w(const char_t* name) const - { - if (!_root) return PUGIXML_TEXT(""); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequalwild(name, i->name)) return xml_node(i).child_value(); - - return PUGIXML_TEXT(""); - } - - xml_attribute xml_node::first_attribute() const - { - return _root ? xml_attribute(_root->first_attribute) : xml_attribute(); - } - - xml_attribute xml_node::last_attribute() const - { - return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute(); - } - - xml_node xml_node::first_child() const - { - return _root ? xml_node(_root->first_child) : xml_node(); - } - - xml_node xml_node::last_child() const - { - return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node(); - } - - bool xml_node::set_name(const char_t* rhs) - { - switch (type()) - { - case node_pi: - case node_declaration: - case node_element: - return strcpy_insitu(_root->name, _root->header, xml_memory_page_name_allocated_mask, rhs); - - default: - return false; - } - } - - bool xml_node::set_value(const char_t* rhs) - { - switch (type()) - { - case node_pi: - case node_cdata: - case node_pcdata: - case node_comment: - return strcpy_insitu(_root->value, _root->header, xml_memory_page_value_allocated_mask, rhs); - - default: - return false; - } - } - - xml_attribute xml_node::append_attribute(const char_t* name) - { - if (type() != node_element && type() != node_declaration) return xml_attribute(); - - xml_attribute a(append_attribute_ll(_root, get_allocator(_root))); - if (!a) return xml_attribute(); - - a.set_name(name); - - return a; - } - - xml_attribute xml_node::insert_attribute_before(const char_t* name, const xml_attribute& attr) - { - if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute(); - - // check that attribute belongs to *this - xml_attribute_struct* cur = attr._attr; - - while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c; - - if (cur != _root->first_attribute) return xml_attribute(); - - xml_attribute a(allocate_attribute(get_allocator(_root))); - if (!a) return xml_attribute(); - - a.set_name(name); - - if (attr._attr->prev_attribute_c->next_attribute) - attr._attr->prev_attribute_c->next_attribute = a._attr; - else - _root->first_attribute = a._attr; - - a._attr->prev_attribute_c = attr._attr->prev_attribute_c; - a._attr->next_attribute = attr._attr; - attr._attr->prev_attribute_c = a._attr; - - return a; - } - - xml_attribute xml_node::insert_attribute_after(const char_t* name, const xml_attribute& attr) - { - if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute(); - - // check that attribute belongs to *this - xml_attribute_struct* cur = attr._attr; - - while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c; - - if (cur != _root->first_attribute) return xml_attribute(); - - xml_attribute a(allocate_attribute(get_allocator(_root))); - if (!a) return xml_attribute(); - - a.set_name(name); - - if (attr._attr->next_attribute) - attr._attr->next_attribute->prev_attribute_c = a._attr; - else - _root->first_attribute->prev_attribute_c = a._attr; - - a._attr->next_attribute = attr._attr->next_attribute; - a._attr->prev_attribute_c = attr._attr; - attr._attr->next_attribute = a._attr; - - return a; - } - - xml_attribute xml_node::append_copy(const xml_attribute& proto) - { - if (!proto) return xml_attribute(); - - xml_attribute result = append_attribute(proto.name()); - result.set_value(proto.value()); - - return result; - } - - xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr) - { - if (!proto) return xml_attribute(); - - xml_attribute result = insert_attribute_after(proto.name(), attr); - result.set_value(proto.value()); - - return result; - } - - xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr) - { - if (!proto) return xml_attribute(); - - xml_attribute result = insert_attribute_before(proto.name(), attr); - result.set_value(proto.value()); - - return result; - } - - xml_node xml_node::append_child(xml_node_type type) - { - if (!allow_insert_child(this->type(), type)) return xml_node(); - - xml_node n(append_node(_root, get_allocator(_root), type)); - if (!n) return xml_node(); - - if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); - - return n; - } - - xml_node xml_node::insert_child_before(xml_node_type type, const xml_node& node) - { - if (!allow_insert_child(this->type(), type)) return xml_node(); - if (!node._root || node._root->parent != _root) return xml_node(); - - xml_node n(allocate_node(get_allocator(_root), type)); - if (!n) return xml_node(); - - n._root->parent = _root; - - if (node._root->prev_sibling_c->next_sibling) - node._root->prev_sibling_c->next_sibling = n._root; - else - _root->first_child = n._root; - - n._root->prev_sibling_c = node._root->prev_sibling_c; - n._root->next_sibling = node._root; - node._root->prev_sibling_c = n._root; - - if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); - - return n; - } - - xml_node xml_node::insert_child_after(xml_node_type type, const xml_node& node) - { - if (!allow_insert_child(this->type(), type)) return xml_node(); - if (!node._root || node._root->parent != _root) return xml_node(); - - xml_node n(allocate_node(get_allocator(_root), type)); - if (!n) return xml_node(); - - n._root->parent = _root; - - if (node._root->next_sibling) - node._root->next_sibling->prev_sibling_c = n._root; - else - _root->first_child->prev_sibling_c = n._root; - - n._root->next_sibling = node._root->next_sibling; - n._root->prev_sibling_c = node._root; - node._root->next_sibling = n._root; - - if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); - - return n; - } - - xml_node xml_node::append_copy(const xml_node& proto) - { - xml_node result = append_child(proto.type()); - - if (result) recursive_copy_skip(result, proto, result); - - return result; - } - - xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node) - { - xml_node result = insert_child_after(proto.type(), node); - - if (result) recursive_copy_skip(result, proto, result); - - return result; - } - - xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node) - { - xml_node result = insert_child_before(proto.type(), node); - - if (result) recursive_copy_skip(result, proto, result); - - return result; - } - - bool xml_node::remove_attribute(const char_t* name) - { - return remove_attribute(attribute(name)); - } - - bool xml_node::remove_attribute(const xml_attribute& a) - { - if (!_root || !a._attr) return false; - - // check that attribute belongs to *this - xml_attribute_struct* attr = a._attr; - - while (attr->prev_attribute_c->next_attribute) attr = attr->prev_attribute_c; - - if (attr != _root->first_attribute) return false; - - if (a._attr->next_attribute) a._attr->next_attribute->prev_attribute_c = a._attr->prev_attribute_c; - else if (_root->first_attribute) _root->first_attribute->prev_attribute_c = a._attr->prev_attribute_c; - - if (a._attr->prev_attribute_c->next_attribute) a._attr->prev_attribute_c->next_attribute = a._attr->next_attribute; - else _root->first_attribute = a._attr->next_attribute; - - destroy_attribute(a._attr, get_allocator(_root)); - - return true; - } - - bool xml_node::remove_child(const char_t* name) - { - return remove_child(child(name)); - } - - bool xml_node::remove_child(const xml_node& n) - { - if (!_root || !n._root || n._root->parent != _root) return false; - - if (n._root->next_sibling) n._root->next_sibling->prev_sibling_c = n._root->prev_sibling_c; - else if (_root->first_child) _root->first_child->prev_sibling_c = n._root->prev_sibling_c; - - if (n._root->prev_sibling_c->next_sibling) n._root->prev_sibling_c->next_sibling = n._root->next_sibling; - else _root->first_child = n._root->next_sibling; - - destroy_node(n._root, get_allocator(_root)); - - return true; - } - - xml_node xml_node::find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequal(name, i->name)) - { - for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) - if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value)) - return xml_node(i); - } - - return xml_node(); - } - - xml_node xml_node::find_child_by_attribute_w(const char_t* name, const char_t* attr_name, const char_t* attr_value) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequalwild(name, i->name)) - { - for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) - if (impl::strequalwild(attr_name, a->name) && impl::strequalwild(attr_value, a->value)) - return xml_node(i); - } - - return xml_node(); - } - - xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) - if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value)) - return xml_node(i); - - return xml_node(); - } - - xml_node xml_node::find_child_by_attribute_w(const char_t* attr_name, const char_t* attr_value) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) - if (impl::strequalwild(attr_name, a->name) && impl::strequalwild(attr_value, a->value)) - return xml_node(i); - - return xml_node(); - } - -#ifndef PUGIXML_NO_STL - string_t xml_node::path(char_t delimiter) const - { - string_t path; - - xml_node cursor = *this; // Make a copy. - - path = cursor.name(); - - while (cursor.parent()) - { - cursor = cursor.parent(); - - string_t temp = cursor.name(); - temp += delimiter; - temp += path; - path.swap(temp); - } - - return path; - } -#endif - - xml_node xml_node::first_element_by_path(const char_t* path, char_t delimiter) const - { - xml_node found = *this; // Current search context. - - if (!_root || !path || !path[0]) return found; - - if (path[0] == delimiter) - { - // Absolute path; e.g. '/foo/bar' - while (found.parent()) found = found.parent(); - ++path; - } - - const char_t* path_segment = path; - - while (*path_segment == delimiter) ++path_segment; - - const char_t* path_segment_end = path_segment; - - while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end; - - if (path_segment == path_segment_end) return found; - - const char_t* next_segment = path_segment_end; - - while (*next_segment == delimiter) ++next_segment; - - if (*path_segment == '.' && path_segment + 1 == path_segment_end) - return found.first_element_by_path(next_segment, delimiter); - else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end) - return found.parent().first_element_by_path(next_segment, delimiter); - else - { - for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling) - { - if (j->name && impl::strequalrange(j->name, path_segment, static_cast(path_segment_end - path_segment))) - { - xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); - - if (subsearch) return subsearch; - } - } - - return xml_node(); - } - } - - bool xml_node::traverse(xml_tree_walker& walker) - { - walker._depth = -1; - - xml_node arg_begin = *this; - if (!walker.begin(arg_begin)) return false; - - xml_node cur = first_child(); - - if (cur) - { - ++walker._depth; - - do - { - xml_node arg_for_each = cur; - if (!walker.for_each(arg_for_each)) - return false; - - if (cur.first_child()) - { - ++walker._depth; - cur = cur.first_child(); - } - else if (cur.next_sibling()) - cur = cur.next_sibling(); - else - { - // Borland C++ workaround - while (!cur.next_sibling() && cur != *this && (bool)cur.parent()) - { - --walker._depth; - cur = cur.parent(); - } - - if (cur != *this) - cur = cur.next_sibling(); - } - } - while (cur && cur != *this); - } - - assert(walker._depth == -1); - - xml_node arg_end = *this; - return walker.end(arg_end); - } - - unsigned int xml_node::document_order() const - { - return 0; - } - - void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const - { - if (!_root) return; - - xml_buffered_writer buffered_writer(writer, encoding); - - node_output(buffered_writer, *this, indent, flags, depth); - } - -#ifndef PUGIXML_NO_STL - void xml_node::print(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const - { - if (!_root) return; - - xml_writer_stream writer(stream); - - print(writer, indent, flags, encoding, depth); - } - - void xml_node::print(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const - { - if (!_root) return; - - xml_writer_stream writer(stream); - - print(writer, indent, flags, encoding_wchar, depth); - } -#endif - - ptrdiff_t xml_node::offset_debug() const - { - xml_node_struct* r = root()._root; - - if (!r) return -1; - - const char_t* buffer = static_cast(r)->buffer; - - if (!buffer) return -1; - - switch (type()) - { - case node_document: - return 0; - - case node_element: - case node_declaration: - case node_pi: - return (_root->header & xml_memory_page_name_allocated_mask) ? -1 : _root->name - buffer; - - case node_pcdata: - case node_cdata: - case node_comment: - return (_root->header & xml_memory_page_value_allocated_mask) ? -1 : _root->value - buffer; - - default: - return -1; - } - } - -#ifdef __BORLANDC__ - bool operator&&(const xml_node& lhs, bool rhs) - { - return (bool)lhs && rhs; - } - - bool operator||(const xml_node& lhs, bool rhs) - { - return (bool)lhs || rhs; - } -#endif - - xml_node_iterator::xml_node_iterator() - { - } - - xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent()) - { - } - - xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) - { - } - - bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const - { - return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; - } - - bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const - { - return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; - } - - xml_node& xml_node_iterator::operator*() - { - assert(_wrap._root); - return _wrap; - } - - xml_node* xml_node_iterator::operator->() - { - assert(_wrap._root); - return &_wrap; - } - - const xml_node_iterator& xml_node_iterator::operator++() - { - assert(_wrap._root); - _wrap._root = _wrap._root->next_sibling; - return *this; - } - - xml_node_iterator xml_node_iterator::operator++(int) - { - xml_node_iterator temp = *this; - ++*this; - return temp; - } - - const xml_node_iterator& xml_node_iterator::operator--() - { - if (_wrap._root) _wrap = _wrap.previous_sibling(); - else _wrap = _parent.last_child(); - return *this; - } - - xml_node_iterator xml_node_iterator::operator--(int) - { - xml_node_iterator temp = *this; - --*this; - return temp; - } - - xml_attribute_iterator::xml_attribute_iterator() - { - } - - xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent) - { - } - - xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) - { - } - - bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const - { - return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root; - } - - bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const - { - return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root; - } - - xml_attribute& xml_attribute_iterator::operator*() - { - assert(_wrap._attr); - return _wrap; - } - - xml_attribute* xml_attribute_iterator::operator->() - { - assert(_wrap._attr); - return &_wrap; - } - - const xml_attribute_iterator& xml_attribute_iterator::operator++() - { - assert(_wrap._attr); - _wrap._attr = _wrap._attr->next_attribute; - return *this; - } - - xml_attribute_iterator xml_attribute_iterator::operator++(int) - { - xml_attribute_iterator temp = *this; - ++*this; - return temp; - } - - const xml_attribute_iterator& xml_attribute_iterator::operator--() - { - if (_wrap._attr) _wrap = _wrap.previous_attribute(); - else _wrap = _parent.last_attribute(); - return *this; - } - - xml_attribute_iterator xml_attribute_iterator::operator--(int) - { - xml_attribute_iterator temp = *this; - --*this; - return temp; - } - - const char* xml_parse_result::description() const - { - switch (status) - { - case status_ok: return "No error"; - - case status_file_not_found: return "File was not found"; - case status_io_error: return "Error reading from file/stream"; - case status_out_of_memory: return "Could not allocate memory"; - case status_internal_error: return "Internal error occurred"; - - case status_unrecognized_tag: return "Could not determine tag type"; - - case status_bad_pi: return "Error parsing document declaration/processing instruction"; - case status_bad_comment: return "Error parsing comment"; - case status_bad_cdata: return "Error parsing CDATA section"; - case status_bad_doctype: return "Error parsing document type declaration"; - case status_bad_pcdata: return "Error parsing PCDATA section"; - case status_bad_start_element: return "Error parsing start element tag"; - case status_bad_attribute: return "Error parsing element attribute"; - case status_bad_end_element: return "Error parsing end element tag"; - case status_end_element_mismatch: return "Start-end tags mismatch"; - - default: return "Unknown error"; - } - } - - xml_document::xml_document(): _buffer(0) - { - create(); - } - - xml_document::~xml_document() - { - destroy(); - } - - void xml_document::create() - { - destroy(); - - // initialize sentinel page - STATIC_ASSERT(offsetof(xml_memory_page, data) + sizeof(xml_document_struct) + xml_memory_page_alignment <= sizeof(_memory)); - - // align upwards to page boundary - void* page_memory = reinterpret_cast((reinterpret_cast(_memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1)); - - // prepare page structure - xml_memory_page* page = xml_memory_page::construct(page_memory); - - page->busy_size = xml_memory_page_size; - - // allocate new root - _root = new (page->data) xml_document_struct(page); - _root->prev_sibling_c = _root; - - // setup allocator - xml_allocator& a = static_cast(_root)->allocator; - a = xml_allocator(page); - - // setup sentinel page - page->allocator = &a; - } - - void xml_document::destroy() - { - // destroy static storage - if (_buffer) - { - global_deallocate(_buffer); - _buffer = 0; - } - - // destroy dynamic storage, leave sentinel page (it's in static memory) - if (_root) - { - xml_memory_page* root_page = reinterpret_cast(_root->header & xml_memory_page_pointer_mask); - assert(root_page && !root_page->prev && !root_page->memory); - - // destroy all pages - for (xml_memory_page* page = root_page->next; page; ) - { - xml_memory_page* next = page->next; - - xml_allocator::deallocate_page(page); - - page = next; - } - - // cleanup root page - root_page->allocator = 0; - root_page->next = 0; - root_page->busy_size = root_page->freed_size = 0; - - _root = 0; - } - } - -#ifndef PUGIXML_NO_STL - xml_parse_result xml_document::load(std::basic_istream >& stream, unsigned int options, xml_encoding encoding) - { - create(); - - return load_stream_impl(*this, stream, options, encoding); - } - - xml_parse_result xml_document::load(std::basic_istream >& stream, unsigned int options) - { - create(); - - return load_stream_impl(*this, stream, options, encoding_wchar); - } -#endif - - xml_parse_result xml_document::load(const char_t* contents, unsigned int options) - { - create(); - - // Force native encoding (skip autodetection) - #ifdef PUGIXML_WCHAR_MODE - xml_encoding encoding = encoding_wchar; - #else - xml_encoding encoding = encoding_utf8; - #endif - - return load_buffer(contents, impl::strlen(contents) * sizeof(char_t), options, encoding); - } - - xml_parse_result xml_document::parse(char* xmlstr, unsigned int options) - { - return load_buffer_inplace(xmlstr, strlen(xmlstr), options, encoding_utf8); - } - - xml_parse_result xml_document::parse(const transfer_ownership_tag&, char* xmlstr, unsigned int options) - { - return load_buffer_inplace_own(xmlstr, strlen(xmlstr), options, encoding_utf8); - } - - xml_parse_result xml_document::load_file(const char* path, unsigned int options, xml_encoding encoding) - { - create(); - - FILE* file = fopen(path, "rb"); - if (!file) return make_parse_result(status_file_not_found); - - fseek(file, 0, SEEK_END); - long length = ftell(file); - fseek(file, 0, SEEK_SET); - - if (length < 0) - { - fclose(file); - return make_parse_result(status_io_error); - } - - char* s = static_cast(global_allocate(length > 0 ? length : 1)); - - if (!s) - { - fclose(file); - return make_parse_result(status_out_of_memory); - } - - size_t read = fread(s, 1, (size_t)length, file); - fclose(file); - - if (read != (size_t)length) - { - global_deallocate(s); - return make_parse_result(status_io_error); - } - - return load_buffer_inplace_own(s, length, options, encoding); - } - - xml_parse_result xml_document::load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own) - { - create(); - - // get actual encoding - xml_encoding buffer_encoding = get_buffer_encoding(encoding, contents, size); - - // get private buffer - char_t* buffer = 0; - size_t length = 0; - - if (!convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return make_parse_result(status_out_of_memory); - - // delete original buffer if we performed a conversion - if (own && buffer != contents) global_deallocate(contents); - - // parse - xml_parse_result res = xml_parser::parse(buffer, length, _root, options); - - // remember encoding - res.encoding = buffer_encoding; - - // grab onto buffer if it's our buffer, user is responsible for deallocating contens himself - if (own || buffer != contents) _buffer = buffer; - - return res; - } - - xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) - { - return load_buffer_impl(const_cast(contents), size, options, encoding, false, false); - } - - xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding) - { - return load_buffer_impl(contents, size, options, encoding, true, false); - } - - xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding) - { - return load_buffer_impl(contents, size, options, encoding, true, true); - } - - void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const - { - if (flags & format_write_bom) write_bom(writer, get_write_encoding(encoding)); - - xml_buffered_writer buffered_writer(writer, encoding); - - if (!(flags & format_no_declaration) && !has_declaration(*this)) - { - buffered_writer.write(PUGIXML_TEXT("")); - if (!(flags & format_raw)) buffered_writer.write('\n'); - } - - node_output(buffered_writer, *this, indent, flags, 0); - } - -#ifndef PUGIXML_NO_STL - void xml_document::save(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const - { - xml_writer_stream writer(stream); - - save(writer, indent, flags, encoding); - } - - void xml_document::save(std::basic_ostream >& stream, const char_t* indent, unsigned int flags) const - { - xml_writer_stream writer(stream); - - save(writer, indent, flags, encoding_wchar); - } -#endif - - bool xml_document::save_file(const char* path, const char_t* indent, unsigned int flags, xml_encoding encoding) const - { - FILE* file = fopen(path, "wb"); - if (!file) return false; - - xml_writer_file writer(file); - save(writer, indent, flags, encoding); - - fclose(file); - - return true; - } - - void xml_document::precompute_document_order() - { - } - -#ifndef PUGIXML_NO_STL - std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str) - { - assert(str); - - STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); - - size_t length = wcslen(str); - - // first pass: get length in utf8 characters - size_t size = sizeof(wchar_t) == 2 ? - utf_decoder::decode_utf16_block(reinterpret_cast(str), length, 0) : - utf_decoder::decode_utf32_block(reinterpret_cast(str), length, 0); - - // allocate resulting string - std::string result; - result.resize(size); - - // second pass: convert to utf8 - if (size > 0) - { - uint8_t* begin = reinterpret_cast(&result[0]); - uint8_t* end = sizeof(wchar_t) == 2 ? - utf_decoder::decode_utf16_block(reinterpret_cast(str), length, begin) : - utf_decoder::decode_utf32_block(reinterpret_cast(str), length, begin); - - // truncate invalid output - assert(begin <= end && static_cast(end - begin) <= result.size()); - result.resize(static_cast(end - begin)); - } - - return result; - } - - std::wstring PUGIXML_FUNCTION as_utf16(const char* str) - { - return as_wide(str); - } - - std::wstring PUGIXML_FUNCTION as_wide(const char* str) - { - assert(str); - - const uint8_t* data = reinterpret_cast(str); - size_t size = strlen(str); - - // first pass: get length in wchar_t - size_t length = utf_decoder::decode_utf8_block(data, size, 0); - - // allocate resulting string - std::wstring result; - result.resize(length); - - // second pass: convert to wchar_t - if (length > 0) - { - wchar_writer::value_type begin = reinterpret_cast(&result[0]); - wchar_writer::value_type end = utf_decoder::decode_utf8_block(data, size, begin); - - // truncate invalid output - assert(begin <= end && static_cast(end - begin) <= result.size()); - result.resize(static_cast(end - begin)); - } - - return result; - } -#endif - - void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate) - { - global_allocate = allocate; - global_deallocate = deallocate; - } - - allocation_function PUGIXML_FUNCTION get_memory_allocation_function() - { - return global_allocate; - } - - deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function() - { - return global_deallocate; - } -} - -#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) -namespace std -{ - // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) - std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&) - { - return std::bidirectional_iterator_tag(); - } - - std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&) - { - return std::bidirectional_iterator_tag(); - } -} -#endif - -#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) -namespace std -{ - // Workarounds for (non-standard) iterator category detection - std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&) - { - return std::bidirectional_iterator_tag(); - } - - std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&) - { - return std::bidirectional_iterator_tag(); - } -} -#endif - -/** - * Copyright (c) 2006-2010 Arseny Kapoulkine - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ +/** + * pugixml parser - version 0.9 + * -------------------------------------------------------- + * Copyright (C) 2006-2010, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Report bugs and download new versions at http://code.google.com/p/pugixml/ + * + * This library is distributed under the MIT License. See notice at the end + * of this file. + * + * This work is based on the pugxml parser, which is: + * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) + */ + +#include "pugixml.hpp" + +#include +#include +#include +#include +#include +#include + +#ifndef PUGIXML_NO_STL +# include +# include +# include +#endif + +// For placement new +#include + +#ifdef _MSC_VER +# pragma warning(disable: 4127) // conditional expression is constant +# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable +# pragma warning(disable: 4324) // structure was padded due to __declspec(align()) +# pragma warning(disable: 4996) // this function or variable may be unsafe +#endif + +#ifdef __INTEL_COMPILER +# pragma warning(disable: 177) // function was declared but never referenced +# pragma warning(disable: 1478 1786) // function was declared "deprecated" +#endif + +#ifdef __BORLANDC__ +# pragma warn -8008 // condition is always false +# pragma warn -8066 // unreachable code +#endif + +#ifdef __SNC__ +# pragma diag_suppress=178 // function waS declared but never referenced +#endif + +// uintptr_t +#if !defined(_MSC_VER) || _MSC_VER >= 1600 +# include +#else +# if _MSC_VER < 1300 +// No native uintptr_t in MSVC6 +typedef size_t uintptr_t; +# endif +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; +#endif + +// Inlining controls +#if defined(_MSC_VER) && _MSC_VER >= 1300 +# define PUGIXML_NO_INLINE __declspec(noinline) +#elif defined(__GNUC__) +# define PUGIXML_NO_INLINE __attribute__((noinline)) +#else +# define PUGIXML_NO_INLINE +#endif + +// Simple static assertion +#define STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; } + +// Memory allocation +namespace +{ + void* default_allocate(size_t size) + { + return malloc(size); + } + + void default_deallocate(void* ptr) + { + free(ptr); + } + + pugi::allocation_function global_allocate = default_allocate; + pugi::deallocation_function global_deallocate = default_deallocate; +} + +// String utilities prototypes +namespace pugi +{ + namespace impl + { + size_t strlen(const char_t* s); + bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count); + void widen_ascii(wchar_t* dest, const char* source); + } +} + +// String utilities +namespace pugi +{ + namespace impl + { + // Get string length + size_t strlen(const char_t* s) + { + #ifdef PUGIXML_WCHAR_MODE + return wcslen(s); + #else + return ::strlen(s); + #endif + } + + // Compare two strings + bool PUGIXML_FUNCTION strequal(const char_t* src, const char_t* dst) + { + #ifdef PUGIXML_WCHAR_MODE + return wcscmp(src, dst) == 0; + #else + return strcmp(src, dst) == 0; + #endif + } + + // Compare lhs with [rhs_begin, rhs_end) + bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count) + { + for (size_t i = 0; i < count; ++i) + if (lhs[i] != rhs[i]) + return false; + + return lhs[count] == 0; + } + + // Character set pattern match. + static bool strequalwild_cset(const char_t** src, const char_t** dst) + { + int find = 0, excl = 0, star = 0; + + if (**src == '!') + { + excl = 1; + ++(*src); + } + + while (**src != ']' || star == 1) + { + if (find == 0) + { + if (**src == '-' && *(*src-1) < *(*src+1) && *(*src+1) != ']' && star == 0) + { + if (**dst >= *(*src-1) && **dst <= *(*src+1)) + { + find = 1; + ++(*src); + } + } + else if (**src == **dst) find = 1; + } + ++(*src); + star = 0; + } + + if (excl == 1) find = (1 - find); + if (find == 1) ++(*dst); + + return find == 0; + } + + // Wildcard pattern match. + static bool strequalwild_astr(const char_t** src, const char_t** dst) + { + int find = 1; + ++(*src); + while ((**dst != 0 && **src == '?') || **src == '*') + { + if(**src == '?') ++(*dst); + ++(*src); + } + while (**src == '*') ++(*src); + if (**dst == 0 && **src != 0) return 0; + if (**dst == 0 && **src == 0) return 1; + else + { + if (!impl::strequalwild(*src,*dst)) + { + do + { + ++(*dst); + while(**src != **dst && **src != '[' && **dst != 0) + ++(*dst); + } + while ((**dst != 0) ? !impl::strequalwild(*src,*dst) : 0 != (find=0)); + } + if (**dst == 0 && **src == 0) find = 1; + return find == 0; + } + } + + // Compare two strings, with globbing, and character sets. + bool PUGIXML_FUNCTION strequalwild(const char_t* src, const char_t* dst) + { + int find = 1; + for(; *src != 0 && find == 1 && *dst != 0; ++src) + { + switch (*src) + { + case '?': ++dst; break; + case '[': ++src; find = !strequalwild_cset(&src,&dst); break; + case '*': find = !strequalwild_astr(&src,&dst); --src; break; + default : find = (int) (*src == *dst); ++dst; + } + } + while (*src == '*' && find == 1) ++src; + return (find == 1 && *dst == 0 && *src == 0); + } + +#ifdef PUGIXML_WCHAR_MODE + // Convert string to wide string, assuming all symbols are ASCII + void widen_ascii(wchar_t* dest, const char* source) + { + for (const char* i = source; *i; ++i) *dest++ = *i; + *dest = 0; + } +#endif + } +} + +namespace pugi +{ + static const size_t xml_memory_page_size = 32768; + + static const uintptr_t xml_memory_page_alignment = 32; + static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1); + static const uintptr_t xml_memory_page_name_allocated_mask = 16; + static const uintptr_t xml_memory_page_value_allocated_mask = 8; + static const uintptr_t xml_memory_page_type_mask = 7; + + struct xml_allocator; + + struct xml_memory_page + { + static xml_memory_page* construct(void* memory) + { + if (!memory) return 0; + + xml_memory_page* result = static_cast(memory); + + result->allocator = 0; + result->memory = 0; + result->prev = 0; + result->next = 0; + result->busy_size = 0; + result->freed_size = 0; + + return result; + } + + xml_allocator* allocator; + + void* memory; + + xml_memory_page* prev; + xml_memory_page* next; + + size_t busy_size; + size_t freed_size; + + char data[1]; + }; + + struct xml_memory_string_header + { + xml_memory_page* page; + size_t full_size; + }; + + struct xml_allocator + { + xml_allocator(xml_memory_page* root): _root(root), _busy_size(root ? root->busy_size : 0) + { + } + + xml_memory_page* allocate_page(size_t data_size) + { + size_t size = offsetof(xml_memory_page, data) + data_size; + + // allocate block with some alignment, leaving memory for worst-case padding + void* memory = global_allocate(size + xml_memory_page_alignment); + if (!memory) return 0; + + // align upwards to page boundary + void* page_memory = reinterpret_cast((reinterpret_cast(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1)); + + // prepare page structure + xml_memory_page* page = xml_memory_page::construct(page_memory); + + page->memory = memory; + page->allocator = _root->allocator; + + return page; + } + + static void deallocate_page(xml_memory_page* page) + { + global_deallocate(page->memory); + } + + void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); + + void* allocate_memory(size_t size, xml_memory_page*& out_page) + { + if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page); + + void* buf = _root->data + _busy_size; + + _busy_size += size; + + out_page = _root; + + return buf; + } + + void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) + { + assert(ptr >= page->data && ptr < page->data + xml_memory_page_size); + (void)!ptr; + + if (page == _root) page->busy_size = _busy_size; + + page->freed_size += size; + assert(page->freed_size <= page->busy_size); + + if (page->freed_size == page->busy_size) + { + if (page->next == 0) + { + assert(_root == page); + + // top page freed, just reset sizes + page->busy_size = page->freed_size = 0; + _busy_size = 0; + } + else + { + assert(_root != page); + assert(page->prev); + + // remove from the list + page->prev->next = page->next; + page->next->prev = page->prev; + + // deallocate + deallocate_page(page); + } + } + } + + char_t* allocate_string(size_t length) + { + // get actual size, rounded up to pointer alignment boundary + size_t size = ((length * sizeof(char_t)) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1); + + // allocate memory for string and header block + size_t full_size = sizeof(xml_memory_string_header) + size; + + xml_memory_page* page; + xml_memory_string_header* header = static_cast(allocate_memory(full_size, page)); + + if (!header) return 0; + + // setup header + header->page = page; + header->full_size = full_size; + + return reinterpret_cast(header + 1); + } + + void deallocate_string(char_t* string) + { + // get header + xml_memory_string_header* header = reinterpret_cast(string) - 1; + + // deallocate + deallocate_memory(header, header->full_size, header->page); + } + + xml_memory_page* _root; + size_t _busy_size; + }; + + PUGIXML_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) + { + const size_t large_allocation_threshold = xml_memory_page_size / 4; + + xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size); + if (!page) return 0; + + if (size <= large_allocation_threshold) + { + _root->busy_size = _busy_size; + + // insert page at the end of linked list + page->prev = _root; + _root->next = page; + _root = page; + + _busy_size = size; + } + else + { + // insert page before the end of linked list + assert(_root->prev); + + page->prev = _root->prev; + page->next = _root; + + _root->prev->next = page; + _root->prev = page; + } + + // allocate inside page + page->busy_size = size; + + out_page = page; + return page->data; + } + + /// A 'name=value' XML attribute structure. + struct xml_attribute_struct + { + /// Default ctor + xml_attribute_struct(xml_memory_page* page): header(reinterpret_cast(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0) + { + } + + uintptr_t header; + + char_t* name; ///< Pointer to attribute name. + char_t* value; ///< Pointer to attribute value. + + xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list) + xml_attribute_struct* next_attribute; ///< Next attribute + }; + + /// An XML document tree node. + struct xml_node_struct + { + /// Default ctor + /// \param type - node type + xml_node_struct(xml_memory_page* page, xml_node_type type): header(reinterpret_cast(page) | type), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) + { + } + + uintptr_t header; + + xml_node_struct* parent; ///< Pointer to parent + + char_t* name; ///< Pointer to element name. + char_t* value; ///< Pointer to any associated string data. + + xml_node_struct* first_child; ///< First child + + xml_node_struct* prev_sibling_c; ///< Left brother (cyclic list) + xml_node_struct* next_sibling; ///< Right brother + + xml_attribute_struct* first_attribute; ///< First attribute + }; + + struct xml_document_struct: public xml_node_struct + { + xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), allocator(0), buffer(0) + { + } + + xml_allocator allocator; + const char_t* buffer; + }; + + inline xml_allocator& get_allocator(const xml_node_struct* node) + { + assert(node); + + return *reinterpret_cast(node->header & xml_memory_page_pointer_mask)->allocator; + } +} + +// Low-level DOM operations +namespace +{ + using namespace pugi; + + inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) + { + xml_memory_page* page; + void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page); + + return new (memory) xml_attribute_struct(page); + } + + inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type) + { + xml_memory_page* page; + void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page); + + return new (memory) xml_node_struct(page, type); + } + + inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) + { + uintptr_t header = a->header; + + if (header & xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name); + if (header & xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value); + + alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast(header & xml_memory_page_pointer_mask)); + } + + inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) + { + uintptr_t header = n->header; + + if (header & xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name); + if (header & xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value); + + for (xml_attribute_struct* attr = n->first_attribute; attr; ) + { + xml_attribute_struct* next = attr->next_attribute; + + destroy_attribute(attr, alloc); + + attr = next; + } + + for (xml_node_struct* child = n->first_child; child; ) + { + xml_node_struct* next = child->next_sibling; + + destroy_node(child, alloc); + + child = next; + } + + alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast(header & xml_memory_page_pointer_mask)); + } + + PUGIXML_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) + { + xml_node_struct* child = allocate_node(alloc, type); + if (!child) return 0; + + child->parent = node; + + xml_node_struct* first_child = node->first_child; + + if (first_child) + { + xml_node_struct* last_child = first_child->prev_sibling_c; + + last_child->next_sibling = child; + child->prev_sibling_c = last_child; + first_child->prev_sibling_c = child; + } + else + { + node->first_child = child; + child->prev_sibling_c = child; + } + + return child; + } + + PUGIXML_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc) + { + xml_attribute_struct* a = allocate_attribute(alloc); + if (!a) return 0; + + xml_attribute_struct* first_attribute = node->first_attribute; + + if (first_attribute) + { + xml_attribute_struct* last_attribute = first_attribute->prev_attribute_c; + + last_attribute->next_attribute = a; + a->prev_attribute_c = last_attribute; + first_attribute->prev_attribute_c = a; + } + else + { + node->first_attribute = a; + a->prev_attribute_c = a; + } + + return a; + } +} + +// Helper classes for code generation +namespace +{ + struct opt_false + { + enum { value = 0 }; + }; + + struct opt_true + { + enum { value = 1 }; + }; +} + +// Unicode utilities +namespace +{ + inline uint16_t endian_swap(uint16_t value) + { + return static_cast(((value & 0xff) << 8) | (value >> 8)); + } + + inline uint32_t endian_swap(uint32_t value) + { + return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24); + } + + struct utf8_counter + { + typedef size_t value_type; + + static value_type low(value_type result, uint32_t ch) + { + // U+0000..U+007F + if (ch < 0x80) return result + 1; + // U+0080..U+07FF + else if (ch < 0x800) return result + 2; + // U+0800..U+FFFF + else return result + 3; + } + + static value_type high(value_type result, uint32_t) + { + // U+10000..U+10FFFF + return result + 4; + } + }; + + struct utf8_writer + { + typedef uint8_t* value_type; + + static value_type low(value_type result, uint32_t ch) + { + // U+0000..U+007F + if (ch < 0x80) + { + *result = static_cast(ch); + return result + 1; + } + // U+0080..U+07FF + else if (ch < 0x800) + { + result[0] = static_cast(0xC0 | (ch >> 6)); + result[1] = static_cast(0x80 | (ch & 0x3F)); + return result + 2; + } + // U+0800..U+FFFF + else + { + result[0] = static_cast(0xE0 | (ch >> 12)); + result[1] = static_cast(0x80 | ((ch >> 6) & 0x3F)); + result[2] = static_cast(0x80 | (ch & 0x3F)); + return result + 3; + } + } + + static value_type high(value_type result, uint32_t ch) + { + // U+10000..U+10FFFF + result[0] = static_cast(0xF0 | (ch >> 18)); + result[1] = static_cast(0x80 | ((ch >> 12) & 0x3F)); + result[2] = static_cast(0x80 | ((ch >> 6) & 0x3F)); + result[3] = static_cast(0x80 | (ch & 0x3F)); + return result + 4; + } + + static value_type any(value_type result, uint32_t ch) + { + return (ch < 0x10000) ? low(result, ch) : high(result, ch); + } + }; + + struct utf16_counter + { + typedef size_t value_type; + + static value_type low(value_type result, uint32_t) + { + return result + 1; + } + + static value_type high(value_type result, uint32_t) + { + return result + 2; + } + }; + + struct utf16_writer + { + typedef uint16_t* value_type; + + static value_type low(value_type result, uint32_t ch) + { + *result = static_cast(ch); + + return result + 1; + } + + static value_type high(value_type result, uint32_t ch) + { + uint32_t msh = (uint32_t)(ch - 0x10000) >> 10; + uint32_t lsh = (uint32_t)(ch - 0x10000) & 0x3ff; + + result[0] = static_cast(0xD800 + msh); + result[1] = static_cast(0xDC00 + lsh); + + return result + 2; + } + + static value_type any(value_type result, uint32_t ch) + { + return (ch < 0x10000) ? low(result, ch) : high(result, ch); + } + }; + + struct utf32_counter + { + typedef size_t value_type; + + static value_type low(value_type result, uint32_t) + { + return result + 1; + } + + static value_type high(value_type result, uint32_t) + { + return result + 1; + } + }; + + struct utf32_writer + { + typedef uint32_t* value_type; + + static value_type low(value_type result, uint32_t ch) + { + *result = ch; + + return result + 1; + } + + static value_type high(value_type result, uint32_t ch) + { + *result = ch; + + return result + 1; + } + + static value_type any(value_type result, uint32_t ch) + { + *result = ch; + + return result + 1; + } + }; + + template struct wchar_selector; + + template <> struct wchar_selector<2> + { + typedef uint16_t type; + typedef utf16_counter counter; + typedef utf16_writer writer; + }; + + template <> struct wchar_selector<4> + { + typedef uint32_t type; + typedef utf32_counter counter; + typedef utf32_writer writer; + }; + + typedef wchar_selector::counter wchar_counter; + typedef wchar_selector::writer wchar_writer; + + template struct utf_decoder + { + static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result) + { + const uint8_t utf8_byte_mask = 0x3f; + + while (size) + { + uint8_t lead = *data; + + // 0xxxxxxx -> U+0000..U+007F + if (lead < 0x80) + { + result = Traits::low(result, lead); + data += 1; + size -= 1; + + // process aligned single-byte (ascii) blocks + if ((reinterpret_cast(data) & 3) == 0) + { + while (size >= 4 && (*reinterpret_cast(data) & 0x80808080) == 0) + { + result = Traits::low(result, data[0]); + result = Traits::low(result, data[1]); + result = Traits::low(result, data[2]); + result = Traits::low(result, data[3]); + data += 4; + size -= 4; + } + } + } + // 110xxxxx -> U+0080..U+07FF + else if ((unsigned)(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80) + { + result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask)); + data += 2; + size -= 2; + } + // 1110xxxx -> U+0800-U+FFFF + else if ((unsigned)(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80) + { + result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask)); + data += 3; + size -= 3; + } + // 11110xxx -> U+10000..U+10FFFF + else if ((unsigned)(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80) + { + result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask)); + data += 4; + size -= 4; + } + // 10xxxxxx or 11111xxx -> invalid + else + { + data += 1; + size -= 1; + } + } + + return result; + } + + static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result) + { + const uint16_t* end = data + size; + + while (data < end) + { + uint16_t lead = opt_swap::value ? endian_swap(*data) : *data; + + // U+0000..U+D7FF + if (lead < 0xD800) + { + result = Traits::low(result, lead); + data += 1; + } + // U+E000..U+FFFF + else if ((unsigned)(lead - 0xE000) < 0x2000) + { + result = Traits::low(result, lead); + data += 1; + } + // surrogate pair lead + else if ((unsigned)(lead - 0xD800) < 0x400 && data + 1 < end) + { + uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1]; + + if ((unsigned)(next - 0xDC00) < 0x400) + { + result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff)); + data += 2; + } + else + { + data += 1; + } + } + else + { + data += 1; + } + } + + return result; + } + + static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result) + { + const uint32_t* end = data + size; + + while (data < end) + { + uint32_t lead = opt_swap::value ? endian_swap(*data) : *data; + + // U+0000..U+FFFF + if (lead < 0x10000) + { + result = Traits::low(result, lead); + data += 1; + } + // U+10000..U+10FFFF + else + { + result = Traits::high(result, lead); + data += 1; + } + } + + return result; + } + }; + + template inline void convert_utf_endian_swap(T* result, const T* data, size_t length) + { + for (size_t i = 0; i < length; ++i) result[i] = endian_swap(data[i]); + } + + inline void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) + { + for (size_t i = 0; i < length; ++i) result[i] = static_cast(endian_swap(static_cast::type>(data[i]))); + } +} + +namespace +{ + using namespace pugi; + + enum chartype_t + { + ct_parse_pcdata = 1, // \0, &, \r, < + ct_parse_attr = 2, // \0, &, \r, ', " + ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab + ct_space = 8, // \r, \n, space, tab + ct_parse_cdata = 16, // \0, ], >, \r + ct_parse_comment = 32, // \0, -, >, \r + ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, . + ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, : + }; + + const unsigned char chartype_table[256] = + { + 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 + 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47 + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63 + 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79 + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95 + 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111 + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127 + + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+ + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 + }; + +#ifdef PUGIXML_WCHAR_MODE + #define IS_CHARTYPE(c, ct) ((static_cast(c) < 128 ? chartype_table[static_cast(c)] : chartype_table[128]) & (ct)) +#else + #define IS_CHARTYPE(c, ct) (chartype_table[static_cast(c)] & (ct)) +#endif + + enum output_chartype_t + { + oct_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, > + oct_special_attr = 2 // Any symbol >= 0 and < 32 (except \t), &, <, >, " + }; + + const unsigned char output_chartype_table[256] = + { + 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15 + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31 + 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32-47 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, // 48-63 + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 64-128 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 128+ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + +#ifdef PUGIXML_WCHAR_MODE + #define IS_OUTPUT_CHARTYPE(c, ct) ((static_cast(c) < 128 ? output_chartype_table[static_cast(c)] : output_chartype_table[128]) & (ct)) +#else + #define IS_OUTPUT_CHARTYPE(c, ct) (output_chartype_table[static_cast(c)] & (ct)) +#endif + + bool is_little_endian() + { + unsigned int ui = 1; + + return *reinterpret_cast(&ui) == 1; + } + + xml_encoding get_wchar_encoding() + { + STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); + + if (sizeof(wchar_t) == 2) + return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + else + return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + } + + xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size) + { + // replace wchar encoding with utf implementation + if (encoding == encoding_wchar) return get_wchar_encoding(); + + // replace utf16 encoding with utf16 with specific endianness + if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + + // replace utf32 encoding with utf32 with specific endianness + if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + // only do autodetection if no explicit encoding is requested + if (encoding != encoding_auto) return encoding; + + // try to guess encoding (based on XML specification, Appendix F.1) + const uint8_t* data = static_cast(contents); + + // look for BOM in first few bytes + if (size > 4 && data[0] == 0 && data[1] == 0 && data[2] == 0xfe && data[3] == 0xff) return encoding_utf32_be; + if (size > 4 && data[0] == 0xff && data[1] == 0xfe && data[2] == 0 && data[3] == 0) return encoding_utf32_le; + if (size > 2 && data[0] == 0xfe && data[1] == 0xff) return encoding_utf16_be; + if (size > 2 && data[0] == 0xff && data[1] == 0xfe) return encoding_utf16_le; + if (size > 3 && data[0] == 0xef && data[1] == 0xbb && data[2] == 0xbf) return encoding_utf8; + + // look for <, 4 && data[0] == 0 && data[1] == 0 && data[2] == 0 && data[3] == 0x3c) return encoding_utf32_be; + if (size > 4 && data[0] == 0x3c && data[1] == 0 && data[2] == 0 && data[3] == 0) return encoding_utf32_le; + if (size > 4 && data[0] == 0 && data[1] == 0x3c && data[2] == 0 && data[3] == 0x3f) return encoding_utf16_be; + if (size > 4 && data[0] == 0x3c && data[1] == 0 && data[2] == 0x3f && data[3] == 0) return encoding_utf16_le; + if (size > 4 && data[0] == 0x3c && data[1] == 0x3f && data[2] == 0x78 && data[3] == 0x6d) return encoding_utf8; + + // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early) + if (size > 2 && data[0] == 0 && data[1] == 0x3c) return encoding_utf16_be; + if (size > 2 && data[0] == 0x3c && data[1] == 0) return encoding_utf16_le; + + // no known BOM detected, assume utf8 + return encoding_utf8; + } + + bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) + { + if (is_mutable) + { + out_buffer = static_cast(const_cast(contents)); + } + else + { + void* buffer = global_allocate(size > 0 ? size : 1); + if (!buffer) return false; + + memcpy(buffer, contents, size); + + out_buffer = static_cast(buffer); + } + + out_length = size / sizeof(char_t); + + return true; + } + +#ifdef PUGIXML_WCHAR_MODE + inline bool need_endian_swap_utf(xml_encoding le, xml_encoding re) + { + return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) || + (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be); + } + + bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) + { + const char_t* data = static_cast(contents); + + if (is_mutable) + { + out_buffer = const_cast(data); + } + else + { + out_buffer = static_cast(global_allocate(size > 0 ? size : 1)); + if (!out_buffer) return false; + } + + out_length = size / sizeof(char_t); + + convert_wchar_endian_swap(out_buffer, data, out_length); + + return true; + } + + bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size) + { + const uint8_t* data = static_cast(contents); + + // first pass: get length in wchar_t units + out_length = utf_decoder::decode_utf8_block(data, size, 0); + + // allocate buffer of suitable length + out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); + if (!out_buffer) return false; + + // second pass: convert utf8 input to wchar_t + wchar_writer::value_type out_begin = reinterpret_cast(out_buffer); + wchar_writer::value_type out_end = utf_decoder::decode_utf8_block(data, size, out_begin); + + assert(out_end == out_begin + out_length); + (void)!out_end; + + return true; + } + + template bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) + { + const uint16_t* data = static_cast(contents); + size_t length = size / sizeof(uint16_t); + + // first pass: get length in wchar_t units + out_length = utf_decoder::decode_utf16_block(data, length, 0); + + // allocate buffer of suitable length + out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); + if (!out_buffer) return false; + + // second pass: convert utf16 input to wchar_t + wchar_writer::value_type out_begin = reinterpret_cast(out_buffer); + wchar_writer::value_type out_end = utf_decoder::decode_utf16_block(data, length, out_begin); + + assert(out_end == out_begin + out_length); + (void)!out_end; + + return true; + } + + template bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) + { + const uint32_t* data = static_cast(contents); + size_t length = size / sizeof(uint32_t); + + // first pass: get length in wchar_t units + out_length = utf_decoder::decode_utf32_block(data, length, 0); + + // allocate buffer of suitable length + out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); + if (!out_buffer) return false; + + // second pass: convert utf32 input to wchar_t + wchar_writer::value_type out_begin = reinterpret_cast(out_buffer); + wchar_writer::value_type out_end = utf_decoder::decode_utf32_block(data, length, out_begin); + + assert(out_end == out_begin + out_length); + (void)!out_end; + + return true; + } + + bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) + { + // get native encoding + xml_encoding wchar_encoding = get_wchar_encoding(); + + // fast path: no conversion required + if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); + + // only endian-swapping is required + if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable); + + // source encoding is utf8 + if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size); + + // source encoding is utf16 + if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) + { + xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + + return (native_encoding == encoding) ? + convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) : + convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true()); + } + + // source encoding is utf32 + if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) + { + xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + return (native_encoding == encoding) ? + convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) : + convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true()); + } + + // invalid encoding combination (this can't happen) + assert(false); + + return false; + } +#else + template bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) + { + const uint16_t* data = static_cast(contents); + size_t length = size / sizeof(uint16_t); + + // first pass: get length in utf8 units + out_length = utf_decoder::decode_utf16_block(data, length, 0); + + // allocate buffer of suitable length + out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); + if (!out_buffer) return false; + + // second pass: convert utf16 input to utf8 + uint8_t* out_begin = reinterpret_cast(out_buffer); + uint8_t* out_end = utf_decoder::decode_utf16_block(data, length, out_begin); + + assert(out_end == out_begin + out_length); + (void)!out_end; + + return true; + } + + template bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) + { + const uint32_t* data = static_cast(contents); + size_t length = size / sizeof(uint32_t); + + // first pass: get length in utf8 units + out_length = utf_decoder::decode_utf32_block(data, length, 0); + + // allocate buffer of suitable length + out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); + if (!out_buffer) return false; + + // second pass: convert utf32 input to utf8 + uint8_t* out_begin = reinterpret_cast(out_buffer); + uint8_t* out_end = utf_decoder::decode_utf32_block(data, length, out_begin); + + assert(out_end == out_begin + out_length); + (void)!out_end; + + return true; + } + + bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) + { + // fast path: no conversion required + if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); + + // source encoding is utf16 + if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) + { + xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + + return (native_encoding == encoding) ? + convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) : + convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true()); + } + + // source encoding is utf32 + if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) + { + xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + return (native_encoding == encoding) ? + convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) : + convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true()); + } + + // invalid encoding combination (this can't happen) + assert(false); + + return false; + } +#endif + + bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source) + { + size_t source_length = impl::strlen(source); + + if (dest && impl::strlen(dest) >= source_length) + { + memcpy(dest, source, (source_length + 1) * sizeof(char_t)); + + return true; + } + else + { + xml_allocator* alloc = reinterpret_cast(header & xml_memory_page_pointer_mask)->allocator; + + char_t* buf = alloc->allocate_string(source_length + 1); + if (!buf) return false; + + memcpy(buf, source, (source_length + 1) * sizeof(char_t)); + + if (header & header_mask) alloc->deallocate_string(dest); + + dest = buf; + header |= header_mask; + + return true; + } + } + + struct gap + { + char_t* end; + size_t size; + + gap(): end(0), size(0) + { + } + + // Push new gap, move s count bytes further (skipping the gap). + // Collapse previous gap. + void push(char_t*& s, size_t count) + { + if (end) // there was a gap already; collapse it + { + // Move [old_gap_end, new_gap_start) to [old_gap_start, ...) + memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end)); + } + + s += count; // end of current gap + + // "merge" two gaps + end = s; + size += count; + } + + // Collapse all gaps, return past-the-end pointer + char_t* flush(char_t* s) + { + if (end) + { + // Move [old_gap_end, current_pos) to [old_gap_start, ...) + memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end)); + + return s - size; + } + else return s; + } + }; + + char_t* strconv_escape(char_t* s, gap& g) + { + char_t* stre = s + 1; + + switch (*stre) + { + case '#': // &#... + { + unsigned int ucsc = 0; + + if (stre[1] == 'x') // &#x... (hex code) + { + stre += 2; + + char_t ch = *stre; + + if (ch == ';') return stre; + + for (;;) + { + if (static_cast(ch - '0') <= 9) + ucsc = 16 * ucsc + (ch - '0'); + else if (static_cast((ch | ' ') - 'a') <= 5) + ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10); + else if (ch == ';') + break; + else // cancel + return stre; + + ch = *++stre; + } + + ++stre; + } + else // &#... (dec code) + { + char_t ch = *++stre; + + if (ch == ';') return stre; + + for (;;) + { + if (static_cast(ch - '0') <= 9) + ucsc = 10 * ucsc + (ch - '0'); + else if (ch == ';') + break; + else // cancel + return stre; + + ch = *++stre; + } + + ++stre; + } + + #ifdef PUGIXML_WCHAR_MODE + s = reinterpret_cast(wchar_writer::any(reinterpret_cast(s), ucsc)); + #else + s = reinterpret_cast(utf8_writer::any(reinterpret_cast(s), ucsc)); + #endif + + g.push(s, stre - s); + return stre; + } + case 'a': // &a + { + ++stre; + + if (*stre == 'm') // &am + { + if (*++stre == 'p' && *++stre == ';') // & + { + *s++ = '&'; + ++stre; + + g.push(s, stre - s); + return stre; + } + } + else if (*stre == 'p') // &ap + { + if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // ' + { + *s++ = '\''; + ++stre; + + g.push(s, stre - s); + return stre; + } + } + break; + } + case 'g': // &g + { + if (*++stre == 't' && *++stre == ';') // > + { + *s++ = '>'; + ++stre; + + g.push(s, stre - s); + return stre; + } + break; + } + case 'l': // &l + { + if (*++stre == 't' && *++stre == ';') // < + { + *s++ = '<'; + ++stre; + + g.push(s, stre - s); + return stre; + } + break; + } + case 'q': // &q + { + if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // " + { + *s++ = '"'; + ++stre; + + g.push(s, stre - s); + return stre; + } + break; + } + } + + return stre; + } + + // Utility macro for last character handling + #define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e))) + + char_t* strconv_comment(char_t* s, char_t endch) + { + if (!*s) return 0; + + gap g; + + while (true) + { + while (!IS_CHARTYPE(*s, ct_parse_comment)) ++s; + + if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair + { + *s++ = '\n'; // replace first one with 0x0a + + if (*s == '\n') g.push(s, 1); + } + else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) // comment ends here + { + *g.flush(s) = 0; + + return s + (s[2] == '>' ? 3 : 2); + } + else if (*s == 0) + { + return 0; + } + else ++s; + } + } + + char_t* strconv_cdata(char_t* s, char_t endch) + { + if (!*s) return 0; + + gap g; + + while (true) + { + while (!IS_CHARTYPE(*s, ct_parse_cdata)) ++s; + + if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair + { + *s++ = '\n'; // replace first one with 0x0a + + if (*s == '\n') g.push(s, 1); + } + else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) // CDATA ends here + { + *g.flush(s) = 0; + + return s + 1; + } + else if (*s == 0) + { + return 0; + } + else ++s; + } + } + + typedef char_t* (*strconv_pcdata_t)(char_t*); + + template struct strconv_pcdata_impl + { + static char_t* parse(char_t* s) + { + gap g; + + while (true) + { + while (!IS_CHARTYPE(*s, ct_parse_pcdata)) ++s; + + if (*s == '<') // PCDATA ends here + { + *g.flush(s) = 0; + + return s + 1; + } + else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair + { + *s++ = '\n'; // replace first one with 0x0a + + if (*s == '\n') g.push(s, 1); + } + else if (opt_escape::value && *s == '&') + { + s = strconv_escape(s, g); + } + else if (*s == 0) + { + return s; + } + else ++s; + } + } + }; + + strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) + { + STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20); + + switch ((optmask >> 4) & 3) // get bitmask for flags (eol escapes) + { + case 0: return strconv_pcdata_impl::parse; + case 1: return strconv_pcdata_impl::parse; + case 2: return strconv_pcdata_impl::parse; + case 3: return strconv_pcdata_impl::parse; + default: return 0; // should not get here + } + } + + typedef char_t* (*strconv_attribute_t)(char_t*, char_t); + + template struct strconv_attribute_impl + { + static char_t* parse_wnorm(char_t* s, char_t end_quote) + { + gap g; + + // trim leading whitespaces + if (IS_CHARTYPE(*s, ct_space)) + { + char_t* str = s; + + do ++str; + while (IS_CHARTYPE(*str, ct_space)); + + g.push(s, str - s); + } + + while (true) + { + while (!IS_CHARTYPE(*s, ct_parse_attr_ws | ct_space)) ++s; + + if (*s == end_quote) + { + char_t* str = g.flush(s); + + do *str-- = 0; + while (IS_CHARTYPE(*str, ct_space)); + + return s + 1; + } + else if (IS_CHARTYPE(*s, ct_space)) + { + *s++ = ' '; + + if (IS_CHARTYPE(*s, ct_space)) + { + char_t* str = s + 1; + while (IS_CHARTYPE(*str, ct_space)) ++str; + + g.push(s, str - s); + } + } + else if (opt_escape::value && *s == '&') + { + s = strconv_escape(s, g); + } + else if (!*s) + { + return 0; + } + else ++s; + } + } + + static char_t* parse_wconv(char_t* s, char_t end_quote) + { + gap g; + + while (true) + { + while (!IS_CHARTYPE(*s, ct_parse_attr_ws)) ++s; + + if (*s == end_quote) + { + *g.flush(s) = 0; + + return s + 1; + } + else if (IS_CHARTYPE(*s, ct_space)) + { + if (*s == '\r') + { + *s++ = ' '; + + if (*s == '\n') g.push(s, 1); + } + else *s++ = ' '; + } + else if (opt_escape::value && *s == '&') + { + s = strconv_escape(s, g); + } + else if (!*s) + { + return 0; + } + else ++s; + } + } + + static char_t* parse_eol(char_t* s, char_t end_quote) + { + gap g; + + while (true) + { + while (!IS_CHARTYPE(*s, ct_parse_attr)) ++s; + + if (*s == end_quote) + { + *g.flush(s) = 0; + + return s + 1; + } + else if (*s == '\r') + { + *s++ = '\n'; + + if (*s == '\n') g.push(s, 1); + } + else if (opt_escape::value && *s == '&') + { + s = strconv_escape(s, g); + } + else if (!*s) + { + return 0; + } + else ++s; + } + } + + static char_t* parse_simple(char_t* s, char_t end_quote) + { + gap g; + + while (true) + { + while (!IS_CHARTYPE(*s, ct_parse_attr)) ++s; + + if (*s == end_quote) + { + *g.flush(s) = 0; + + return s + 1; + } + else if (opt_escape::value && *s == '&') + { + s = strconv_escape(s, g); + } + else if (!*s) + { + return 0; + } + else ++s; + } + } + }; + + strconv_attribute_t get_strconv_attribute(unsigned int optmask) + { + STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80); + + switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes) + { + case 0: return strconv_attribute_impl::parse_simple; + case 1: return strconv_attribute_impl::parse_simple; + case 2: return strconv_attribute_impl::parse_eol; + case 3: return strconv_attribute_impl::parse_eol; + case 4: return strconv_attribute_impl::parse_wconv; + case 5: return strconv_attribute_impl::parse_wconv; + case 6: return strconv_attribute_impl::parse_wconv; + case 7: return strconv_attribute_impl::parse_wconv; + case 8: return strconv_attribute_impl::parse_wnorm; + case 9: return strconv_attribute_impl::parse_wnorm; + case 10: return strconv_attribute_impl::parse_wnorm; + case 11: return strconv_attribute_impl::parse_wnorm; + case 12: return strconv_attribute_impl::parse_wnorm; + case 13: return strconv_attribute_impl::parse_wnorm; + case 14: return strconv_attribute_impl::parse_wnorm; + case 15: return strconv_attribute_impl::parse_wnorm; + default: return 0; // should not get here + } + } + + inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0) + { + xml_parse_result result = {status, offset, encoding_auto}; + return result; + } + + struct xml_parser + { + xml_allocator alloc; + char_t* error_offset; + jmp_buf error_handler; + + // Parser utilities. + #define SKIPWS() { while (IS_CHARTYPE(*s, ct_space)) ++s; } + #define OPTSET(OPT) ( optmsk & OPT ) + #define PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); if (!cursor) longjmp(error_handler, status_out_of_memory); } + #define POPNODE() { cursor = cursor->parent; } + #define SCANFOR(X) { while (*s != 0 && !(X)) ++s; } + #define SCANWHILE(X) { while ((X)) ++s; } + #define ENDSEG() { ch = *s; *s = 0; ++s; } + #define THROW_ERROR(err, m) error_offset = m, longjmp(error_handler, err) + #define CHECK_ERROR(err, m) { if (*s == 0) THROW_ERROR(err, m); } + + xml_parser(const xml_allocator& alloc): alloc(alloc), error_offset(0) + { + } + + // DOCTYPE consists of nested sections of the following possible types: + // , , "...", '...' + // + // + // First group can not contain nested groups + // Second group can contain nested groups of the same type + // Third group can contain all other groups + void parse_doctype_primitive(char_t*& s) + { + if (*s == '"' || *s == '\'') + { + // quoted string + char_t ch = *s++; + SCANFOR(*s == ch); + if (!*s) THROW_ERROR(status_bad_doctype, s); + + s++; + } + else if (s[0] == '<' && s[1] == '?') + { + // + s += 2; + SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype + if (!*s) THROW_ERROR(status_bad_doctype, s); + + s += 2; + } + else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') + { + s += 4; + SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype + if (!*s) THROW_ERROR(status_bad_doctype, s); + + s += 4; + } + else THROW_ERROR(status_bad_doctype, s); + } + + void parse_doctype_ignore(char_t*& s) + { + assert(s[0] == '<' && s[1] == '!' && s[2] == '['); + s++; + + while (*s) + { + if (s[0] == '<' && s[1] == '!' && s[2] == '[') + { + // nested ignore section + parse_doctype_ignore(s); + } + else if (s[0] == ']' && s[1] == ']' && s[2] == '>') + { + // ignore section end + s += 3; + + return; + } + else s++; + } + + THROW_ERROR(status_bad_doctype, s); + } + + void parse_doctype(char_t*& s, char_t endch, bool toplevel) + { + assert(s[0] == '<' && s[1] == '!'); + s++; + + while (*s) + { + if (s[0] == '<' && s[1] == '!' && s[2] != '-') + { + if (s[2] == '[') + { + // ignore + parse_doctype_ignore(s); + } + else + { + // some control group + parse_doctype(s, endch, false); + } + } + else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') + { + // unknown tag (forbidden), or some primitive group + parse_doctype_primitive(s); + } + else if (*s == '>') + { + s++; + + return; + } + else s++; + } + + if (!toplevel || endch != '>') THROW_ERROR(status_bad_doctype, s); + } + + void parse_exclamation(char_t*& ref_s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) + { + // load into registers + char_t* s = ref_s; + + // parse node contents, starting with exclamation mark + ++s; + + if (*s == '-') // 'value = s; // Save the offset. + } + + if (OPTSET(parse_eol) && OPTSET(parse_comments)) + { + s = strconv_comment(s, endch); + + if (!s) THROW_ERROR(status_bad_comment, cursor->value); + } + else + { + // Scan for terminating '-->'. + SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')); + CHECK_ERROR(status_bad_comment, s); + + if (OPTSET(parse_comments)) + *s = 0; // Zero-terminate this segment at the first terminating '-'. + + s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'. + } + + if (OPTSET(parse_comments)) + { + POPNODE(); // Pop since this is a standalone. + } + } + else THROW_ERROR(status_bad_comment, s); + } + else if (*s == '[') + { + // 'value = s; // Save the offset. + + if (OPTSET(parse_eol)) + { + s = strconv_cdata(s, endch); + + if (!s) THROW_ERROR(status_bad_cdata, cursor->value); + } + else + { + // Scan for terminating ']]>'. + SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')); + CHECK_ERROR(status_bad_cdata, s); + + *s++ = 0; // Zero-terminate this segment. + } + + POPNODE(); // Pop since this is a standalone. + } + else // Flagged for discard, but we still have to scan for the terminator. + { + // Scan for terminating ']]>'. + SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')); + CHECK_ERROR(status_bad_cdata, s); + + ++s; + } + + s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'. + } + else THROW_ERROR(status_bad_cdata, s); + } + else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E')) + { + if (s[6] != 'E') THROW_ERROR(status_bad_doctype, s); + + s -= 2; + + parse_doctype(s, endch, true); + } + else if (*s == 0 && endch == '-') THROW_ERROR(status_bad_comment, s); + else if (*s == 0 && endch == '[') THROW_ERROR(status_bad_cdata, s); + else THROW_ERROR(status_unrecognized_tag, s); + + // store from registers + ref_s = s; + } + + void parse_question(char_t*& ref_s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) + { + // load into registers + char_t* s = ref_s; + xml_node_struct* cursor = ref_cursor; + char_t ch = 0; + + // parse node contents, starting with question mark + ++s; + + // read PI target + char_t* target = s; + + if (!IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_pi, s); + + SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); + CHECK_ERROR(status_bad_pi, s); + + // determine node type; stricmp / strcasecmp is not portable + bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s; + + if (declaration ? OPTSET(parse_declaration) : OPTSET(parse_pi)) + { + if (declaration) + { + // disallow non top-level declarations + if ((cursor->header & xml_memory_page_type_mask) != node_document) THROW_ERROR(status_bad_pi, s); + + PUSHNODE(node_declaration); + } + else + { + PUSHNODE(node_pi); + } + + cursor->name = target; + + ENDSEG(); + + // parse value/attributes + if (ch == '?') + { + // empty node + if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_pi, s); + s += (*s == '>'); + + POPNODE(); + } + else if (IS_CHARTYPE(ch, ct_space)) + { + SKIPWS(); + + // scan for tag end + char_t* value = s; + + SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); + CHECK_ERROR(status_bad_pi, s); + + if (declaration) + { + // replace ending ? with / so that 'element' terminates properly + *s = '/'; + + // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES + s = value; + } + else + { + // store value and step over > + cursor->value = value; + POPNODE(); + + ENDSEG(); + + s += (*s == '>'); + } + } + else THROW_ERROR(status_bad_pi, s); + } + else + { + // scan for tag end + SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); + CHECK_ERROR(status_bad_pi, s); + + s += (s[1] == '>' ? 2 : 1); + } + + // store from registers + ref_s = s; + ref_cursor = cursor; + } + + void parse(char_t* s, xml_node_struct* xmldoc, unsigned int optmsk, char_t endch) + { + strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk); + strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk); + + char_t ch = 0; + xml_node_struct* cursor = xmldoc; + char_t* mark = s; + + while (*s != 0) + { + if (*s == '<') + { + ++s; + + LOC_TAG: + if (IS_CHARTYPE(*s, ct_start_symbol)) // '<#...' + { + PUSHNODE(node_element); // Append a new node to the tree. + + cursor->name = s; + + SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator. + ENDSEG(); // Save char in 'ch', terminate & step over. + + if (ch == '>') + { + // end of tag + } + else if (IS_CHARTYPE(ch, ct_space)) + { + LOC_ATTRIBUTES: + while (true) + { + SKIPWS(); // Eat any whitespace. + + if (IS_CHARTYPE(*s, ct_start_symbol)) // <... #... + { + xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute. + if (!a) THROW_ERROR(status_out_of_memory, 0); + + a->name = s; // Save the offset. + + SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator. + CHECK_ERROR(status_bad_attribute, s); + + ENDSEG(); // Save char in 'ch', terminate & step over. + CHECK_ERROR(status_bad_attribute, s); + + if (IS_CHARTYPE(ch, ct_space)) + { + SKIPWS(); // Eat any whitespace. + CHECK_ERROR(status_bad_attribute, s); + + ch = *s; + ++s; + } + + if (ch == '=') // '<... #=...' + { + SKIPWS(); // Eat any whitespace. + + if (*s == '"' || *s == '\'') // '<... #="...' + { + ch = *s; // Save quote char to avoid breaking on "''" -or- '""'. + ++s; // Step over the quote. + a->value = s; // Save the offset. + + s = strconv_attribute(s, ch); + + if (!s) THROW_ERROR(status_bad_attribute, a->value); + + // After this line the loop continues from the start; + // Whitespaces, / and > are ok, symbols and EOF are wrong, + // everything else will be detected + if (IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_attribute, s); + } + else THROW_ERROR(status_bad_attribute, s); + } + else THROW_ERROR(status_bad_attribute, s); + } + else if (*s == '/') + { + ++s; + + if (*s == '>') + { + POPNODE(); + s++; + break; + } + else if (*s == 0 && endch == '>') + { + POPNODE(); + break; + } + else THROW_ERROR(status_bad_start_element, s); + } + else if (*s == '>') + { + ++s; + + break; + } + else if (*s == 0 && endch == '>') + { + break; + } + else THROW_ERROR(status_bad_start_element, s); + } + + // !!! + } + else if (ch == '/') // '<#.../' + { + if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_start_element, s); + + POPNODE(); // Pop. + + s += (*s == '>'); + } + else if (ch == 0) + { + // we stepped over null terminator, backtrack & handle closing tag + --s; + + if (endch != '>') THROW_ERROR(status_bad_start_element, s); + } + else THROW_ERROR(status_bad_start_element, s); + } + else if (*s == '/') + { + ++s; + + char_t* name = cursor->name; + if (!name) THROW_ERROR(status_end_element_mismatch, s); + + while (IS_CHARTYPE(*s, ct_symbol)) + { + if (*s++ != *name++) THROW_ERROR(status_end_element_mismatch, s); + } + + if (*name) + { + if (*s == 0 && name[0] == endch && name[1] == 0) THROW_ERROR(status_bad_end_element, s); + else THROW_ERROR(status_end_element_mismatch, s); + } + + POPNODE(); // Pop. + + SKIPWS(); + + if (*s == 0) + { + if (endch != '>') THROW_ERROR(status_bad_end_element, s); + } + else + { + if (*s != '>') THROW_ERROR(status_bad_end_element, s); + ++s; + } + } + else if (*s == '?') // 'header & xml_memory_page_type_mask) == node_declaration) goto LOC_ATTRIBUTES; + } + else if (*s == '!') // 'parent) + { + PUSHNODE(node_pcdata); // Append a new node on the tree. + cursor->value = s; // Save the offset. + + s = strconv_pcdata(s); + + POPNODE(); // Pop since this is a standalone. + + if (!*s) break; + } + else + { + SCANFOR(*s == '<'); // '...<' + if (!*s) break; + + ++s; + } + + // We're after '<' + goto LOC_TAG; + } + } + + // check that last tag is closed + if (cursor != xmldoc) THROW_ERROR(status_end_element_mismatch, s); + } + + static xml_parse_result parse(char_t* buffer, size_t length, xml_node_struct* xmldoc, unsigned int optmsk) + { + // store buffer for offset_debug + static_cast(xmldoc)->buffer = buffer; + + // early-out for empty documents + if (length == 0) return make_parse_result(status_ok); + + // create parser on stack + xml_allocator& alloc = static_cast(xmldoc)->allocator; + + xml_parser parser(alloc); + + // save last character and make buffer zero-terminated (speeds up parsing) + char_t endch = buffer[length - 1]; + buffer[length - 1] = 0; + + // perform actual parsing + int error = setjmp(parser.error_handler); + + if (error == 0) + { + parser.parse(buffer, xmldoc, optmsk, endch); + } + + xml_parse_result result = make_parse_result(static_cast(error), parser.error_offset ? parser.error_offset - buffer : 0); + + // update allocator state + alloc = parser.alloc; + + // since we removed last character, we have to handle the only possible false positive + if (result && endch == '<') + { + // there's no possible well-formed document with < at the end + return make_parse_result(status_unrecognized_tag, length); + } + + return result; + } + }; + + // Output facilities + xml_encoding get_write_native_encoding() + { + #ifdef PUGIXML_WCHAR_MODE + return get_wchar_encoding(); + #else + return encoding_utf8; + #endif + } + + xml_encoding get_write_encoding(xml_encoding encoding) + { + // replace wchar encoding with utf implementation + if (encoding == encoding_wchar) return get_wchar_encoding(); + + // replace utf16 encoding with utf16 with specific endianness + if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + + // replace utf32 encoding with utf32 with specific endianness + if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + // only do autodetection if no explicit encoding is requested + if (encoding != encoding_auto) return encoding; + + // assume utf8 encoding + return encoding_utf8; + } + +#ifdef PUGIXML_WCHAR_MODE + size_t get_valid_length(const char_t* data, size_t length) + { + assert(length > 0); + + // discard last character if it's the lead of a surrogate pair + return (sizeof(wchar_t) == 2 && (unsigned)(static_cast(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length; + } + + size_t convert_buffer(char* result, const char_t* data, size_t length, xml_encoding encoding) + { + // only endian-swapping is required + if (need_endian_swap_utf(encoding, get_wchar_encoding())) + { + convert_wchar_endian_swap(reinterpret_cast(result), data, length); + + return length * sizeof(char_t); + } + + // convert to utf8 + if (encoding == encoding_utf8) + { + uint8_t* dest = reinterpret_cast(result); + + uint8_t* end = sizeof(wchar_t) == 2 ? + utf_decoder::decode_utf16_block(reinterpret_cast(data), length, dest) : + utf_decoder::decode_utf32_block(reinterpret_cast(data), length, dest); + + return static_cast(end - dest); + } + + // convert to utf16 + if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) + { + uint16_t* dest = reinterpret_cast(result); + + // convert to native utf16 + uint16_t* end = utf_decoder::decode_utf32_block(reinterpret_cast(data), length, dest); + + // swap if necessary + xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + + if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); + + return static_cast(end - dest) * sizeof(uint16_t); + } + + // convert to utf32 + if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) + { + uint32_t* dest = reinterpret_cast(result); + + // convert to native utf32 + uint32_t* end = utf_decoder::decode_utf16_block(reinterpret_cast(data), length, dest); + + // swap if necessary + xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); + + return static_cast(end - dest) * sizeof(uint32_t); + } + + // invalid encoding combination (this can't happen) + assert(false); + + return 0; + } +#else + size_t get_valid_length(const char_t* data, size_t length) + { + assert(length > 4); + + for (size_t i = 1; i <= 4; ++i) + { + uint8_t ch = static_cast(data[length - i]); + + // either a standalone character or a leading one + if ((ch & 0xc0) != 0x80) return length - i; + } + + // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk + return length; + } + + size_t convert_buffer(char* result, const char_t* data, size_t length, xml_encoding encoding) + { + if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) + { + uint16_t* dest = reinterpret_cast(result); + + // convert to native utf16 + uint16_t* end = utf_decoder::decode_utf8_block(reinterpret_cast(data), length, dest); + + // swap if necessary + xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + + if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); + + return static_cast(end - dest) * sizeof(uint16_t); + } + + if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) + { + uint32_t* dest = reinterpret_cast(result); + + // convert to native utf32 + uint32_t* end = utf_decoder::decode_utf8_block(reinterpret_cast(data), length, dest); + + // swap if necessary + xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); + + return static_cast(end - dest) * sizeof(uint32_t); + } + + // invalid encoding combination (this can't happen) + assert(false); + + return 0; + } +#endif + + class xml_buffered_writer + { + xml_buffered_writer(const xml_buffered_writer&); + xml_buffered_writer& operator=(const xml_buffered_writer&); + + public: + xml_buffered_writer(xml_writer& writer, xml_encoding user_encoding): writer(writer), bufsize(0), encoding(get_write_encoding(user_encoding)) + { + } + + ~xml_buffered_writer() + { + flush(); + } + + void flush() + { + flush(buffer, bufsize); + bufsize = 0; + } + + void flush(const char_t* data, size_t size) + { + if (size == 0) return; + + // fast path, just write data + if (encoding == get_write_native_encoding()) + writer.write(data, size * sizeof(char_t)); + else + { + // convert chunk + size_t result = convert_buffer(scratch, data, size, encoding); + assert(result <= sizeof(scratch)); + + // write data + writer.write(scratch, result); + } + } + + void write(const char_t* data, size_t length) + { + if (bufsize + length > bufcapacity) + { + // flush the remaining buffer contents + flush(); + + // handle large chunks + if (length > bufcapacity) + { + if (encoding == get_write_native_encoding()) + { + // fast path, can just write data chunk + writer.write(data, length * sizeof(char_t)); + return; + } + + // need to convert in suitable chunks + while (length > bufcapacity) + { + // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer + // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary) + size_t chunk_size = get_valid_length(data, bufcapacity); + + // convert chunk and write + flush(data, chunk_size); + + // iterate + data += chunk_size; + length -= chunk_size; + } + + // small tail is copied below + bufsize = 0; + } + } + + memcpy(buffer + bufsize, data, length * sizeof(char_t)); + bufsize += length; + } + + void write(const char_t* data) + { + write(data, impl::strlen(data)); + } + + void write(char_t d0) + { + if (bufsize + 1 > bufcapacity) flush(); + + buffer[bufsize + 0] = d0; + bufsize += 1; + } + + void write(char_t d0, char_t d1) + { + if (bufsize + 2 > bufcapacity) flush(); + + buffer[bufsize + 0] = d0; + buffer[bufsize + 1] = d1; + bufsize += 2; + } + + void write(char_t d0, char_t d1, char_t d2) + { + if (bufsize + 3 > bufcapacity) flush(); + + buffer[bufsize + 0] = d0; + buffer[bufsize + 1] = d1; + buffer[bufsize + 2] = d2; + bufsize += 3; + } + + void write(char_t d0, char_t d1, char_t d2, char_t d3) + { + if (bufsize + 4 > bufcapacity) flush(); + + buffer[bufsize + 0] = d0; + buffer[bufsize + 1] = d1; + buffer[bufsize + 2] = d2; + buffer[bufsize + 3] = d3; + bufsize += 4; + } + + void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) + { + if (bufsize + 5 > bufcapacity) flush(); + + buffer[bufsize + 0] = d0; + buffer[bufsize + 1] = d1; + buffer[bufsize + 2] = d2; + buffer[bufsize + 3] = d3; + buffer[bufsize + 4] = d4; + bufsize += 5; + } + + void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) + { + if (bufsize + 6 > bufcapacity) flush(); + + buffer[bufsize + 0] = d0; + buffer[bufsize + 1] = d1; + buffer[bufsize + 2] = d2; + buffer[bufsize + 3] = d3; + buffer[bufsize + 4] = d4; + buffer[bufsize + 5] = d5; + bufsize += 6; + } + + // utf8 maximum expansion: x4 (-> utf32) + // utf16 maximum expansion: x2 (-> utf32) + // utf32 maximum expansion: x1 + enum { bufcapacity = 2048 }; + + char_t buffer[bufcapacity]; + char scratch[4 * bufcapacity]; + + xml_writer& writer; + size_t bufsize; + xml_encoding encoding; + }; + + void write_bom(xml_writer& writer, xml_encoding encoding) + { + switch (encoding) + { + case encoding_utf8: + writer.write("\xef\xbb\xbf", 3); + break; + + case encoding_utf16_be: + writer.write("\xfe\xff", 2); + break; + + case encoding_utf16_le: + writer.write("\xff\xfe", 2); + break; + + case encoding_utf32_be: + writer.write("\x00\x00\xfe\xff", 4); + break; + + case encoding_utf32_le: + writer.write("\xff\xfe\x00\x00", 4); + break; + + default: + // invalid encoding (this should not happen) + assert(false); + } + } + + void text_output_escaped(xml_buffered_writer& writer, const char_t* s, output_chartype_t type) + { + while (*s) + { + const char_t* prev = s; + + // While *s is a usual symbol + while (!IS_OUTPUT_CHARTYPE(*s, type)) ++s; + + writer.write(prev, static_cast(s - prev)); + + switch (*s) + { + case 0: break; + case '&': + writer.write('&', 'a', 'm', 'p', ';'); + ++s; + break; + case '<': + writer.write('&', 'l', 't', ';'); + ++s; + break; + case '>': + writer.write('&', 'g', 't', ';'); + ++s; + break; + case '"': + writer.write('&', 'q', 'u', 'o', 't', ';'); + ++s; + break; + default: // s is not a usual symbol + { + unsigned int ch = static_cast(*s++); + assert(ch < 32); + + writer.write('&', '#', static_cast((ch / 10) + '0'), static_cast((ch % 10) + '0'), ';'); + } + } + } + } + + void node_output_attributes(xml_buffered_writer& writer, const xml_node& node) + { + const char_t* default_name = PUGIXML_TEXT(":anonymous"); + + for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute()) + { + writer.write(' '); + writer.write(a.name()[0] ? a.name() : default_name); + writer.write('=', '"'); + + text_output_escaped(writer, a.value(), oct_special_attr); + + writer.write('"'); + } + } + + void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth) + { + const char_t* default_name = PUGIXML_TEXT(":anonymous"); + + if ((flags & format_indent) != 0 && (flags & format_raw) == 0) + for (unsigned int i = 0; i < depth; ++i) writer.write(indent); + + switch (node.type()) + { + case node_document: + { + for (xml_node n = node.first_child(); n; n = n.next_sibling()) + node_output(writer, n, indent, flags, depth); + break; + } + + case node_element: + { + const char_t* name = node.name()[0] ? node.name() : default_name; + + writer.write('<'); + writer.write(name); + + node_output_attributes(writer, node); + + if (flags & format_raw) + { + if (!node.first_child()) + writer.write(' ', '/', '>'); + else + { + writer.write('>'); + + for (xml_node n = node.first_child(); n; n = n.next_sibling()) + node_output(writer, n, indent, flags, depth + 1); + + writer.write('<', '/'); + writer.write(name); + writer.write('>'); + } + } + else if (!node.first_child()) + writer.write(' ', '/', '>', '\n'); + else if (node.first_child() == node.last_child() && node.first_child().type() == node_pcdata) + { + writer.write('>'); + + text_output_escaped(writer, node.first_child().value(), oct_special_pcdata); + + writer.write('<', '/'); + writer.write(name); + writer.write('>', '\n'); + } + else + { + writer.write('>', '\n'); + + for (xml_node n = node.first_child(); n; n = n.next_sibling()) + node_output(writer, n, indent, flags, depth + 1); + + if ((flags & format_indent) != 0 && (flags & format_raw) == 0) + for (unsigned int i = 0; i < depth; ++i) writer.write(indent); + + writer.write('<', '/'); + writer.write(name); + writer.write('>', '\n'); + } + + break; + } + + case node_pcdata: + text_output_escaped(writer, node.value(), oct_special_pcdata); + if ((flags & format_raw) == 0) writer.write('\n'); + break; + + case node_cdata: + writer.write('<', '!', '[', 'C', 'D'); + writer.write('A', 'T', 'A', '['); + writer.write(node.value()); + writer.write(']', ']', '>'); + if ((flags & format_raw) == 0) writer.write('\n'); + break; + + case node_comment: + writer.write('<', '!', '-', '-'); + writer.write(node.value()); + writer.write('-', '-', '>'); + if ((flags & format_raw) == 0) writer.write('\n'); + break; + + case node_pi: + case node_declaration: + writer.write('<', '?'); + writer.write(node.name()[0] ? node.name() : default_name); + + if (node.type() == node_declaration) + { + node_output_attributes(writer, node); + } + else if (node.value()[0]) + { + writer.write(' '); + writer.write(node.value()); + } + + writer.write('?', '>'); + if ((flags & format_raw) == 0) writer.write('\n'); + break; + + default: + assert(false); + } + } + + inline bool has_declaration(const xml_node& node) + { + for (xml_node child = node.first_child(); child; child = child.next_sibling()) + { + xml_node_type type = child.type(); + + if (type == node_declaration) return true; + if (type == node_element) return false; + } + + return false; + } + + inline bool allow_insert_child(xml_node_type parent, xml_node_type child) + { + if (parent != node_document && parent != node_element) return false; + if (child == node_document || child == node_null) return false; + if (parent != node_document && child == node_declaration) return false; + + return true; + } + + void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip) + { + assert(dest.type() == source.type()); + + switch (source.type()) + { + case node_element: + { + dest.set_name(source.name()); + + for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute()) + dest.append_attribute(a.name()).set_value(a.value()); + + for (xml_node c = source.first_child(); c; c = c.next_sibling()) + { + if (c == skip) continue; + + xml_node cc = dest.append_child(c.type()); + assert(cc); + + recursive_copy_skip(cc, c, skip); + } + + break; + } + + case node_pcdata: + case node_cdata: + case node_comment: + dest.set_value(source.value()); + break; + + case node_pi: + dest.set_name(source.name()); + dest.set_value(source.value()); + break; + + case node_declaration: + { + dest.set_name(source.name()); + + for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute()) + dest.append_attribute(a.name()).set_value(a.value()); + + break; + } + + default: + assert(false); + } + } + +#ifndef PUGIXML_NO_STL + template xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream >& stream, unsigned int options, xml_encoding encoding) + { + if (!stream.good()) return make_parse_result(status_io_error); + + // get length of remaining data in stream + std::streamoff pos = stream.tellg(); + stream.seekg(0, std::ios::end); + std::streamoff length = stream.tellg() - pos; + stream.seekg(pos, std::ios::beg); + + if (!stream.good() || pos < 0 || length < 0) return make_parse_result(status_io_error); + + // read stream data into memory + size_t read_length = static_cast(length); + + T* s = static_cast(global_allocate((read_length > 0 ? read_length : 1) * sizeof(T))); + if (!s) return make_parse_result(status_out_of_memory); + + stream.read(s, static_cast(read_length)); + + // check for errors + size_t actual_length = static_cast(stream.gcount()); + assert(actual_length <= read_length); + + if (read_length > 0 && actual_length == 0) + { + global_deallocate(s); + return make_parse_result(status_io_error); + } + + // load data from buffer + return doc.load_buffer_inplace_own(s, actual_length * sizeof(T), options, encoding); + } +#endif +} + +namespace pugi +{ + xml_writer_file::xml_writer_file(void* file): file(file) + { + } + + void xml_writer_file::write(const void* data, size_t size) + { + fwrite(data, size, 1, static_cast(file)); + } + +#ifndef PUGIXML_NO_STL + xml_writer_stream::xml_writer_stream(std::basic_ostream >& stream): narrow_stream(&stream), wide_stream(0) + { + } + + xml_writer_stream::xml_writer_stream(std::basic_ostream >& stream): narrow_stream(0), wide_stream(&stream) + { + } + + void xml_writer_stream::write(const void* data, size_t size) + { + if (narrow_stream) + { + assert(!wide_stream); + narrow_stream->write(reinterpret_cast(data), static_cast(size)); + } + else + { + assert(wide_stream); + assert(size % sizeof(wchar_t) == 0); + + wide_stream->write(reinterpret_cast(data), static_cast(size / sizeof(wchar_t))); + } + } +#endif + + xml_tree_walker::xml_tree_walker(): _depth(0) + { + } + + xml_tree_walker::~xml_tree_walker() + { + } + + int xml_tree_walker::depth() const + { + return _depth; + } + + bool xml_tree_walker::begin(xml_node&) + { + return true; + } + + bool xml_tree_walker::end(xml_node&) + { + return true; + } + + xml_attribute::xml_attribute(): _attr(0) + { + } + + xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr) + { + } + + xml_attribute::operator xml_attribute::unspecified_bool_type() const + { +#ifdef __MWERKS__ + return _attr ? &xml_attribute::empty : 0; +#else + return _attr ? &xml_attribute::_attr : 0; +#endif + } + + bool xml_attribute::operator!() const + { + return !_attr; + } + + bool xml_attribute::operator==(const xml_attribute& r) const + { + return (_attr == r._attr); + } + + bool xml_attribute::operator!=(const xml_attribute& r) const + { + return (_attr != r._attr); + } + + bool xml_attribute::operator<(const xml_attribute& r) const + { + return (_attr < r._attr); + } + + bool xml_attribute::operator>(const xml_attribute& r) const + { + return (_attr > r._attr); + } + + bool xml_attribute::operator<=(const xml_attribute& r) const + { + return (_attr <= r._attr); + } + + bool xml_attribute::operator>=(const xml_attribute& r) const + { + return (_attr >= r._attr); + } + + xml_attribute xml_attribute::next_attribute() const + { + return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute(); + } + + xml_attribute xml_attribute::previous_attribute() const + { + return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute(); + } + + int xml_attribute::as_int() const + { + if (!_attr || !_attr->value) return 0; + + #ifdef PUGIXML_WCHAR_MODE + return (int)wcstol(_attr->value, 0, 10); + #else + return (int)strtol(_attr->value, 0, 10); + #endif + } + + unsigned int xml_attribute::as_uint() const + { + if (!_attr || !_attr->value) return 0; + + #ifdef PUGIXML_WCHAR_MODE + return (unsigned int)wcstoul(_attr->value, 0, 10); + #else + return (unsigned int)strtoul(_attr->value, 0, 10); + #endif + } + + double xml_attribute::as_double() const + { + if (!_attr || !_attr->value) return 0; + + #ifdef PUGIXML_WCHAR_MODE + return wcstod(_attr->value, 0); + #else + return strtod(_attr->value, 0); + #endif + } + + float xml_attribute::as_float() const + { + if (!_attr || !_attr->value) return 0; + + #ifdef PUGIXML_WCHAR_MODE + return (float)wcstod(_attr->value, 0); + #else + return (float)strtod(_attr->value, 0); + #endif + } + + bool xml_attribute::as_bool() const + { + if (!_attr || !_attr->value) return false; + + // only look at first char + char_t first = *_attr->value; + + // 1*, t* (true), T* (True), y* (yes), Y* (YES) + return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y'); + } + + bool xml_attribute::empty() const + { + return !_attr; + } + + const char_t* xml_attribute::name() const + { + return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT(""); + } + + const char_t* xml_attribute::value() const + { + return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT(""); + } + + unsigned int xml_attribute::document_order() const + { + return 0; + } + + xml_attribute& xml_attribute::operator=(const char_t* rhs) + { + set_value(rhs); + return *this; + } + + xml_attribute& xml_attribute::operator=(int rhs) + { + set_value(rhs); + return *this; + } + + xml_attribute& xml_attribute::operator=(unsigned int rhs) + { + set_value(rhs); + return *this; + } + + xml_attribute& xml_attribute::operator=(double rhs) + { + set_value(rhs); + return *this; + } + + xml_attribute& xml_attribute::operator=(bool rhs) + { + set_value(rhs); + return *this; + } + + bool xml_attribute::set_name(const char_t* rhs) + { + if (!_attr) return false; + + return strcpy_insitu(_attr->name, _attr->header, xml_memory_page_name_allocated_mask, rhs); + } + + bool xml_attribute::set_value(const char_t* rhs) + { + if (!_attr) return false; + + return strcpy_insitu(_attr->value, _attr->header, xml_memory_page_value_allocated_mask, rhs); + } + + bool xml_attribute::set_value(int rhs) + { + char buf[128]; + sprintf(buf, "%d", rhs); + + #ifdef PUGIXML_WCHAR_MODE + char_t wbuf[128]; + impl::widen_ascii(wbuf, buf); + + return set_value(wbuf); + #else + return set_value(buf); + #endif + } + + bool xml_attribute::set_value(unsigned int rhs) + { + char buf[128]; + sprintf(buf, "%u", rhs); + + #ifdef PUGIXML_WCHAR_MODE + char_t wbuf[128]; + impl::widen_ascii(wbuf, buf); + + return set_value(wbuf); + #else + return set_value(buf); + #endif + } + + bool xml_attribute::set_value(double rhs) + { + char buf[128]; + sprintf(buf, "%g", rhs); + + #ifdef PUGIXML_WCHAR_MODE + char_t wbuf[128]; + impl::widen_ascii(wbuf, buf); + + return set_value(wbuf); + #else + return set_value(buf); + #endif + } + + bool xml_attribute::set_value(bool rhs) + { + return set_value(rhs ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); + } + +#ifdef __BORLANDC__ + bool operator&&(const xml_attribute& lhs, bool rhs) + { + return (bool)lhs && rhs; + } + + bool operator||(const xml_attribute& lhs, bool rhs) + { + return (bool)lhs || rhs; + } +#endif + + xml_node::xml_node(): _root(0) + { + } + + xml_node::xml_node(xml_node_struct* p): _root(p) + { + } + + xml_node::operator xml_node::unspecified_bool_type() const + { +#ifdef __MWERKS__ + return _root ? &xml_node::empty : 0; +#else + return _root ? &xml_node::_root : 0; +#endif + } + + bool xml_node::operator!() const + { + return !_root; + } + + xml_node::iterator xml_node::begin() const + { + return iterator(_root ? _root->first_child : 0, _root); + } + + xml_node::iterator xml_node::end() const + { + return iterator(0, _root); + } + + xml_node::attribute_iterator xml_node::attributes_begin() const + { + return attribute_iterator(_root ? _root->first_attribute : 0, _root); + } + + xml_node::attribute_iterator xml_node::attributes_end() const + { + return attribute_iterator(0, _root); + } + + bool xml_node::operator==(const xml_node& r) const + { + return (_root == r._root); + } + + bool xml_node::operator!=(const xml_node& r) const + { + return (_root != r._root); + } + + bool xml_node::operator<(const xml_node& r) const + { + return (_root < r._root); + } + + bool xml_node::operator>(const xml_node& r) const + { + return (_root > r._root); + } + + bool xml_node::operator<=(const xml_node& r) const + { + return (_root <= r._root); + } + + bool xml_node::operator>=(const xml_node& r) const + { + return (_root >= r._root); + } + + bool xml_node::empty() const + { + return !_root; + } + + const char_t* xml_node::name() const + { + return (_root && _root->name) ? _root->name : PUGIXML_TEXT(""); + } + + xml_node_type xml_node::type() const + { + return _root ? static_cast(_root->header & xml_memory_page_type_mask) : node_null; + } + + const char_t* xml_node::value() const + { + return (_root && _root->value) ? _root->value : PUGIXML_TEXT(""); + } + + xml_node xml_node::child(const char_t* name) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + if (i->name && impl::strequal(name, i->name)) return xml_node(i); + + return xml_node(); + } + + xml_node xml_node::child_w(const char_t* name) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + if (i->name && impl::strequalwild(name, i->name)) return xml_node(i); + + return xml_node(); + } + + xml_attribute xml_node::attribute(const char_t* name) const + { + if (!_root) return xml_attribute(); + + for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) + if (i->name && impl::strequal(name, i->name)) + return xml_attribute(i); + + return xml_attribute(); + } + + xml_attribute xml_node::attribute_w(const char_t* name) const + { + if (!_root) return xml_attribute(); + + for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) + if (i->name && impl::strequalwild(name, i->name)) + return xml_attribute(i); + + return xml_attribute(); + } + + xml_node xml_node::next_sibling(const char_t* name) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) + if (i->name && impl::strequal(name, i->name)) return xml_node(i); + + return xml_node(); + } + + xml_node xml_node::next_sibling_w(const char_t* name) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) + if (i->name && impl::strequalwild(name, i->name)) return xml_node(i); + + return xml_node(); + } + + xml_node xml_node::next_sibling() const + { + if (!_root) return xml_node(); + + if (_root->next_sibling) return xml_node(_root->next_sibling); + else return xml_node(); + } + + xml_node xml_node::previous_sibling(const char_t* name) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) + if (i->name && impl::strequal(name, i->name)) return xml_node(i); + + return xml_node(); + } + + xml_node xml_node::previous_sibling_w(const char_t* name) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) + if (i->name && impl::strequalwild(name, i->name)) return xml_node(i); + + return xml_node(); + } + + xml_node xml_node::previous_sibling() const + { + if (!_root) return xml_node(); + + if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c); + else return xml_node(); + } + + xml_node xml_node::parent() const + { + return _root ? xml_node(_root->parent) : xml_node(); + } + + xml_node xml_node::root() const + { + xml_node_struct* r = _root; + + while (r && r->parent) r = r->parent; + + return xml_node(r); + } + + const char_t* xml_node::child_value() const + { + if (!_root) return PUGIXML_TEXT(""); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + { + xml_node_type type = static_cast(i->header & xml_memory_page_type_mask); + + if (i->value && (type == node_pcdata || type == node_cdata)) + return i->value; + } + + return PUGIXML_TEXT(""); + } + + const char_t* xml_node::child_value(const char_t* name) const + { + return child(name).child_value(); + } + + const char_t* xml_node::child_value_w(const char_t* name) const + { + if (!_root) return PUGIXML_TEXT(""); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + if (i->name && impl::strequalwild(name, i->name)) return xml_node(i).child_value(); + + return PUGIXML_TEXT(""); + } + + xml_attribute xml_node::first_attribute() const + { + return _root ? xml_attribute(_root->first_attribute) : xml_attribute(); + } + + xml_attribute xml_node::last_attribute() const + { + return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute(); + } + + xml_node xml_node::first_child() const + { + return _root ? xml_node(_root->first_child) : xml_node(); + } + + xml_node xml_node::last_child() const + { + return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node(); + } + + bool xml_node::set_name(const char_t* rhs) + { + switch (type()) + { + case node_pi: + case node_declaration: + case node_element: + return strcpy_insitu(_root->name, _root->header, xml_memory_page_name_allocated_mask, rhs); + + default: + return false; + } + } + + bool xml_node::set_value(const char_t* rhs) + { + switch (type()) + { + case node_pi: + case node_cdata: + case node_pcdata: + case node_comment: + return strcpy_insitu(_root->value, _root->header, xml_memory_page_value_allocated_mask, rhs); + + default: + return false; + } + } + + xml_attribute xml_node::append_attribute(const char_t* name) + { + if (type() != node_element && type() != node_declaration) return xml_attribute(); + + xml_attribute a(append_attribute_ll(_root, get_allocator(_root))); + if (!a) return xml_attribute(); + + a.set_name(name); + + return a; + } + + xml_attribute xml_node::insert_attribute_before(const char_t* name, const xml_attribute& attr) + { + if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute(); + + // check that attribute belongs to *this + xml_attribute_struct* cur = attr._attr; + + while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c; + + if (cur != _root->first_attribute) return xml_attribute(); + + xml_attribute a(allocate_attribute(get_allocator(_root))); + if (!a) return xml_attribute(); + + a.set_name(name); + + if (attr._attr->prev_attribute_c->next_attribute) + attr._attr->prev_attribute_c->next_attribute = a._attr; + else + _root->first_attribute = a._attr; + + a._attr->prev_attribute_c = attr._attr->prev_attribute_c; + a._attr->next_attribute = attr._attr; + attr._attr->prev_attribute_c = a._attr; + + return a; + } + + xml_attribute xml_node::insert_attribute_after(const char_t* name, const xml_attribute& attr) + { + if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute(); + + // check that attribute belongs to *this + xml_attribute_struct* cur = attr._attr; + + while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c; + + if (cur != _root->first_attribute) return xml_attribute(); + + xml_attribute a(allocate_attribute(get_allocator(_root))); + if (!a) return xml_attribute(); + + a.set_name(name); + + if (attr._attr->next_attribute) + attr._attr->next_attribute->prev_attribute_c = a._attr; + else + _root->first_attribute->prev_attribute_c = a._attr; + + a._attr->next_attribute = attr._attr->next_attribute; + a._attr->prev_attribute_c = attr._attr; + attr._attr->next_attribute = a._attr; + + return a; + } + + xml_attribute xml_node::append_copy(const xml_attribute& proto) + { + if (!proto) return xml_attribute(); + + xml_attribute result = append_attribute(proto.name()); + result.set_value(proto.value()); + + return result; + } + + xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr) + { + if (!proto) return xml_attribute(); + + xml_attribute result = insert_attribute_after(proto.name(), attr); + result.set_value(proto.value()); + + return result; + } + + xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr) + { + if (!proto) return xml_attribute(); + + xml_attribute result = insert_attribute_before(proto.name(), attr); + result.set_value(proto.value()); + + return result; + } + + xml_node xml_node::append_child(xml_node_type type) + { + if (!allow_insert_child(this->type(), type)) return xml_node(); + + xml_node n(append_node(_root, get_allocator(_root), type)); + if (!n) return xml_node(); + + if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); + + return n; + } + + xml_node xml_node::insert_child_before(xml_node_type type, const xml_node& node) + { + if (!allow_insert_child(this->type(), type)) return xml_node(); + if (!node._root || node._root->parent != _root) return xml_node(); + + xml_node n(allocate_node(get_allocator(_root), type)); + if (!n) return xml_node(); + + n._root->parent = _root; + + if (node._root->prev_sibling_c->next_sibling) + node._root->prev_sibling_c->next_sibling = n._root; + else + _root->first_child = n._root; + + n._root->prev_sibling_c = node._root->prev_sibling_c; + n._root->next_sibling = node._root; + node._root->prev_sibling_c = n._root; + + if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); + + return n; + } + + xml_node xml_node::insert_child_after(xml_node_type type, const xml_node& node) + { + if (!allow_insert_child(this->type(), type)) return xml_node(); + if (!node._root || node._root->parent != _root) return xml_node(); + + xml_node n(allocate_node(get_allocator(_root), type)); + if (!n) return xml_node(); + + n._root->parent = _root; + + if (node._root->next_sibling) + node._root->next_sibling->prev_sibling_c = n._root; + else + _root->first_child->prev_sibling_c = n._root; + + n._root->next_sibling = node._root->next_sibling; + n._root->prev_sibling_c = node._root; + node._root->next_sibling = n._root; + + if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); + + return n; + } + + xml_node xml_node::append_copy(const xml_node& proto) + { + xml_node result = append_child(proto.type()); + + if (result) recursive_copy_skip(result, proto, result); + + return result; + } + + xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node) + { + xml_node result = insert_child_after(proto.type(), node); + + if (result) recursive_copy_skip(result, proto, result); + + return result; + } + + xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node) + { + xml_node result = insert_child_before(proto.type(), node); + + if (result) recursive_copy_skip(result, proto, result); + + return result; + } + + bool xml_node::remove_attribute(const char_t* name) + { + return remove_attribute(attribute(name)); + } + + bool xml_node::remove_attribute(const xml_attribute& a) + { + if (!_root || !a._attr) return false; + + // check that attribute belongs to *this + xml_attribute_struct* attr = a._attr; + + while (attr->prev_attribute_c->next_attribute) attr = attr->prev_attribute_c; + + if (attr != _root->first_attribute) return false; + + if (a._attr->next_attribute) a._attr->next_attribute->prev_attribute_c = a._attr->prev_attribute_c; + else if (_root->first_attribute) _root->first_attribute->prev_attribute_c = a._attr->prev_attribute_c; + + if (a._attr->prev_attribute_c->next_attribute) a._attr->prev_attribute_c->next_attribute = a._attr->next_attribute; + else _root->first_attribute = a._attr->next_attribute; + + destroy_attribute(a._attr, get_allocator(_root)); + + return true; + } + + bool xml_node::remove_child(const char_t* name) + { + return remove_child(child(name)); + } + + bool xml_node::remove_child(const xml_node& n) + { + if (!_root || !n._root || n._root->parent != _root) return false; + + if (n._root->next_sibling) n._root->next_sibling->prev_sibling_c = n._root->prev_sibling_c; + else if (_root->first_child) _root->first_child->prev_sibling_c = n._root->prev_sibling_c; + + if (n._root->prev_sibling_c->next_sibling) n._root->prev_sibling_c->next_sibling = n._root->next_sibling; + else _root->first_child = n._root->next_sibling; + + destroy_node(n._root, get_allocator(_root)); + + return true; + } + + xml_node xml_node::find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + if (i->name && impl::strequal(name, i->name)) + { + for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) + if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value)) + return xml_node(i); + } + + return xml_node(); + } + + xml_node xml_node::find_child_by_attribute_w(const char_t* name, const char_t* attr_name, const char_t* attr_value) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + if (i->name && impl::strequalwild(name, i->name)) + { + for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) + if (impl::strequalwild(attr_name, a->name) && impl::strequalwild(attr_value, a->value)) + return xml_node(i); + } + + return xml_node(); + } + + xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) + if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value)) + return xml_node(i); + + return xml_node(); + } + + xml_node xml_node::find_child_by_attribute_w(const char_t* attr_name, const char_t* attr_value) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) + if (impl::strequalwild(attr_name, a->name) && impl::strequalwild(attr_value, a->value)) + return xml_node(i); + + return xml_node(); + } + +#ifndef PUGIXML_NO_STL + string_t xml_node::path(char_t delimiter) const + { + string_t path; + + xml_node cursor = *this; // Make a copy. + + path = cursor.name(); + + while (cursor.parent()) + { + cursor = cursor.parent(); + + string_t temp = cursor.name(); + temp += delimiter; + temp += path; + path.swap(temp); + } + + return path; + } +#endif + + xml_node xml_node::first_element_by_path(const char_t* path, char_t delimiter) const + { + xml_node found = *this; // Current search context. + + if (!_root || !path || !path[0]) return found; + + if (path[0] == delimiter) + { + // Absolute path; e.g. '/foo/bar' + while (found.parent()) found = found.parent(); + ++path; + } + + const char_t* path_segment = path; + + while (*path_segment == delimiter) ++path_segment; + + const char_t* path_segment_end = path_segment; + + while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end; + + if (path_segment == path_segment_end) return found; + + const char_t* next_segment = path_segment_end; + + while (*next_segment == delimiter) ++next_segment; + + if (*path_segment == '.' && path_segment + 1 == path_segment_end) + return found.first_element_by_path(next_segment, delimiter); + else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end) + return found.parent().first_element_by_path(next_segment, delimiter); + else + { + for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling) + { + if (j->name && impl::strequalrange(j->name, path_segment, static_cast(path_segment_end - path_segment))) + { + xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); + + if (subsearch) return subsearch; + } + } + + return xml_node(); + } + } + + bool xml_node::traverse(xml_tree_walker& walker) + { + walker._depth = -1; + + xml_node arg_begin = *this; + if (!walker.begin(arg_begin)) return false; + + xml_node cur = first_child(); + + if (cur) + { + ++walker._depth; + + do + { + xml_node arg_for_each = cur; + if (!walker.for_each(arg_for_each)) + return false; + + if (cur.first_child()) + { + ++walker._depth; + cur = cur.first_child(); + } + else if (cur.next_sibling()) + cur = cur.next_sibling(); + else + { + // Borland C++ workaround + while (!cur.next_sibling() && cur != *this && (bool)cur.parent()) + { + --walker._depth; + cur = cur.parent(); + } + + if (cur != *this) + cur = cur.next_sibling(); + } + } + while (cur && cur != *this); + } + + assert(walker._depth == -1); + + xml_node arg_end = *this; + return walker.end(arg_end); + } + + unsigned int xml_node::document_order() const + { + return 0; + } + + void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const + { + if (!_root) return; + + xml_buffered_writer buffered_writer(writer, encoding); + + node_output(buffered_writer, *this, indent, flags, depth); + } + +#ifndef PUGIXML_NO_STL + void xml_node::print(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const + { + if (!_root) return; + + xml_writer_stream writer(stream); + + print(writer, indent, flags, encoding, depth); + } + + void xml_node::print(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const + { + if (!_root) return; + + xml_writer_stream writer(stream); + + print(writer, indent, flags, encoding_wchar, depth); + } +#endif + + ptrdiff_t xml_node::offset_debug() const + { + xml_node_struct* r = root()._root; + + if (!r) return -1; + + const char_t* buffer = static_cast(r)->buffer; + + if (!buffer) return -1; + + switch (type()) + { + case node_document: + return 0; + + case node_element: + case node_declaration: + case node_pi: + return (_root->header & xml_memory_page_name_allocated_mask) ? -1 : _root->name - buffer; + + case node_pcdata: + case node_cdata: + case node_comment: + return (_root->header & xml_memory_page_value_allocated_mask) ? -1 : _root->value - buffer; + + default: + return -1; + } + } + +#ifdef __BORLANDC__ + bool operator&&(const xml_node& lhs, bool rhs) + { + return (bool)lhs && rhs; + } + + bool operator||(const xml_node& lhs, bool rhs) + { + return (bool)lhs || rhs; + } +#endif + + xml_node_iterator::xml_node_iterator() + { + } + + xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent()) + { + } + + xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) + { + } + + bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const + { + return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; + } + + bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const + { + return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; + } + + xml_node& xml_node_iterator::operator*() + { + assert(_wrap._root); + return _wrap; + } + + xml_node* xml_node_iterator::operator->() + { + assert(_wrap._root); + return &_wrap; + } + + const xml_node_iterator& xml_node_iterator::operator++() + { + assert(_wrap._root); + _wrap._root = _wrap._root->next_sibling; + return *this; + } + + xml_node_iterator xml_node_iterator::operator++(int) + { + xml_node_iterator temp = *this; + ++*this; + return temp; + } + + const xml_node_iterator& xml_node_iterator::operator--() + { + if (_wrap._root) _wrap = _wrap.previous_sibling(); + else _wrap = _parent.last_child(); + return *this; + } + + xml_node_iterator xml_node_iterator::operator--(int) + { + xml_node_iterator temp = *this; + --*this; + return temp; + } + + xml_attribute_iterator::xml_attribute_iterator() + { + } + + xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent) + { + } + + xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) + { + } + + bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const + { + return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root; + } + + bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const + { + return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root; + } + + xml_attribute& xml_attribute_iterator::operator*() + { + assert(_wrap._attr); + return _wrap; + } + + xml_attribute* xml_attribute_iterator::operator->() + { + assert(_wrap._attr); + return &_wrap; + } + + const xml_attribute_iterator& xml_attribute_iterator::operator++() + { + assert(_wrap._attr); + _wrap._attr = _wrap._attr->next_attribute; + return *this; + } + + xml_attribute_iterator xml_attribute_iterator::operator++(int) + { + xml_attribute_iterator temp = *this; + ++*this; + return temp; + } + + const xml_attribute_iterator& xml_attribute_iterator::operator--() + { + if (_wrap._attr) _wrap = _wrap.previous_attribute(); + else _wrap = _parent.last_attribute(); + return *this; + } + + xml_attribute_iterator xml_attribute_iterator::operator--(int) + { + xml_attribute_iterator temp = *this; + --*this; + return temp; + } + + const char* xml_parse_result::description() const + { + switch (status) + { + case status_ok: return "No error"; + + case status_file_not_found: return "File was not found"; + case status_io_error: return "Error reading from file/stream"; + case status_out_of_memory: return "Could not allocate memory"; + case status_internal_error: return "Internal error occurred"; + + case status_unrecognized_tag: return "Could not determine tag type"; + + case status_bad_pi: return "Error parsing document declaration/processing instruction"; + case status_bad_comment: return "Error parsing comment"; + case status_bad_cdata: return "Error parsing CDATA section"; + case status_bad_doctype: return "Error parsing document type declaration"; + case status_bad_pcdata: return "Error parsing PCDATA section"; + case status_bad_start_element: return "Error parsing start element tag"; + case status_bad_attribute: return "Error parsing element attribute"; + case status_bad_end_element: return "Error parsing end element tag"; + case status_end_element_mismatch: return "Start-end tags mismatch"; + + default: return "Unknown error"; + } + } + + xml_document::xml_document(): _buffer(0) + { + create(); + } + + xml_document::~xml_document() + { + destroy(); + } + + void xml_document::create() + { + destroy(); + + // initialize sentinel page + STATIC_ASSERT(offsetof(xml_memory_page, data) + sizeof(xml_document_struct) + xml_memory_page_alignment <= sizeof(_memory)); + + // align upwards to page boundary + void* page_memory = reinterpret_cast((reinterpret_cast(_memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1)); + + // prepare page structure + xml_memory_page* page = xml_memory_page::construct(page_memory); + + page->busy_size = xml_memory_page_size; + + // allocate new root + _root = new (page->data) xml_document_struct(page); + _root->prev_sibling_c = _root; + + // setup allocator + xml_allocator& a = static_cast(_root)->allocator; + a = xml_allocator(page); + + // setup sentinel page + page->allocator = &a; + } + + void xml_document::destroy() + { + // destroy static storage + if (_buffer) + { + global_deallocate(_buffer); + _buffer = 0; + } + + // destroy dynamic storage, leave sentinel page (it's in static memory) + if (_root) + { + xml_memory_page* root_page = reinterpret_cast(_root->header & xml_memory_page_pointer_mask); + assert(root_page && !root_page->prev && !root_page->memory); + + // destroy all pages + for (xml_memory_page* page = root_page->next; page; ) + { + xml_memory_page* next = page->next; + + xml_allocator::deallocate_page(page); + + page = next; + } + + // cleanup root page + root_page->allocator = 0; + root_page->next = 0; + root_page->busy_size = root_page->freed_size = 0; + + _root = 0; + } + } + +#ifndef PUGIXML_NO_STL + xml_parse_result xml_document::load(std::basic_istream >& stream, unsigned int options, xml_encoding encoding) + { + create(); + + return load_stream_impl(*this, stream, options, encoding); + } + + xml_parse_result xml_document::load(std::basic_istream >& stream, unsigned int options) + { + create(); + + return load_stream_impl(*this, stream, options, encoding_wchar); + } +#endif + + xml_parse_result xml_document::load(const char_t* contents, unsigned int options) + { + create(); + + // Force native encoding (skip autodetection) + #ifdef PUGIXML_WCHAR_MODE + xml_encoding encoding = encoding_wchar; + #else + xml_encoding encoding = encoding_utf8; + #endif + + return load_buffer(contents, impl::strlen(contents) * sizeof(char_t), options, encoding); + } + + xml_parse_result xml_document::parse(char* xmlstr, unsigned int options) + { + return load_buffer_inplace(xmlstr, strlen(xmlstr), options, encoding_utf8); + } + + xml_parse_result xml_document::parse(const transfer_ownership_tag&, char* xmlstr, unsigned int options) + { + return load_buffer_inplace_own(xmlstr, strlen(xmlstr), options, encoding_utf8); + } + + xml_parse_result xml_document::load_file(const char* path, unsigned int options, xml_encoding encoding) + { + create(); + + FILE* file = fopen(path, "rb"); + if (!file) return make_parse_result(status_file_not_found); + + fseek(file, 0, SEEK_END); + long length = ftell(file); + fseek(file, 0, SEEK_SET); + + if (length < 0) + { + fclose(file); + return make_parse_result(status_io_error); + } + + char* s = static_cast(global_allocate(length > 0 ? length : 1)); + + if (!s) + { + fclose(file); + return make_parse_result(status_out_of_memory); + } + + size_t read = fread(s, 1, (size_t)length, file); + fclose(file); + + if (read != (size_t)length) + { + global_deallocate(s); + return make_parse_result(status_io_error); + } + + return load_buffer_inplace_own(s, length, options, encoding); + } + + xml_parse_result xml_document::load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own) + { + create(); + + // get actual encoding + xml_encoding buffer_encoding = get_buffer_encoding(encoding, contents, size); + + // get private buffer + char_t* buffer = 0; + size_t length = 0; + + if (!convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return make_parse_result(status_out_of_memory); + + // delete original buffer if we performed a conversion + if (own && buffer != contents) global_deallocate(contents); + + // parse + xml_parse_result res = xml_parser::parse(buffer, length, _root, options); + + // remember encoding + res.encoding = buffer_encoding; + + // grab onto buffer if it's our buffer, user is responsible for deallocating contens himself + if (own || buffer != contents) _buffer = buffer; + + return res; + } + + xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) + { + return load_buffer_impl(const_cast(contents), size, options, encoding, false, false); + } + + xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding) + { + return load_buffer_impl(contents, size, options, encoding, true, false); + } + + xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding) + { + return load_buffer_impl(contents, size, options, encoding, true, true); + } + + void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const + { + if (flags & format_write_bom) write_bom(writer, get_write_encoding(encoding)); + + xml_buffered_writer buffered_writer(writer, encoding); + + if (!(flags & format_no_declaration) && !has_declaration(*this)) + { + buffered_writer.write(PUGIXML_TEXT("")); + if (!(flags & format_raw)) buffered_writer.write('\n'); + } + + node_output(buffered_writer, *this, indent, flags, 0); + } + +#ifndef PUGIXML_NO_STL + void xml_document::save(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const + { + xml_writer_stream writer(stream); + + save(writer, indent, flags, encoding); + } + + void xml_document::save(std::basic_ostream >& stream, const char_t* indent, unsigned int flags) const + { + xml_writer_stream writer(stream); + + save(writer, indent, flags, encoding_wchar); + } +#endif + + bool xml_document::save_file(const char* path, const char_t* indent, unsigned int flags, xml_encoding encoding) const + { + FILE* file = fopen(path, "wb"); + if (!file) return false; + + xml_writer_file writer(file); + save(writer, indent, flags, encoding); + + fclose(file); + + return true; + } + + void xml_document::precompute_document_order() + { + } + +#ifndef PUGIXML_NO_STL + std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str) + { + assert(str); + + STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); + + size_t length = wcslen(str); + + // first pass: get length in utf8 characters + size_t size = sizeof(wchar_t) == 2 ? + utf_decoder::decode_utf16_block(reinterpret_cast(str), length, 0) : + utf_decoder::decode_utf32_block(reinterpret_cast(str), length, 0); + + // allocate resulting string + std::string result; + result.resize(size); + + // second pass: convert to utf8 + if (size > 0) + { + uint8_t* begin = reinterpret_cast(&result[0]); + uint8_t* end = sizeof(wchar_t) == 2 ? + utf_decoder::decode_utf16_block(reinterpret_cast(str), length, begin) : + utf_decoder::decode_utf32_block(reinterpret_cast(str), length, begin); + + // truncate invalid output + assert(begin <= end && static_cast(end - begin) <= result.size()); + result.resize(static_cast(end - begin)); + } + + return result; + } + + std::wstring PUGIXML_FUNCTION as_utf16(const char* str) + { + return as_wide(str); + } + + std::wstring PUGIXML_FUNCTION as_wide(const char* str) + { + assert(str); + + const uint8_t* data = reinterpret_cast(str); + size_t size = strlen(str); + + // first pass: get length in wchar_t + size_t length = utf_decoder::decode_utf8_block(data, size, 0); + + // allocate resulting string + std::wstring result; + result.resize(length); + + // second pass: convert to wchar_t + if (length > 0) + { + wchar_writer::value_type begin = reinterpret_cast(&result[0]); + wchar_writer::value_type end = utf_decoder::decode_utf8_block(data, size, begin); + + // truncate invalid output + assert(begin <= end && static_cast(end - begin) <= result.size()); + result.resize(static_cast(end - begin)); + } + + return result; + } +#endif + + void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate) + { + global_allocate = allocate; + global_deallocate = deallocate; + } + + allocation_function PUGIXML_FUNCTION get_memory_allocation_function() + { + return global_allocate; + } + + deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function() + { + return global_deallocate; + } +} + +#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) +namespace std +{ + // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) + std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&) + { + return std::bidirectional_iterator_tag(); + } + + std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&) + { + return std::bidirectional_iterator_tag(); + } +} +#endif + +#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) +namespace std +{ + // Workarounds for (non-standard) iterator category detection + std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&) + { + return std::bidirectional_iterator_tag(); + } + + std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&) + { + return std::bidirectional_iterator_tag(); + } +} +#endif + +/** + * Copyright (c) 2006-2010 Arseny Kapoulkine + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ diff --git a/src/pugixml.hpp b/src/pugixml.hpp index 8207f16..281a0ea 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -1,2378 +1,2378 @@ -/** - * pugixml parser - version 0.9 - * -------------------------------------------------------- - * Copyright (C) 2006-2010, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) - * Report bugs and download new versions at http://code.google.com/p/pugixml/ - * - * This library is distributed under the MIT License. See notice at the end - * of this file. - * - * This work is based on the pugxml parser, which is: - * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) - */ - -#ifndef HEADER_PUGIXML_HPP -#define HEADER_PUGIXML_HPP - -#include "pugiconfig.hpp" - -#ifndef PUGIXML_NO_STL -namespace std -{ - struct bidirectional_iterator_tag; - -#ifdef __SUNPRO_CC - // Sun C++ compiler has a bug which forces template argument names in forward declarations to be the same as in actual definitions - template class allocator; - template struct char_traits; - template class basic_istream; - template class basic_ostream; - template class basic_string; -#else - // Borland C++ compiler has a bug which forces template argument names in forward declarations to be the same as in actual definitions - template class allocator; - template struct char_traits; - template class basic_istream; - template class basic_ostream; - template class basic_string; -#endif - - // Digital Mars compiler has a bug which requires a forward declaration for explicit instantiation (otherwise type selection is messed up later, producing link errors) - // Also note that we have to declare char_traits as a class here, since it's defined that way -#ifdef __DMC__ - template <> class char_traits; -#endif -} -#endif - -// Macro for deprecated features -#ifndef PUGIXML_DEPRECATED -# if defined(__GNUC__) -# define PUGIXML_DEPRECATED __attribute__((deprecated)) -# elif defined(_MSC_VER) && _MSC_VER >= 1300 -# define PUGIXML_DEPRECATED __declspec(deprecated) -# else -# define PUGIXML_DEPRECATED -# endif -#endif - -// No XPath without STL or exceptions -#if (defined(PUGIXML_NO_STL) || defined(PUGIXML_NO_EXCEPTIONS)) && !defined(PUGIXML_NO_XPATH) -# define PUGIXML_NO_XPATH -#endif - -// Include exception header for XPath -#ifndef PUGIXML_NO_XPATH -# include -#endif - -// If no API is defined, assume default -#ifndef PUGIXML_API -# define PUGIXML_API -#endif - -// If no API for classes is defined, assume default -#ifndef PUGIXML_CLASS -# define PUGIXML_CLASS PUGIXML_API -#endif - -// If no API for functions is defined, assume default -#ifndef PUGIXML_FUNCTION -# define PUGIXML_FUNCTION PUGIXML_API -#endif - -#include - -// Character interface macros -#ifdef PUGIXML_WCHAR_MODE -# define PUGIXML_TEXT(t) L ## t - -namespace pugi -{ - /// Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE - typedef wchar_t char_t; - -#ifndef PUGIXML_NO_STL - /// String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE - typedef std::basic_string, std::allocator > string_t; -#endif -} -#else -# define PUGIXML_TEXT(t) t - -namespace pugi -{ - /// Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE - typedef char char_t; - -# ifndef PUGIXML_NO_STL - // GCC 3.4 has a bug which prevents string_t instantiation using char_t, so we have to use char type explicitly - /// String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE - typedef std::basic_string, std::allocator > string_t; -# endif -} -#endif - -// Helpers for inline implementation -namespace pugi -{ - namespace impl - { - bool PUGIXML_FUNCTION strequal(const char_t*, const char_t*); - bool PUGIXML_FUNCTION strequalwild(const char_t*, const char_t*); - } -} - -/// The PugiXML Parser namespace. -namespace pugi -{ - /// Tree node classification. - enum xml_node_type - { - node_null, ///< Undifferentiated entity - node_document, ///< A document tree's absolute root. - node_element, ///< E.g. '<...>' - node_pcdata, ///< E.g. '>...<' - node_cdata, ///< E.g. '' - node_comment, ///< E.g. '' - node_pi, ///< E.g. '' - node_declaration ///< E.g. '' - }; - - // Parsing options - - /** - * Minimal parsing mode. Equivalent to turning all other flags off. This set of flags means - * that pugixml does not add pi/cdata sections or comments to DOM tree and does not perform - * any conversions for input data, meaning fastest parsing. - */ - const unsigned int parse_minimal = 0x0000; - - /** - * This flag determines if processing instructions (nodes with type node_pi; such nodes have the - * form of or in XML) are to be put in DOM tree. If this flag is off, - * they are not put in the tree, but are still parsed and checked for correctness. - * - * The corresponding node in DOM tree will have type node_pi, name "target" and value "content", - * if any. - * - * Note that (document declaration) is not considered to be a PI. - * - * This flag is off by default. - */ - const unsigned int parse_pi = 0x0001; - - /** - * This flag determines if comments (nodes with type node_comment; such nodes have the form of - * in XML) are to be put in DOM tree. If this flag is off, they are not put in - * the tree, but are still parsed and checked for correctness. - * - * The corresponding node in DOM tree will have type node_comment, empty name and value "content". - * - * This flag is off by default. - */ - const unsigned int parse_comments = 0x0002; - - /** - * This flag determines if CDATA sections (nodes with type node_cdata; such nodes have the form - * of in XML) are to be put in DOM tree. If this flag is off, they are not - * put in the tree, but are still parsed and checked for correctness. - * - * The corresponding node in DOM tree will have type node_cdata, empty name and value "content". - * - * This flag is on by default. - */ - const unsigned int parse_cdata = 0x0004; - - /** - * This flag determines if nodes with PCDATA (regular text) that consist only of whitespace - * characters are to be put in DOM tree. Often whitespace-only data is not significant for the - * application, and the cost of allocating and storing such nodes (both memory and speed-wise) - * can be significant. For example, after parsing XML string " ", element - * will have 3 children when parse_ws_pcdata is set (child with type node_pcdata and value=" ", - * child with type node_element and name "a", and another child with type node_pcdata and - * value=" "), and only 1 child when parse_ws_pcdata is not set. - * - * This flag is off by default. - */ - const unsigned int parse_ws_pcdata = 0x0008; - - /** - * This flag determines if character and entity references are to be expanded during the parsing - * process. Character references are &#...; or &#x...; (... is Unicode numeric representation of - * character in either decimal (&#...;) or hexadecimal (&#x...;) form), entity references are &...; - * Note that as pugixml does not handle DTD, the only allowed entities are predefined ones - - * &lt;, &gt;, &amp;, &apos; and &quot;. If character/entity reference can not be expanded, it is - * leaved as is, so you can do additional processing later. - * Reference expansion is performed in attribute values and PCDATA content. - * - * This flag is on by default. - */ - const unsigned int parse_escapes = 0x0010; - - /** - * This flag determines if EOL handling (that is, replacing sequences 0x0d 0x0a by a single 0x0a - * character, and replacing all standalone 0x0d characters by 0x0a) is to be performed on input - * data (that is, comments contents, PCDATA/CDATA contents and attribute values). - * - * This flag is on by default. - */ - const unsigned int parse_eol = 0x0020; - - /** - * This flag determines if attribute value normalization should be performed for all attributes. - * This means, that: - * 1. Whitespace characters (new line, tab and space) are replaced with space (' ') - * 2. Afterwards sequences of spaces are replaced with a single space - * 3. Leading/trailing whitespace characters are trimmed - * - * This flag is off by default. - */ - const unsigned int parse_wnorm_attribute = 0x0080; - - /** - * This flag determines if attribute value normalization should be performed for all attributes. - * This means, that whitespace characters (new line, tab and space) are replaced with space (' '). - * Note, that the actions performed while this flag is on are also performed if parse_wnorm_attribute - * is on, so this flag has no effect if parse_wnorm_attribute flag is set. - * New line characters are always treated as if parse_eol is set, i.e. \r\n is converted to single space. - * - * This flag is on by default. - */ - const unsigned int parse_wconv_attribute = 0x0040; - - /** - * This flag determines if XML document declaration (this node has the form of in XML) - * are to be put in DOM tree. If this flag is off, it is not put in the tree, but is still parsed - * and checked for correctness. - * - * The corresponding node in DOM tree will have type node_declaration, name "xml" and attributes, - * if any. - * - * This flag is off by default. - */ - const unsigned int parse_declaration = 0x0100; - - /** - * This is the default set of flags. It includes parsing CDATA sections (comments/PIs are not - * parsed), performing character and entity reference expansion, replacing whitespace characters - * with spaces in attribute values and performing EOL handling. Note, that PCDATA sections - * consisting only of whitespace characters are not parsed (by default) for performance reasons. - */ - const unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol; - - /** - * These flags determine the encoding of input data for XML document. Default mode is encoding_auto, - * which means that document encoding is auto-detected from BOM and necessary encoding conversions are - * applied. You can override this mode by using any of the specific encodings. - */ - enum xml_encoding - { - encoding_auto, //!< Auto-detect input encoding using BOM or < / 800 - PUGIXML_DEPRECATED -#endif - const unsigned int format_write_bom_utf8 = format_write_bom; - - /** - * If this flag is on, no indentation is performed and no line breaks are written to output file. - * This means that the data is written to output stream as is. - * - * This flag is off by default. - */ - const unsigned int format_raw = 0x04; - - /** - * If this flag is on, no default XML declaration is written to output file. - * This means that there will be no XML declaration in output stream unless there was one in XML document - * (i.e. if it was parsed with parse_declaration flag). - * - * This flag is off by default. - */ - const unsigned int format_no_declaration = 0x08; - - /** - * This is the default set of formatting flags. It includes indenting nodes depending on their - * depth in DOM tree. - */ - const unsigned int format_default = format_indent; - - // Forward declarations - struct xml_attribute_struct; - struct xml_node_struct; - - class xml_node_iterator; - class xml_attribute_iterator; - - class xml_tree_walker; - - class xml_node; - - #ifndef PUGIXML_NO_XPATH - class xpath_node; - class xpath_node_set; - class xpath_ast_node; - class xpath_allocator; - - /// XPath query return type classification - enum xpath_value_type - { - xpath_type_none, ///< Unknown type (query failed to compile) - xpath_type_node_set, ///< Node set (xpath_node_set) - xpath_type_number, ///< Number - xpath_type_string, ///< String - xpath_type_boolean ///< Boolean - }; - - /// XPath query return type classification - /// \deprecated This type is deprecated and will be removed in future versions; use xpath_value_type instead - typedef xpath_value_type xpath_type_t; - - /** - * A class that holds compiled XPath query and allows to evaluate query result - */ - class PUGIXML_CLASS xpath_query - { - private: - // Non-copyable semantics - xpath_query(const xpath_query&); - xpath_query& operator=(const xpath_query&); - - xpath_allocator* m_alloc; - xpath_ast_node* m_root; - - void compile(const char_t* query); - - public: - /** - * Constructor from string with XPath expression. - * Throws xpath_exception on compilation error, std::bad_alloc on out of memory error. - * - * \param query - string with XPath expression - */ - explicit xpath_query(const char_t* query); - - /** - * Destructor - */ - ~xpath_query(); - - /** - * Get query expression return type - * - * \return expression return type - **/ - xpath_value_type return_type() const; - - /** - * Evaluate expression as boolean value for the context node \a n. - * If expression does not directly evaluate to boolean, the expression result is converted - * as through boolean() XPath function call. - * Throws std::bad_alloc on out of memory error. - * - * \param n - context node - * \return evaluation result - */ - bool evaluate_boolean(const xml_node& n) const; - - /** - * Evaluate expression as double value for the context node \a n. - * If expression does not directly evaluate to double, the expression result is converted - * as through number() XPath function call. - * Throws std::bad_alloc on out of memory error. - * - * \param n - context node - * \return evaluation result - */ - double evaluate_number(const xml_node& n) const; - - /** - * Evaluate expression as string value for the context node \a n. - * If expression does not directly evaluate to string, the expression result is converted - * as through string() XPath function call. - * Throws std::bad_alloc on out of memory error. - * - * \param n - context node - * \return evaluation result - */ - string_t evaluate_string(const xml_node& n) const; - - /** - * Evaluate expression as node set for the context node \a n. - * If expression does not directly evaluate to node set, throws xpath_exception. - * Throws std::bad_alloc on out of memory error. - * - * \param n - context node - * \return evaluation result - */ - xpath_node_set evaluate_node_set(const xml_node& n) const; - }; - #endif - - /** - * Abstract writer class - * \see xml_node::print - */ - class PUGIXML_CLASS xml_writer - { - public: - /** - * Virtual destructor - */ - virtual ~xml_writer() {} - - /** - * Write memory chunk into stream/file/whatever - * - * \param data - data pointer - * \param size - data size - */ - virtual void write(const void* data, size_t size) = 0; - }; - - /** xml_writer implementation for FILE* - * \see xml_writer - */ - class PUGIXML_CLASS xml_writer_file: public xml_writer - { - public: - /** - * Construct writer instance - * - * \param file - this is FILE* object, void* is used to avoid header dependencies on stdio - */ - xml_writer_file(void* file); - - virtual void write(const void* data, size_t size); - - private: - void* file; - }; - - #ifndef PUGIXML_NO_STL - /** xml_writer implementation for streams - * \see xml_writer - */ - class PUGIXML_CLASS xml_writer_stream: public xml_writer - { - public: - /** - * Construct writer instance - * - * \param stream - output stream object - */ - xml_writer_stream(std::basic_ostream >& stream); - - /** - * Construct writer instance - * - * \param stream - output stream object - */ - xml_writer_stream(std::basic_ostream >& stream); - - virtual void write(const void* data, size_t size); - - private: - std::basic_ostream >* narrow_stream; - std::basic_ostream >* wide_stream; - }; - #endif - - /** - * A light-weight wrapper for manipulating attributes in DOM tree. - * Note: xml_attribute does not allocate any memory for the attribute it wraps; it only wraps a - * pointer to existing attribute. - */ - class PUGIXML_CLASS xml_attribute - { - friend class xml_attribute_iterator; - friend class xml_node; - - private: - xml_attribute_struct* _attr; - - /// \internal Safe bool type -#ifdef __MWERKS__ - typedef bool (xml_attribute::*unspecified_bool_type)() const; -#else - typedef xml_attribute_struct* xml_attribute::*unspecified_bool_type; -#endif - - /// \internal Initializing constructor - explicit xml_attribute(xml_attribute_struct* attr); - - public: - /** - * Default constructor. Constructs an empty attribute. - */ - xml_attribute(); - - public: - /** - * Safe bool conversion. - * Allows xml_node to be used in a context where boolean variable is expected, such as 'if (node)'. - */ - operator unspecified_bool_type() const; - - // Borland C++ workaround - bool operator!() const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator==(const xml_attribute& r) const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator!=(const xml_attribute& r) const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator<(const xml_attribute& r) const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator>(const xml_attribute& r) const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator<=(const xml_attribute& r) const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator>=(const xml_attribute& r) const; - - public: - /** - * Get next attribute in attribute list of node that contains the attribute. - * - * \return next attribute, if any; empty attribute otherwise - */ - xml_attribute next_attribute() const; - - /** - * Get previous attribute in attribute list of node that contains the attribute. - * - * \return previous attribute, if any; empty attribute otherwise - */ - xml_attribute previous_attribute() const; - - /** - * Cast attribute value as int. - * - * \return attribute value as int, or 0 if conversion did not succeed or attribute is empty - */ - int as_int() const; - - /** - * Cast attribute value as unsigned int. - * - * \return attribute value as unsigned int, or 0 if conversion did not succeed or attribute is empty - * \note values out of non-negative int range (usually [0, 2^31-1]) get clamped to range boundaries - */ - unsigned int as_uint() const; - - /** - * Cast attribute value as double. - * - * \return attribute value as double, or 0.0 if conversion did not succeed or attribute is empty - */ - double as_double() const; - - /** - * Cast attribute value as float. - * - * \return attribute value as float, or 0.0f if conversion did not succeed or attribute is empty - */ - float as_float() const; - - /** - * Cast attribute value as bool. Returns true for attributes with values that start with '1', - * 't', 'T', 'y', 'Y', returns false for other attributes. - * - * \return attribute value as bool, or false if conversion did not succeed or attribute is empty - */ - bool as_bool() const; - - /// \internal Document order or 0 if not set - /// \deprecated This function is deprecated - PUGIXML_DEPRECATED unsigned int document_order() const; - - public: - /** - * Set attribute value to \a rhs. - * - * \param rhs - new attribute value - * \return self - */ - xml_attribute& operator=(const char_t* rhs); - - /** - * Set attribute value to \a rhs. - * - * \param rhs - new attribute value - * \return self - */ - xml_attribute& operator=(int rhs); - - /** - * Set attribute value to \a rhs. - * - * \param rhs - new attribute value - * \return self - */ - xml_attribute& operator=(unsigned int rhs); - - /** - * Set attribute value to \a rhs. - * - * \param rhs - new attribute value - * \return self - */ - xml_attribute& operator=(double rhs); - - /** - * Set attribute value to either 'true' or 'false' (depends on whether \a rhs is true or false). - * - * \param rhs - new attribute value - * \return self - */ - xml_attribute& operator=(bool rhs); - - /** - * Set attribute name to \a rhs. - * - * \param rhs - new attribute name - * \return success flag (call fails if attribute is empty or there is not enough memory) - */ - bool set_name(const char_t* rhs); - - /** - * Set attribute value to \a rhs. - * - * \param rhs - new attribute value - * \return success flag (call fails if attribute is empty or there is not enough memory) - */ - bool set_value(const char_t* rhs); - - /** - * Set attribute value to \a rhs. - * - * \param rhs - new attribute value - * \return success flag (call fails if attribute is empty or there is not enough memory) - */ - bool set_value(int rhs); - - /** - * Set attribute value to \a rhs. - * - * \param rhs - new attribute value - * \return success flag (call fails if attribute is empty or there is not enough memory) - */ - bool set_value(unsigned int rhs); - - /** - * Set attribute value to \a rhs. - * - * \param rhs - new attribute value - * \return success flag (call fails if attribute is empty or there is not enough memory) - */ - bool set_value(double rhs); - - /** - * Set attribute value to either 'true' or 'false' (depends on whether \a rhs is true or false). - * - * \param rhs - new attribute value - * \return success flag (call fails if attribute is empty or there is not enough memory) - */ - bool set_value(bool rhs); - - public: - /** - * Check if attribute is empty. - * - * \return true if attribute is empty, false otherwise - */ - bool empty() const; - - public: - /** - * Get attribute name. - * - * \return attribute name, or "" if attribute is empty - */ - const char_t* name() const; - - /** - * Get attribute value. - * - * \return attribute value, or "" if attribute is empty - */ - const char_t* value() const; - }; - -#ifdef __BORLANDC__ - // Borland C++ workaround - bool PUGIXML_FUNCTION operator&&(const xml_attribute& lhs, bool rhs); - bool PUGIXML_FUNCTION operator||(const xml_attribute& lhs, bool rhs); -#endif - - /** - * A light-weight wrapper for manipulating nodes in DOM tree. - * Note: xml_node does not allocate any memory for the node it wraps; it only wraps a pointer to - * existing node. - */ - class PUGIXML_CLASS xml_node - { - friend class xml_attribute_iterator; - friend class xml_node_iterator; - - protected: - xml_node_struct* _root; - - /// \internal Safe bool type -#ifdef __MWERKS__ - typedef bool (xml_node::*unspecified_bool_type)() const; -#else - typedef xml_node_struct* xml_node::*unspecified_bool_type; -#endif - - /// \internal Initializing constructor - explicit xml_node(xml_node_struct* p); - - private: - template void all_elements_by_name_helper(const char_t* name, OutputIterator it) const - { - if (!_root) return; - - for (xml_node node = first_child(); node; node = node.next_sibling()) - { - if (node.type() == node_element) - { - if (impl::strequal(name, node.name())) - { - *it = node; - ++it; - } - - if (node.first_child()) node.all_elements_by_name_helper(name, it); - } - } - } - - template void all_elements_by_name_w_helper(const char_t* name, OutputIterator it) const - { - if (!_root) return; - - for (xml_node node = first_child(); node; node = node.next_sibling()) - { - if (node.type() == node_element) - { - if (impl::strequalwild(name, node.name())) - { - *it = node; - ++it; - } - - if (node.first_child()) node.all_elements_by_name_w_helper(name, it); - } - } - } - - public: - /** - * Default constructor. Constructs an empty node. - */ - xml_node(); - - public: - /** - * Safe bool conversion. - * Allows xml_node to be used in a context where boolean variable is expected, such as 'if (node)'. - */ - operator unspecified_bool_type() const; - - // Borland C++ workaround - bool operator!() const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator==(const xml_node& r) const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator!=(const xml_node& r) const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator<(const xml_node& r) const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator>(const xml_node& r) const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator<=(const xml_node& r) const; - - /** - * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. - * - * \param r - value to compare to - * \return comparison result - */ - bool operator>=(const xml_node& r) const; - - public: - /** - * Node iterator type (for child nodes). - * \see xml_node_iterator - */ - typedef xml_node_iterator iterator; - - /** - * Node iterator type (for child nodes). - * \see xml_attribute_iterator - */ - typedef xml_attribute_iterator attribute_iterator; - - /** - * Access the begin iterator for this node's collection of child nodes. - * - * \return iterator that points to the first child node, or past-the-end iterator if node is empty or has no children - */ - iterator begin() const; - - /** - * Access the end iterator for this node's collection of child nodes. - * - * \return past-the-end iterator for child list - */ - iterator end() const; - - /** - * Access the begin iterator for this node's collection of attributes. - * - * \return iterator that points to the first attribute, or past-the-end iterator if node is empty or has no attributes - */ - attribute_iterator attributes_begin() const; - - /** - * Access the end iterator for this node's collection of attributes. - * - * \return past-the-end iterator for attribute list - */ - attribute_iterator attributes_end() const; - - public: - /** - * Check if node is empty. - * - * \return true if node is empty, false otherwise - */ - bool empty() const; - - public: - /** - * Get node type - * - * \return node type; node_null for empty nodes - */ - xml_node_type type() const; - - /** - * Get node name (element name for element nodes, PI target for PI) - * - * \return node name, if any; "" otherwise - */ - const char_t* name() const; - - /** - * Get node value (comment/PI/PCDATA/CDATA contents, depending on node type) - * - * \return node value, if any; "" otherwise - */ - const char_t* value() const; - - /** - * Get child with the specified name - * - * \param name - child name - * \return child with the specified name, if any; empty node otherwise - */ - xml_node child(const char_t* name) const; - - /** - * Get child with the name that matches specified pattern - * - * \param name - child name pattern - * \return child with the name that matches pattern, if any; empty node otherwise - * - * \deprecated This function is deprecated - */ - PUGIXML_DEPRECATED xml_node child_w(const char_t* name) const; - - /** - * Get attribute with the specified name - * - * \param name - attribute name - * \return attribute with the specified name, if any; empty attribute otherwise - */ - xml_attribute attribute(const char_t* name) const; - - /** - * Get attribute with the name that matches specified pattern - * - * \param name - attribute name pattern - * \return attribute with the name that matches pattern, if any; empty attribute otherwise - * - * \deprecated This function is deprecated - */ - PUGIXML_DEPRECATED xml_attribute attribute_w(const char_t* name) const; - - /** - * Get first of following sibling nodes with the specified name - * - * \param name - sibling name - * \return node with the specified name, if any; empty node otherwise - */ - xml_node next_sibling(const char_t* name) const; - - /** - * Get first of the following sibling nodes with the name that matches specified pattern - * - * \param name - sibling name pattern - * \return node with the name that matches pattern, if any; empty node otherwise - * - * \deprecated This function is deprecated - */ - PUGIXML_DEPRECATED xml_node next_sibling_w(const char_t* name) const; - - /** - * Get following sibling - * - * \return following sibling node, if any; empty node otherwise - */ - xml_node next_sibling() const; - - /** - * Get first of preceding sibling nodes with the specified name - * - * \param name - sibling name - * \return node with the specified name, if any; empty node otherwise - */ - xml_node previous_sibling(const char_t* name) const; - - /** - * Get first of the preceding sibling nodes with the name that matches specified pattern - * - * \param name - sibling name pattern - * \return node with the name that matches pattern, if any; empty node otherwise - * - * \deprecated This function is deprecated - */ - PUGIXML_DEPRECATED xml_node previous_sibling_w(const char_t* name) const; - - /** - * Get preceding sibling - * - * \return preceding sibling node, if any; empty node otherwise - */ - xml_node previous_sibling() const; - - /** - * Get parent node - * - * \return parent node if any; empty node otherwise - */ - xml_node parent() const; - - /** - * Get root of DOM tree this node belongs to. - * - * \return tree root - */ - xml_node root() const; - - /** - * Get child value of current node; that is, value of the first child node of type PCDATA/CDATA - * - * \return child value of current node, if any; "" otherwise - */ - const char_t* child_value() const; - - /** - * Get child value of child with specified name. \see child_value - * node.child_value(name) is equivalent to node.child(name).child_value() - * - * \param name - child name - * \return child value of specified child node, if any; "" otherwise - */ - const char_t* child_value(const char_t* name) const; - - /** - * Get child value of child with name that matches the specified pattern. \see child_value - * node.child_value_w(name) is equivalent to node.child_w(name).child_value() - * - * \param name - child name pattern - * \return child value of specified child node, if any; "" otherwise - * - * \deprecated This function is deprecated - */ - PUGIXML_DEPRECATED const char_t* child_value_w(const char_t* name) const; - - public: - /** - * Set node name to \a rhs (for PI/element nodes). \see name - * - * \param rhs - new node name - * \return success flag (call fails if node is of the wrong type or there is not enough memory) - */ - bool set_name(const char_t* rhs); - - /** - * Set node value to \a rhs (for PI/PCDATA/CDATA/comment nodes). \see value - * - * \param rhs - new node value - * \return success flag (call fails if node is of the wrong type or there is not enough memory) - */ - bool set_value(const char_t* rhs); - - /** - * Add attribute with specified name (for element nodes) - * - * \param name - attribute name - * \return added attribute, or empty attribute if there was an error (wrong node type) - */ - xml_attribute append_attribute(const char_t* name); - - /** - * Insert attribute with specified name after \a attr (for element nodes) - * - * \param name - attribute name - * \param attr - attribute to insert a new one after - * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node) - */ - xml_attribute insert_attribute_after(const char_t* name, const xml_attribute& attr); - - /** - * Insert attribute with specified name before \a attr (for element nodes) - * - * \param name - attribute name - * \param attr - attribute to insert a new one before - * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node) - */ - xml_attribute insert_attribute_before(const char_t* name, const xml_attribute& attr); - - /** - * Add a copy of the specified attribute (for element nodes) - * - * \param proto - attribute prototype which is to be copied - * \return inserted attribute, or empty attribute if there was an error (wrong node type) - */ - xml_attribute append_copy(const xml_attribute& proto); - - /** - * Insert a copy of the specified attribute after \a attr (for element nodes) - * - * \param proto - attribute prototype which is to be copied - * \param attr - attribute to insert a new one after - * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node) - */ - xml_attribute insert_copy_after(const xml_attribute& proto, const xml_attribute& attr); - - /** - * Insert a copy of the specified attribute before \a attr (for element nodes) - * - * \param proto - attribute prototype which is to be copied - * \param attr - attribute to insert a new one before - * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node) - */ - xml_attribute insert_copy_before(const xml_attribute& proto, const xml_attribute& attr); - - /** - * Add child node with specified type (for element nodes) - * - * \param type - node type - * \return added node, or empty node if there was an error (wrong node type) - */ - xml_node append_child(xml_node_type type = node_element); - - /** - * Insert child node with specified type after \a node (for element nodes) - * - * \param type - node type - * \param node - node to insert a new one after - * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node) - */ - xml_node insert_child_after(xml_node_type type, const xml_node& node); - - /** - * Insert child node with specified type before \a node (for element nodes) - * - * \param type - node type - * \param node - node to insert a new one before - * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node) - */ - xml_node insert_child_before(xml_node_type type, const xml_node& node); - - /** - * Add a copy of the specified node as a child (for element nodes) - * - * \param proto - node prototype which is to be copied - * \return inserted node, or empty node if there was an error (wrong node type) - */ - xml_node append_copy(const xml_node& proto); - - /** - * Insert a copy of the specified node after \a node (for element nodes) - * - * \param proto - node prototype which is to be copied - * \param node - node to insert a new one after - * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node) - */ - xml_node insert_copy_after(const xml_node& proto, const xml_node& node); - - /** - * Insert a copy of the specified node before \a node (for element nodes) - * - * \param proto - node prototype which is to be copied - * \param node - node to insert a new one before - * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node) - */ - xml_node insert_copy_before(const xml_node& proto, const xml_node& node); - - /** - * Remove specified attribute - * - * \param a - attribute to be removed - * \return success flag - */ - bool remove_attribute(const xml_attribute& a); - - /** - * Remove attribute with the specified name, if any - * - * \param name - attribute name - * \return success flag - */ - bool remove_attribute(const char_t* name); - - /** - * Remove specified child - * - * \param n - child node to be removed - * \return success flag - */ - bool remove_child(const xml_node& n); - - /** - * Remove child with the specified name, if any - * - * \param name - child name - * \return success flag - */ - bool remove_child(const char_t* name); - - public: - /** - * Get first attribute - * - * \return first attribute, if any; empty attribute otherwise - */ - xml_attribute first_attribute() const; - - /** - * Get last attribute - * - * \return last attribute, if any; empty attribute otherwise - */ - xml_attribute last_attribute() const; - - /** - * Get all elements from subtree with given name - * - * \param name - node name - * \param it - output iterator (for example, std::back_insert_iterator (result of std::back_inserter)) - * - * \deprecated This function is deprecated - */ - template PUGIXML_DEPRECATED void all_elements_by_name(const char_t* name, OutputIterator it) const - { - all_elements_by_name_helper(name, it); - } - - /** - * Get all elements from subtree with name that matches given pattern - * - * \param name - node name pattern - * \param it - output iterator (for example, std::back_insert_iterator (result of std::back_inserter)) - * - * \deprecated This function is deprecated - */ - template PUGIXML_DEPRECATED void all_elements_by_name_w(const char_t* name, OutputIterator it) const - { - all_elements_by_name_w_helper(name, it); - } - - /** - * Get first child - * - * \return first child, if any; empty node otherwise - */ - xml_node first_child() const; - - /** - * Get last child - * - * \return last child, if any; empty node otherwise - */ - xml_node last_child() const; - - /** - * Find attribute using predicate - * - * \param pred - predicate, that takes xml_attribute and returns bool - * \return first attribute for which predicate returned true, or empty attribute - */ - template xml_attribute find_attribute(Predicate pred) const - { - if (!_root) return xml_attribute(); - - for (xml_attribute attrib = first_attribute(); attrib; attrib = attrib.next_attribute()) - if (pred(attrib)) - return attrib; - - return xml_attribute(); - } - - /** - * Find child node using predicate - * - * \param pred - predicate, that takes xml_node and returns bool - * \return first child node for which predicate returned true, or empty node - */ - template xml_node find_child(Predicate pred) const - { - if (!_root) return xml_node(); - - for (xml_node node = first_child(); node; node = node.next_sibling()) - if (pred(node)) - return node; - - return xml_node(); - } - - /** - * Find node from subtree using predicate - * - * \param pred - predicate, that takes xml_node and returns bool - * \return first node from subtree for which predicate returned true, or empty node - */ - template xml_node find_node(Predicate pred) const - { - if (!_root) return xml_node(); - - for (xml_node node = first_child(); node; node = node.next_sibling()) - { - if (pred(node)) - return node; - - if (node.first_child()) - { - xml_node found = node.find_node(pred); - if (found) return found; - } - } - - return xml_node(); - } - - /** - * Find child node with the specified name that has specified attribute - * - * \param name - child node name - * \param attr_name - attribute name of child node - * \param attr_value - attribute value of child node - * \return first matching child node, or empty node - */ - xml_node find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const; - - /** - * Find child node with the specified name that has specified attribute (use pattern matching for node name and attribute name/value) - * - * \param name - pattern for child node name - * \param attr_name - pattern for attribute name of child node - * \param attr_value - pattern for attribute value of child node - * \return first matching child node, or empty node - * - * \deprecated This function is deprecated - */ - PUGIXML_DEPRECATED xml_node find_child_by_attribute_w(const char_t* name, const char_t* attr_name, const char_t* attr_value) const; - - /** - * Find child node that has specified attribute - * - * \param attr_name - attribute name of child node - * \param attr_value - attribute value of child node - * \return first matching child node, or empty node - */ - xml_node find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const; - - /** - * Find child node that has specified attribute (use pattern matching for attribute name/value) - * - * \param attr_name - pattern for attribute name of child node - * \param attr_value - pattern for attribute value of child node - * \return first matching child node, or empty node - * - * \deprecated This function is deprecated - */ - PUGIXML_DEPRECATED xml_node find_child_by_attribute_w(const char_t* attr_name, const char_t* attr_value) const; - - #ifndef PUGIXML_NO_STL - /** - * Get the absolute node path from root as a text string. - * - * \param delimiter - delimiter character to insert between element names - * \return path string (e.g. '/bookstore/book/author'). - */ - string_t path(char_t delimiter = '/') const; - #endif - - /** - * Search for a node by path. - * \param path - path string; e.g. './foo/bar' (relative to node), '/foo/bar' (relative - * to root), '../foo/bar'. - * \param delimiter - delimiter character to use while tokenizing path - * \return matching node, if any; empty node otherwise - */ - xml_node first_element_by_path(const char_t* path, char_t delimiter = '/') const; - - /** - * Recursively traverse subtree with xml_tree_walker - * \see xml_tree_walker::begin - * \see xml_tree_walker::for_each - * \see xml_tree_walker::end - * - * \param walker - tree walker to traverse subtree with - * \return traversal result - */ - bool traverse(xml_tree_walker& walker); - - #ifndef PUGIXML_NO_XPATH - /** - * Select single node by evaluating XPath query - * - * \param query - query string - * \return first node from the resulting node set by document order, or empty node if none found - */ - xpath_node select_single_node(const char_t* query) const; - - /** - * Select single node by evaluating XPath query - * - * \param query - compiled query - * \return first node from the resulting node set by document order, or empty node if none found - */ - xpath_node select_single_node(const xpath_query& query) const; - - /** - * Select node set by evaluating XPath query - * - * \param query - query string - * \return resulting node set - */ - xpath_node_set select_nodes(const char_t* query) const; - - /** - * Select node set by evaluating XPath query - * - * \param query - compiled query - * \return resulting node set - */ - xpath_node_set select_nodes(const xpath_query& query) const; - #endif - - /// \internal Document order or 0 if not set - /// \deprecated This function is deprecated - PUGIXML_DEPRECATED unsigned int document_order() const; - - /** - * Print subtree to writer - * - * \param writer - writer object - * \param indent - indentation string - * \param flags - formatting flags - * \param encoding - encoding used for writing - * \param depth - starting depth (used for indentation) - */ - void print(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; - - #ifndef PUGIXML_NO_STL - /** - * Print subtree to stream - * - * \param os - output stream - * \param indent - indentation string - * \param flags - formatting flags - * \param encoding - encoding used for writing - * \param depth - starting depth (used for indentation) - */ - void print(std::basic_ostream >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; - - /** - * Print subtree to stream - * - * \param os - output stream - * \param indent - indentation string - * \param flags - formatting flags - * \param encoding - encoding used for writing - * \param depth - starting depth (used for indentation) - */ - void print(std::basic_ostream >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, unsigned int depth = 0) const; - #endif - - /** - * Get node offset in parsed file/string (in bytes) for debugging purposes - * - * \return offset in bytes to start of node data, or -1 in case of error - * \note This will return -1 if node information changed to the extent that it's no longer possible to calculate offset, for example - * if element node name has significantly changed; this is guaranteed to return correct offset only for nodes that have not changed - * since parsing. - */ - ptrdiff_t offset_debug() const; - }; - -#ifdef __BORLANDC__ - // Borland C++ workaround - bool PUGIXML_FUNCTION operator&&(const xml_node& lhs, bool rhs); - bool PUGIXML_FUNCTION operator||(const xml_node& lhs, bool rhs); -#endif - - /** - * Child node iterator. - * It's a bidirectional iterator with value type 'xml_node'. - */ - class PUGIXML_CLASS xml_node_iterator - { - friend class xml_node; - - private: - xml_node _wrap; - xml_node _parent; - - /// \internal Initializing constructor - xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent); - - public: - /** - * Iterator traits - */ - typedef ptrdiff_t difference_type; - typedef xml_node value_type; - typedef xml_node* pointer; - typedef xml_node& reference; - - #ifndef PUGIXML_NO_STL - typedef std::bidirectional_iterator_tag iterator_category; - #endif - - /** - * Default constructor - */ - xml_node_iterator(); - - /** - * Initializing constructor - * - * \param node - node that iterator will point at - */ - xml_node_iterator(const xml_node& node); - - /** - * Check if this iterator is equal to \a rhs - * - * \param rhs - other iterator - * \return comparison result - */ - bool operator==(const xml_node_iterator& rhs) const; - - /** - * Check if this iterator is not equal to \a rhs - * - * \param rhs - other iterator - * \return comparison result - */ - bool operator!=(const xml_node_iterator& rhs) const; - - /** - * Dereferencing operator - * - * \return reference to the node iterator points at - */ - xml_node& operator*(); - - /** - * Member access operator - * - * \return pointer to the node iterator points at - */ - xml_node* operator->(); - - /** - * Pre-increment operator - * - * \return self - */ - const xml_node_iterator& operator++(); - - /** - * Post-increment operator - * - * \return old value - */ - xml_node_iterator operator++(int); - - /** - * Pre-decrement operator - * - * \return self - */ - const xml_node_iterator& operator--(); - - /** - * Post-decrement operator - * - * \return old value - */ - xml_node_iterator operator--(int); - }; - - /** - * Attribute iterator. - * It's a bidirectional iterator with value type 'xml_attribute'. - */ - class PUGIXML_CLASS xml_attribute_iterator - { - friend class xml_node; - - private: - xml_attribute _wrap; - xml_node _parent; - - /// \internal Initializing constructor - xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent); - - public: - /** - * Iterator traits - */ - typedef ptrdiff_t difference_type; - typedef xml_attribute value_type; - typedef xml_attribute* pointer; - typedef xml_attribute& reference; - - #ifndef PUGIXML_NO_STL - typedef std::bidirectional_iterator_tag iterator_category; - #endif - - /** - * Default constructor - */ - xml_attribute_iterator(); - - /** - * Initializing constructor - * - * \param attr - attribute that iterator will point at - * \param parent - parent node of the attribute - */ - xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent); - - /** - * Check if this iterator is equal to \a rhs - * - * \param rhs - other iterator - * \return comparison result - */ - bool operator==(const xml_attribute_iterator& rhs) const; - - /** - * Check if this iterator is not equal to \a rhs - * - * \param rhs - other iterator - * \return comparison result - */ - bool operator!=(const xml_attribute_iterator& rhs) const; - - /** - * Dereferencing operator - * - * \return reference to the node iterator points at - */ - xml_attribute& operator*(); - - /** - * Member access operator - * - * \return pointer to the node iterator points at - */ - xml_attribute* operator->(); - - /** - * Pre-increment operator - * - * \return self - */ - const xml_attribute_iterator& operator++(); - - /** - * Post-increment operator - * - * \return old value - */ - xml_attribute_iterator operator++(int); - - /** - * Pre-decrement operator - * - * \return self - */ - const xml_attribute_iterator& operator--(); - - /** - * Post-decrement operator - * - * \return old value - */ - xml_attribute_iterator operator--(int); - }; - - /** - * Abstract tree walker class - * \see xml_node::traverse - */ - class PUGIXML_CLASS xml_tree_walker - { - friend class xml_node; - - private: - int _depth; - - protected: - /** - * Get node depth - * - * \return node depth - */ - int depth() const; - - public: - /** - * Default constructor - */ - xml_tree_walker(); - - /** - * Virtual destructor - */ - virtual ~xml_tree_walker(); - - public: - /** - * Callback that is called when traversal of node begins. - * - * \return returning false will abort the traversal - */ - virtual bool begin(xml_node&); - - /** - * Callback that is called for each node traversed - * - * \return returning false will abort the traversal - */ - virtual bool for_each(xml_node&) = 0; - - /** - * Callback that is called when traversal of node ends. - * - * \return returning false will abort the traversal - */ - virtual bool end(xml_node&); - }; - - /** - * Struct used to distinguish parsing with ownership transfer from parsing without it. - * \see xml_document::parse - */ - struct transfer_ownership_tag {}; - - /** - * Parsing status enumeration, returned as part of xml_parse_result struct - */ - enum xml_parse_status - { - status_ok = 0, ///< No error - - status_file_not_found, ///< File was not found during load_file() - status_io_error, ///< Error reading from file/stream - status_out_of_memory, ///< Could not allocate memory - status_internal_error, ///< Internal error occurred - - status_unrecognized_tag, ///< Parser could not determine tag type - - status_bad_pi, ///< Parsing error occurred while parsing document declaration/processing instruction () - status_bad_comment, ///< Parsing error occurred while parsing comment () - status_bad_cdata, ///< Parsing error occurred while parsing CDATA section () - status_bad_doctype, ///< Parsing error occurred while parsing document type declaration - status_bad_pcdata, ///< Parsing error occurred while parsing PCDATA section (>...<) - status_bad_start_element, ///< Parsing error occurred while parsing start element tag () - status_bad_attribute, ///< Parsing error occurred while parsing element attribute - status_bad_end_element, ///< Parsing error occurred while parsing end element tag () - status_end_element_mismatch ///< There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or there was an excessive closing tag) - }; - - /** - * Parser result - */ - struct PUGIXML_CLASS xml_parse_result - { - /// Parsing status (\see xml_parse_status) - xml_parse_status status; - - /// Last parsed offset (in bytes from file/string start) - ptrdiff_t offset; - - /// Source document encoding - xml_encoding encoding; - - /// Cast to bool operator - operator bool() const - { - return status == status_ok; - } - - /// Get error description - const char* description() const; - }; - - /** - * Document class (DOM tree root). - * This class has non-copyable semantics (private copy constructor/assignment operator). - */ - class PUGIXML_CLASS xml_document: public xml_node - { - private: - char_t* _buffer; - - char _memory[192]; - - xml_document(const xml_document&); - const xml_document& operator=(const xml_document&); - - void create(); - void destroy(); - - xml_parse_result load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own); - - public: - /** - * Default constructor, makes empty document - */ - xml_document(); - - /** - * Destructor - */ - ~xml_document(); - - public: - #ifndef PUGIXML_NO_STL - /** - * Load document from stream. - * - * \param stream - stream with XML data - * \param options - parsing options - * \param encoding - source data encoding - * \return parsing result - */ - xml_parse_result load(std::basic_istream >& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - - /** - * Load document from stream. - * - * \param stream - stream with XML data - * \param options - parsing options - * \return parsing result - */ - xml_parse_result load(std::basic_istream >& stream, unsigned int options = parse_default); - #endif - - /** - * Load document from string. String has to be zero-terminated. No encoding conversions are applied. - * - * \param contents - input string - * \param options - parsing options - * \return parsing result - */ - xml_parse_result load(const char_t* contents, unsigned int options = parse_default); - - /** - * Parse the given XML string in-situ. - * The string is modified; you should ensure that string data will persist throughout the - * document's lifetime. Although, document does not gain ownership over the string, so you - * should free the memory occupied by it manually. - * - * \param xmlstr - read/write string with XML data - * \param options - parsing options - * \return parsing result - * - * \deprecated This function is deprecated and will be removed in future versions; use xml_document::load_buffer_inplace instead - */ - PUGIXML_DEPRECATED xml_parse_result parse(char* xmlstr, unsigned int options = parse_default); - - /** - * Parse the given XML string in-situ (gains ownership). - * The string is modified; document gains ownership over the string, so you don't have to worry - * about it's lifetime. - * Call example: doc.parse(transfer_ownership_tag(), string, options); - * - * \param xmlstr - read/write string with XML data - * \param options - parsing options - * \return parsing result - * - * \deprecated This function is deprecated and will be removed in future versions; use xml_document::load_buffer_inplace_own instead - */ - PUGIXML_DEPRECATED xml_parse_result parse(const transfer_ownership_tag&, char* xmlstr, unsigned int options = parse_default); - - /** - * Load document from file - * - * \param path - file path - * \param options - parsing options - * \param encoding - source data encoding - * \return parsing result - */ - xml_parse_result load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - - /** - * Load document from buffer - * - * \param contents - buffer contents - * \param size - buffer size in bytes - * \param options - parsing options - * \param encoding - source data encoding - * \return parsing result - */ - xml_parse_result load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - - /** - * Load document from buffer in-situ. - * The buffer is modified; you should ensure that buffer data will persist throughout the document's - * lifetime. Document does not gain ownership over the buffer, so you should free the buffer memory manually. - * - * \param contents - buffer contents - * \param size - buffer size in bytes - * \param options - parsing options - * \param encoding - source data encoding - * \return parsing result - */ - xml_parse_result load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - - /** - * Load document from buffer in-situ (gains buffer ownership). - * The buffer is modified; you should ensure that buffer data will persist throughout the document's - * lifetime. Document gains ownership over the buffer, so you should allocate the buffer with pugixml - * allocation function. - * - * \param contents - buffer contents - * \param size - buffer size in bytes - * \param options - parsing options - * \param encoding - source data encoding - * \return parsing result - */ - xml_parse_result load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - - /** - * Save XML to writer - * - * \param writer - writer object - * \param indent - indentation string - * \param flags - formatting flags - * \param encoding - encoding used for writing - */ - void save(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - - #ifndef PUGIXML_NO_STL - /** - * Save XML to stream - * - * \param stream - output stream - * \param indent - indentation string - * \param flags - formatting flags - * \param encoding - encoding used for writing - */ - void save(std::basic_ostream >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - - /** - * Save XML to stream - * - * \param stream - output stream - * \param indent - indentation string - * \param flags - formatting flags - */ - void save(std::basic_ostream >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default) const; - #endif - - /** - * Save XML to file - * - * \param path - file path - * \param indent - indentation string - * \param flags - formatting flags - * \param encoding - encoding used for writing - * \return success flag - */ - bool save_file(const char* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - - /** - * Compute document order for the whole tree - * Sometimes this makes evaluation of XPath queries faster. - */ - PUGIXML_DEPRECATED void precompute_document_order(); - }; - -#ifndef PUGIXML_NO_XPATH - /** - * XPath exception class. - */ - class PUGIXML_CLASS xpath_exception: public std::exception - { - private: - const char* m_message; - - public: - /** - * Construct exception from static error string - * - * \param message - error string - */ - explicit xpath_exception(const char* message); - - /** - * Return error message - * - * \return error message - */ - virtual const char* what() const throw(); - }; - - /** - * XPath node class. - * - * XPath defines node to be either xml_node or xml_attribute in pugixml terminology, so xpath_node - * is either xml_node or xml_attribute. - */ - class PUGIXML_CLASS xpath_node - { - private: - xml_node m_node; - xml_attribute m_attribute; - - /// \internal Safe bool type - typedef xml_node xpath_node::*unspecified_bool_type; - - public: - /** - * Construct empty XPath node - */ - xpath_node(); - - /** - * Construct XPath node from XML node - * - * \param node - XML node - */ - xpath_node(const xml_node& node); - - /** - * Construct XPath node from XML attribute - * - * \param attribute - XML attribute - * \param parent - attribute's parent node - */ - xpath_node(const xml_attribute& attribute, const xml_node& parent); - - /** - * Get XML node, if any - * - * \return contained XML node, empty node otherwise - */ - xml_node node() const; - - /** - * Get XML attribute, if any - * - * \return contained XML attribute, if any, empty attribute otherwise - */ - xml_attribute attribute() const; - - /** - * Get parent of contained XML attribute, if any - * - * \return parent of contained XML attribute, if any, empty node otherwise - */ - xml_node parent() const; - - /** - * Safe bool conversion. - * Allows xpath_node to be used in a context where boolean variable is expected, such as 'if (node)'. - */ - operator unspecified_bool_type() const; - - // Borland C++ workaround - bool operator!() const; - - /** - * Compares two XPath nodes - * - * \param n - XPath node to compare to - * \return comparison result - */ - bool operator==(const xpath_node& n) const; - - /** - * Compares two XPath nodes - * - * \param n - XPath node to compare to - * \return comparison result - */ - bool operator!=(const xpath_node& n) const; - }; - -#ifdef __BORLANDC__ - // Borland C++ workaround - bool PUGIXML_FUNCTION operator&&(const xpath_node& lhs, bool rhs); - bool PUGIXML_FUNCTION operator||(const xpath_node& lhs, bool rhs); -#endif - - /** - * Not necessarily ordered constant collection of XPath nodes - */ - class PUGIXML_CLASS xpath_node_set - { - friend class xpath_ast_node; - - public: - /// Collection type - enum type_t - { - type_unsorted, ///< Not ordered - type_sorted, ///< Sorted by document order (ascending) - type_sorted_reverse ///< Sorted by document order (descending) - }; - - /// Constant iterator type - typedef const xpath_node* const_iterator; - - private: - type_t m_type; - - xpath_node m_storage; - - xpath_node* m_begin; - xpath_node* m_end; - xpath_node* m_eos; - - typedef xpath_node* iterator; - - iterator mut_begin(); - - void push_back(const xpath_node& n); - - void append(const_iterator begin, const_iterator end); - - void truncate(iterator it); - - void remove_duplicates(); - - public: - /** - * Default constructor - * Constructs empty set - */ - xpath_node_set(); - - /** - * Destructor - */ - ~xpath_node_set(); - - /** - * Copy constructor - * - * \param ns - set to copy - */ - xpath_node_set(const xpath_node_set& ns); - - /** - * Assignment operator - * - * \param ns - set to assign - * \return self - */ - xpath_node_set& operator=(const xpath_node_set& ns); - - /** - * Get collection type - * - * \return collection type - */ - type_t type() const; - - /** - * Get collection size - * - * \return collection size - */ - size_t size() const; - - /** - * Get element with the specified index - * - * \param index - requested index - * \return element - */ - const xpath_node& operator[](size_t index) const; - - /** - * Get begin constant iterator for collection - * - * \return begin constant iterator - */ - const_iterator begin() const; - - /** - * Get end iterator for collection - * - * \return end iterator - */ - const_iterator end() const; - - /** - * Sort the collection in ascending/descending order by document order - * - * \param reverse - whether to sort in ascending (false) or descending (true) order - */ - void sort(bool reverse = false); - - /** - * Get first node in the collection by document order - * - * \return first node by document order - * \note set.first() is not equal to set[0], since operator[] does not take document order into account - */ - xpath_node first() const; - - /** - * Return true if collection is empty - * - * \return true if collection is empty, false otherwise - */ - bool empty() const; - }; -#endif - -#ifndef PUGIXML_NO_STL - /** - * Convert wide string to UTF8 - * - * \param str - input wide string string - * \return output UTF8 string - */ - std::basic_string, std::allocator > PUGIXML_FUNCTION as_utf8(const wchar_t* str); - - /** - * Convert UTF8 to wide string - * - * \param str - input UTF8 string - * \return output wide string string - * - * \deprecated This function is deprecated and will be removed in future versions; use as_wide instead - */ - PUGIXML_DEPRECATED std::basic_string, std::allocator > PUGIXML_FUNCTION as_utf16(const char* str); - - /** - * Convert UTF8 to wide string - * - * \param str - input UTF8 string - * \return output wide string string - */ - std::basic_string, std::allocator > PUGIXML_FUNCTION as_wide(const char* str); -#endif - - /** - * Memory allocation function - * - * \param size - allocation size - * \return pointer to allocated memory on success, NULL on failure - */ - typedef void* (*allocation_function)(size_t size); - - /** - * Memory deallocation function - * - * \param ptr - pointer to memory previously allocated by allocation function - */ - typedef void (*deallocation_function)(void* ptr); - - /** - * Override default memory management functions - * - * All subsequent allocations/deallocations will be performed via supplied functions. Take care not to - * change memory management functions if any xml_document instances are still alive - this is considered - * undefined behaviour (expect crashes/memory damages/etc.). - * - * \param allocate - allocation function - * \param deallocate - deallocation function - * - * \note XPath-related allocations, as well as allocations in functions that return std::string (xml_node::path, as_utf8, as_wide) - * are not performed via these functions. - * \note If you're using parse() with ownership transfer, you have to allocate the buffer you pass to parse() with allocation - * function you set via this function. - */ - void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate); - - /** - * Get current memory allocation function - * - * \return memory allocation function - * \see set_memory_management_functions - */ - allocation_function PUGIXML_FUNCTION get_memory_allocation_function(); - - /** - * Get current memory deallocation function - * - * \return memory deallocation function - * \see set_memory_management_functions - */ - deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function(); -} - -#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) -namespace std -{ - // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) - std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&); - std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&); -} -#endif - -#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) -namespace std -{ - // Workarounds for (non-standard) iterator category detection - std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&); - std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&); -} -#endif - -#endif - -/** - * Copyright (c) 2006-2010 Arseny Kapoulkine - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ +/** + * pugixml parser - version 0.9 + * -------------------------------------------------------- + * Copyright (C) 2006-2010, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Report bugs and download new versions at http://code.google.com/p/pugixml/ + * + * This library is distributed under the MIT License. See notice at the end + * of this file. + * + * This work is based on the pugxml parser, which is: + * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) + */ + +#ifndef HEADER_PUGIXML_HPP +#define HEADER_PUGIXML_HPP + +#include "pugiconfig.hpp" + +#ifndef PUGIXML_NO_STL +namespace std +{ + struct bidirectional_iterator_tag; + +#ifdef __SUNPRO_CC + // Sun C++ compiler has a bug which forces template argument names in forward declarations to be the same as in actual definitions + template class allocator; + template struct char_traits; + template class basic_istream; + template class basic_ostream; + template class basic_string; +#else + // Borland C++ compiler has a bug which forces template argument names in forward declarations to be the same as in actual definitions + template class allocator; + template struct char_traits; + template class basic_istream; + template class basic_ostream; + template class basic_string; +#endif + + // Digital Mars compiler has a bug which requires a forward declaration for explicit instantiation (otherwise type selection is messed up later, producing link errors) + // Also note that we have to declare char_traits as a class here, since it's defined that way +#ifdef __DMC__ + template <> class char_traits; +#endif +} +#endif + +// Macro for deprecated features +#ifndef PUGIXML_DEPRECATED +# if defined(__GNUC__) +# define PUGIXML_DEPRECATED __attribute__((deprecated)) +# elif defined(_MSC_VER) && _MSC_VER >= 1300 +# define PUGIXML_DEPRECATED __declspec(deprecated) +# else +# define PUGIXML_DEPRECATED +# endif +#endif + +// No XPath without STL or exceptions +#if (defined(PUGIXML_NO_STL) || defined(PUGIXML_NO_EXCEPTIONS)) && !defined(PUGIXML_NO_XPATH) +# define PUGIXML_NO_XPATH +#endif + +// Include exception header for XPath +#ifndef PUGIXML_NO_XPATH +# include +#endif + +// If no API is defined, assume default +#ifndef PUGIXML_API +# define PUGIXML_API +#endif + +// If no API for classes is defined, assume default +#ifndef PUGIXML_CLASS +# define PUGIXML_CLASS PUGIXML_API +#endif + +// If no API for functions is defined, assume default +#ifndef PUGIXML_FUNCTION +# define PUGIXML_FUNCTION PUGIXML_API +#endif + +#include + +// Character interface macros +#ifdef PUGIXML_WCHAR_MODE +# define PUGIXML_TEXT(t) L ## t + +namespace pugi +{ + /// Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE + typedef wchar_t char_t; + +#ifndef PUGIXML_NO_STL + /// String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE + typedef std::basic_string, std::allocator > string_t; +#endif +} +#else +# define PUGIXML_TEXT(t) t + +namespace pugi +{ + /// Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE + typedef char char_t; + +# ifndef PUGIXML_NO_STL + // GCC 3.4 has a bug which prevents string_t instantiation using char_t, so we have to use char type explicitly + /// String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE + typedef std::basic_string, std::allocator > string_t; +# endif +} +#endif + +// Helpers for inline implementation +namespace pugi +{ + namespace impl + { + bool PUGIXML_FUNCTION strequal(const char_t*, const char_t*); + bool PUGIXML_FUNCTION strequalwild(const char_t*, const char_t*); + } +} + +/// The PugiXML Parser namespace. +namespace pugi +{ + /// Tree node classification. + enum xml_node_type + { + node_null, ///< Undifferentiated entity + node_document, ///< A document tree's absolute root. + node_element, ///< E.g. '<...>' + node_pcdata, ///< E.g. '>...<' + node_cdata, ///< E.g. '' + node_comment, ///< E.g. '' + node_pi, ///< E.g. '' + node_declaration ///< E.g. '' + }; + + // Parsing options + + /** + * Minimal parsing mode. Equivalent to turning all other flags off. This set of flags means + * that pugixml does not add pi/cdata sections or comments to DOM tree and does not perform + * any conversions for input data, meaning fastest parsing. + */ + const unsigned int parse_minimal = 0x0000; + + /** + * This flag determines if processing instructions (nodes with type node_pi; such nodes have the + * form of or in XML) are to be put in DOM tree. If this flag is off, + * they are not put in the tree, but are still parsed and checked for correctness. + * + * The corresponding node in DOM tree will have type node_pi, name "target" and value "content", + * if any. + * + * Note that (document declaration) is not considered to be a PI. + * + * This flag is off by default. + */ + const unsigned int parse_pi = 0x0001; + + /** + * This flag determines if comments (nodes with type node_comment; such nodes have the form of + * in XML) are to be put in DOM tree. If this flag is off, they are not put in + * the tree, but are still parsed and checked for correctness. + * + * The corresponding node in DOM tree will have type node_comment, empty name and value "content". + * + * This flag is off by default. + */ + const unsigned int parse_comments = 0x0002; + + /** + * This flag determines if CDATA sections (nodes with type node_cdata; such nodes have the form + * of in XML) are to be put in DOM tree. If this flag is off, they are not + * put in the tree, but are still parsed and checked for correctness. + * + * The corresponding node in DOM tree will have type node_cdata, empty name and value "content". + * + * This flag is on by default. + */ + const unsigned int parse_cdata = 0x0004; + + /** + * This flag determines if nodes with PCDATA (regular text) that consist only of whitespace + * characters are to be put in DOM tree. Often whitespace-only data is not significant for the + * application, and the cost of allocating and storing such nodes (both memory and speed-wise) + * can be significant. For example, after parsing XML string " ", element + * will have 3 children when parse_ws_pcdata is set (child with type node_pcdata and value=" ", + * child with type node_element and name "a", and another child with type node_pcdata and + * value=" "), and only 1 child when parse_ws_pcdata is not set. + * + * This flag is off by default. + */ + const unsigned int parse_ws_pcdata = 0x0008; + + /** + * This flag determines if character and entity references are to be expanded during the parsing + * process. Character references are &#...; or &#x...; (... is Unicode numeric representation of + * character in either decimal (&#...;) or hexadecimal (&#x...;) form), entity references are &...; + * Note that as pugixml does not handle DTD, the only allowed entities are predefined ones - + * &lt;, &gt;, &amp;, &apos; and &quot;. If character/entity reference can not be expanded, it is + * leaved as is, so you can do additional processing later. + * Reference expansion is performed in attribute values and PCDATA content. + * + * This flag is on by default. + */ + const unsigned int parse_escapes = 0x0010; + + /** + * This flag determines if EOL handling (that is, replacing sequences 0x0d 0x0a by a single 0x0a + * character, and replacing all standalone 0x0d characters by 0x0a) is to be performed on input + * data (that is, comments contents, PCDATA/CDATA contents and attribute values). + * + * This flag is on by default. + */ + const unsigned int parse_eol = 0x0020; + + /** + * This flag determines if attribute value normalization should be performed for all attributes. + * This means, that: + * 1. Whitespace characters (new line, tab and space) are replaced with space (' ') + * 2. Afterwards sequences of spaces are replaced with a single space + * 3. Leading/trailing whitespace characters are trimmed + * + * This flag is off by default. + */ + const unsigned int parse_wnorm_attribute = 0x0080; + + /** + * This flag determines if attribute value normalization should be performed for all attributes. + * This means, that whitespace characters (new line, tab and space) are replaced with space (' '). + * Note, that the actions performed while this flag is on are also performed if parse_wnorm_attribute + * is on, so this flag has no effect if parse_wnorm_attribute flag is set. + * New line characters are always treated as if parse_eol is set, i.e. \r\n is converted to single space. + * + * This flag is on by default. + */ + const unsigned int parse_wconv_attribute = 0x0040; + + /** + * This flag determines if XML document declaration (this node has the form of in XML) + * are to be put in DOM tree. If this flag is off, it is not put in the tree, but is still parsed + * and checked for correctness. + * + * The corresponding node in DOM tree will have type node_declaration, name "xml" and attributes, + * if any. + * + * This flag is off by default. + */ + const unsigned int parse_declaration = 0x0100; + + /** + * This is the default set of flags. It includes parsing CDATA sections (comments/PIs are not + * parsed), performing character and entity reference expansion, replacing whitespace characters + * with spaces in attribute values and performing EOL handling. Note, that PCDATA sections + * consisting only of whitespace characters are not parsed (by default) for performance reasons. + */ + const unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol; + + /** + * These flags determine the encoding of input data for XML document. Default mode is encoding_auto, + * which means that document encoding is auto-detected from BOM and necessary encoding conversions are + * applied. You can override this mode by using any of the specific encodings. + */ + enum xml_encoding + { + encoding_auto, //!< Auto-detect input encoding using BOM or < / 800 + PUGIXML_DEPRECATED +#endif + const unsigned int format_write_bom_utf8 = format_write_bom; + + /** + * If this flag is on, no indentation is performed and no line breaks are written to output file. + * This means that the data is written to output stream as is. + * + * This flag is off by default. + */ + const unsigned int format_raw = 0x04; + + /** + * If this flag is on, no default XML declaration is written to output file. + * This means that there will be no XML declaration in output stream unless there was one in XML document + * (i.e. if it was parsed with parse_declaration flag). + * + * This flag is off by default. + */ + const unsigned int format_no_declaration = 0x08; + + /** + * This is the default set of formatting flags. It includes indenting nodes depending on their + * depth in DOM tree. + */ + const unsigned int format_default = format_indent; + + // Forward declarations + struct xml_attribute_struct; + struct xml_node_struct; + + class xml_node_iterator; + class xml_attribute_iterator; + + class xml_tree_walker; + + class xml_node; + + #ifndef PUGIXML_NO_XPATH + class xpath_node; + class xpath_node_set; + class xpath_ast_node; + class xpath_allocator; + + /// XPath query return type classification + enum xpath_value_type + { + xpath_type_none, ///< Unknown type (query failed to compile) + xpath_type_node_set, ///< Node set (xpath_node_set) + xpath_type_number, ///< Number + xpath_type_string, ///< String + xpath_type_boolean ///< Boolean + }; + + /// XPath query return type classification + /// \deprecated This type is deprecated and will be removed in future versions; use xpath_value_type instead + typedef xpath_value_type xpath_type_t; + + /** + * A class that holds compiled XPath query and allows to evaluate query result + */ + class PUGIXML_CLASS xpath_query + { + private: + // Non-copyable semantics + xpath_query(const xpath_query&); + xpath_query& operator=(const xpath_query&); + + xpath_allocator* m_alloc; + xpath_ast_node* m_root; + + void compile(const char_t* query); + + public: + /** + * Constructor from string with XPath expression. + * Throws xpath_exception on compilation error, std::bad_alloc on out of memory error. + * + * \param query - string with XPath expression + */ + explicit xpath_query(const char_t* query); + + /** + * Destructor + */ + ~xpath_query(); + + /** + * Get query expression return type + * + * \return expression return type + **/ + xpath_value_type return_type() const; + + /** + * Evaluate expression as boolean value for the context node \a n. + * If expression does not directly evaluate to boolean, the expression result is converted + * as through boolean() XPath function call. + * Throws std::bad_alloc on out of memory error. + * + * \param n - context node + * \return evaluation result + */ + bool evaluate_boolean(const xml_node& n) const; + + /** + * Evaluate expression as double value for the context node \a n. + * If expression does not directly evaluate to double, the expression result is converted + * as through number() XPath function call. + * Throws std::bad_alloc on out of memory error. + * + * \param n - context node + * \return evaluation result + */ + double evaluate_number(const xml_node& n) const; + + /** + * Evaluate expression as string value for the context node \a n. + * If expression does not directly evaluate to string, the expression result is converted + * as through string() XPath function call. + * Throws std::bad_alloc on out of memory error. + * + * \param n - context node + * \return evaluation result + */ + string_t evaluate_string(const xml_node& n) const; + + /** + * Evaluate expression as node set for the context node \a n. + * If expression does not directly evaluate to node set, throws xpath_exception. + * Throws std::bad_alloc on out of memory error. + * + * \param n - context node + * \return evaluation result + */ + xpath_node_set evaluate_node_set(const xml_node& n) const; + }; + #endif + + /** + * Abstract writer class + * \see xml_node::print + */ + class PUGIXML_CLASS xml_writer + { + public: + /** + * Virtual destructor + */ + virtual ~xml_writer() {} + + /** + * Write memory chunk into stream/file/whatever + * + * \param data - data pointer + * \param size - data size + */ + virtual void write(const void* data, size_t size) = 0; + }; + + /** xml_writer implementation for FILE* + * \see xml_writer + */ + class PUGIXML_CLASS xml_writer_file: public xml_writer + { + public: + /** + * Construct writer instance + * + * \param file - this is FILE* object, void* is used to avoid header dependencies on stdio + */ + xml_writer_file(void* file); + + virtual void write(const void* data, size_t size); + + private: + void* file; + }; + + #ifndef PUGIXML_NO_STL + /** xml_writer implementation for streams + * \see xml_writer + */ + class PUGIXML_CLASS xml_writer_stream: public xml_writer + { + public: + /** + * Construct writer instance + * + * \param stream - output stream object + */ + xml_writer_stream(std::basic_ostream >& stream); + + /** + * Construct writer instance + * + * \param stream - output stream object + */ + xml_writer_stream(std::basic_ostream >& stream); + + virtual void write(const void* data, size_t size); + + private: + std::basic_ostream >* narrow_stream; + std::basic_ostream >* wide_stream; + }; + #endif + + /** + * A light-weight wrapper for manipulating attributes in DOM tree. + * Note: xml_attribute does not allocate any memory for the attribute it wraps; it only wraps a + * pointer to existing attribute. + */ + class PUGIXML_CLASS xml_attribute + { + friend class xml_attribute_iterator; + friend class xml_node; + + private: + xml_attribute_struct* _attr; + + /// \internal Safe bool type +#ifdef __MWERKS__ + typedef bool (xml_attribute::*unspecified_bool_type)() const; +#else + typedef xml_attribute_struct* xml_attribute::*unspecified_bool_type; +#endif + + /// \internal Initializing constructor + explicit xml_attribute(xml_attribute_struct* attr); + + public: + /** + * Default constructor. Constructs an empty attribute. + */ + xml_attribute(); + + public: + /** + * Safe bool conversion. + * Allows xml_node to be used in a context where boolean variable is expected, such as 'if (node)'. + */ + operator unspecified_bool_type() const; + + // Borland C++ workaround + bool operator!() const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator==(const xml_attribute& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator!=(const xml_attribute& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator<(const xml_attribute& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator>(const xml_attribute& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator<=(const xml_attribute& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator>=(const xml_attribute& r) const; + + public: + /** + * Get next attribute in attribute list of node that contains the attribute. + * + * \return next attribute, if any; empty attribute otherwise + */ + xml_attribute next_attribute() const; + + /** + * Get previous attribute in attribute list of node that contains the attribute. + * + * \return previous attribute, if any; empty attribute otherwise + */ + xml_attribute previous_attribute() const; + + /** + * Cast attribute value as int. + * + * \return attribute value as int, or 0 if conversion did not succeed or attribute is empty + */ + int as_int() const; + + /** + * Cast attribute value as unsigned int. + * + * \return attribute value as unsigned int, or 0 if conversion did not succeed or attribute is empty + * \note values out of non-negative int range (usually [0, 2^31-1]) get clamped to range boundaries + */ + unsigned int as_uint() const; + + /** + * Cast attribute value as double. + * + * \return attribute value as double, or 0.0 if conversion did not succeed or attribute is empty + */ + double as_double() const; + + /** + * Cast attribute value as float. + * + * \return attribute value as float, or 0.0f if conversion did not succeed or attribute is empty + */ + float as_float() const; + + /** + * Cast attribute value as bool. Returns true for attributes with values that start with '1', + * 't', 'T', 'y', 'Y', returns false for other attributes. + * + * \return attribute value as bool, or false if conversion did not succeed or attribute is empty + */ + bool as_bool() const; + + /// \internal Document order or 0 if not set + /// \deprecated This function is deprecated + PUGIXML_DEPRECATED unsigned int document_order() const; + + public: + /** + * Set attribute value to \a rhs. + * + * \param rhs - new attribute value + * \return self + */ + xml_attribute& operator=(const char_t* rhs); + + /** + * Set attribute value to \a rhs. + * + * \param rhs - new attribute value + * \return self + */ + xml_attribute& operator=(int rhs); + + /** + * Set attribute value to \a rhs. + * + * \param rhs - new attribute value + * \return self + */ + xml_attribute& operator=(unsigned int rhs); + + /** + * Set attribute value to \a rhs. + * + * \param rhs - new attribute value + * \return self + */ + xml_attribute& operator=(double rhs); + + /** + * Set attribute value to either 'true' or 'false' (depends on whether \a rhs is true or false). + * + * \param rhs - new attribute value + * \return self + */ + xml_attribute& operator=(bool rhs); + + /** + * Set attribute name to \a rhs. + * + * \param rhs - new attribute name + * \return success flag (call fails if attribute is empty or there is not enough memory) + */ + bool set_name(const char_t* rhs); + + /** + * Set attribute value to \a rhs. + * + * \param rhs - new attribute value + * \return success flag (call fails if attribute is empty or there is not enough memory) + */ + bool set_value(const char_t* rhs); + + /** + * Set attribute value to \a rhs. + * + * \param rhs - new attribute value + * \return success flag (call fails if attribute is empty or there is not enough memory) + */ + bool set_value(int rhs); + + /** + * Set attribute value to \a rhs. + * + * \param rhs - new attribute value + * \return success flag (call fails if attribute is empty or there is not enough memory) + */ + bool set_value(unsigned int rhs); + + /** + * Set attribute value to \a rhs. + * + * \param rhs - new attribute value + * \return success flag (call fails if attribute is empty or there is not enough memory) + */ + bool set_value(double rhs); + + /** + * Set attribute value to either 'true' or 'false' (depends on whether \a rhs is true or false). + * + * \param rhs - new attribute value + * \return success flag (call fails if attribute is empty or there is not enough memory) + */ + bool set_value(bool rhs); + + public: + /** + * Check if attribute is empty. + * + * \return true if attribute is empty, false otherwise + */ + bool empty() const; + + public: + /** + * Get attribute name. + * + * \return attribute name, or "" if attribute is empty + */ + const char_t* name() const; + + /** + * Get attribute value. + * + * \return attribute value, or "" if attribute is empty + */ + const char_t* value() const; + }; + +#ifdef __BORLANDC__ + // Borland C++ workaround + bool PUGIXML_FUNCTION operator&&(const xml_attribute& lhs, bool rhs); + bool PUGIXML_FUNCTION operator||(const xml_attribute& lhs, bool rhs); +#endif + + /** + * A light-weight wrapper for manipulating nodes in DOM tree. + * Note: xml_node does not allocate any memory for the node it wraps; it only wraps a pointer to + * existing node. + */ + class PUGIXML_CLASS xml_node + { + friend class xml_attribute_iterator; + friend class xml_node_iterator; + + protected: + xml_node_struct* _root; + + /// \internal Safe bool type +#ifdef __MWERKS__ + typedef bool (xml_node::*unspecified_bool_type)() const; +#else + typedef xml_node_struct* xml_node::*unspecified_bool_type; +#endif + + /// \internal Initializing constructor + explicit xml_node(xml_node_struct* p); + + private: + template void all_elements_by_name_helper(const char_t* name, OutputIterator it) const + { + if (!_root) return; + + for (xml_node node = first_child(); node; node = node.next_sibling()) + { + if (node.type() == node_element) + { + if (impl::strequal(name, node.name())) + { + *it = node; + ++it; + } + + if (node.first_child()) node.all_elements_by_name_helper(name, it); + } + } + } + + template void all_elements_by_name_w_helper(const char_t* name, OutputIterator it) const + { + if (!_root) return; + + for (xml_node node = first_child(); node; node = node.next_sibling()) + { + if (node.type() == node_element) + { + if (impl::strequalwild(name, node.name())) + { + *it = node; + ++it; + } + + if (node.first_child()) node.all_elements_by_name_w_helper(name, it); + } + } + } + + public: + /** + * Default constructor. Constructs an empty node. + */ + xml_node(); + + public: + /** + * Safe bool conversion. + * Allows xml_node to be used in a context where boolean variable is expected, such as 'if (node)'. + */ + operator unspecified_bool_type() const; + + // Borland C++ workaround + bool operator!() const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator==(const xml_node& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator!=(const xml_node& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator<(const xml_node& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator>(const xml_node& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator<=(const xml_node& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator>=(const xml_node& r) const; + + public: + /** + * Node iterator type (for child nodes). + * \see xml_node_iterator + */ + typedef xml_node_iterator iterator; + + /** + * Node iterator type (for child nodes). + * \see xml_attribute_iterator + */ + typedef xml_attribute_iterator attribute_iterator; + + /** + * Access the begin iterator for this node's collection of child nodes. + * + * \return iterator that points to the first child node, or past-the-end iterator if node is empty or has no children + */ + iterator begin() const; + + /** + * Access the end iterator for this node's collection of child nodes. + * + * \return past-the-end iterator for child list + */ + iterator end() const; + + /** + * Access the begin iterator for this node's collection of attributes. + * + * \return iterator that points to the first attribute, or past-the-end iterator if node is empty or has no attributes + */ + attribute_iterator attributes_begin() const; + + /** + * Access the end iterator for this node's collection of attributes. + * + * \return past-the-end iterator for attribute list + */ + attribute_iterator attributes_end() const; + + public: + /** + * Check if node is empty. + * + * \return true if node is empty, false otherwise + */ + bool empty() const; + + public: + /** + * Get node type + * + * \return node type; node_null for empty nodes + */ + xml_node_type type() const; + + /** + * Get node name (element name for element nodes, PI target for PI) + * + * \return node name, if any; "" otherwise + */ + const char_t* name() const; + + /** + * Get node value (comment/PI/PCDATA/CDATA contents, depending on node type) + * + * \return node value, if any; "" otherwise + */ + const char_t* value() const; + + /** + * Get child with the specified name + * + * \param name - child name + * \return child with the specified name, if any; empty node otherwise + */ + xml_node child(const char_t* name) const; + + /** + * Get child with the name that matches specified pattern + * + * \param name - child name pattern + * \return child with the name that matches pattern, if any; empty node otherwise + * + * \deprecated This function is deprecated + */ + PUGIXML_DEPRECATED xml_node child_w(const char_t* name) const; + + /** + * Get attribute with the specified name + * + * \param name - attribute name + * \return attribute with the specified name, if any; empty attribute otherwise + */ + xml_attribute attribute(const char_t* name) const; + + /** + * Get attribute with the name that matches specified pattern + * + * \param name - attribute name pattern + * \return attribute with the name that matches pattern, if any; empty attribute otherwise + * + * \deprecated This function is deprecated + */ + PUGIXML_DEPRECATED xml_attribute attribute_w(const char_t* name) const; + + /** + * Get first of following sibling nodes with the specified name + * + * \param name - sibling name + * \return node with the specified name, if any; empty node otherwise + */ + xml_node next_sibling(const char_t* name) const; + + /** + * Get first of the following sibling nodes with the name that matches specified pattern + * + * \param name - sibling name pattern + * \return node with the name that matches pattern, if any; empty node otherwise + * + * \deprecated This function is deprecated + */ + PUGIXML_DEPRECATED xml_node next_sibling_w(const char_t* name) const; + + /** + * Get following sibling + * + * \return following sibling node, if any; empty node otherwise + */ + xml_node next_sibling() const; + + /** + * Get first of preceding sibling nodes with the specified name + * + * \param name - sibling name + * \return node with the specified name, if any; empty node otherwise + */ + xml_node previous_sibling(const char_t* name) const; + + /** + * Get first of the preceding sibling nodes with the name that matches specified pattern + * + * \param name - sibling name pattern + * \return node with the name that matches pattern, if any; empty node otherwise + * + * \deprecated This function is deprecated + */ + PUGIXML_DEPRECATED xml_node previous_sibling_w(const char_t* name) const; + + /** + * Get preceding sibling + * + * \return preceding sibling node, if any; empty node otherwise + */ + xml_node previous_sibling() const; + + /** + * Get parent node + * + * \return parent node if any; empty node otherwise + */ + xml_node parent() const; + + /** + * Get root of DOM tree this node belongs to. + * + * \return tree root + */ + xml_node root() const; + + /** + * Get child value of current node; that is, value of the first child node of type PCDATA/CDATA + * + * \return child value of current node, if any; "" otherwise + */ + const char_t* child_value() const; + + /** + * Get child value of child with specified name. \see child_value + * node.child_value(name) is equivalent to node.child(name).child_value() + * + * \param name - child name + * \return child value of specified child node, if any; "" otherwise + */ + const char_t* child_value(const char_t* name) const; + + /** + * Get child value of child with name that matches the specified pattern. \see child_value + * node.child_value_w(name) is equivalent to node.child_w(name).child_value() + * + * \param name - child name pattern + * \return child value of specified child node, if any; "" otherwise + * + * \deprecated This function is deprecated + */ + PUGIXML_DEPRECATED const char_t* child_value_w(const char_t* name) const; + + public: + /** + * Set node name to \a rhs (for PI/element nodes). \see name + * + * \param rhs - new node name + * \return success flag (call fails if node is of the wrong type or there is not enough memory) + */ + bool set_name(const char_t* rhs); + + /** + * Set node value to \a rhs (for PI/PCDATA/CDATA/comment nodes). \see value + * + * \param rhs - new node value + * \return success flag (call fails if node is of the wrong type or there is not enough memory) + */ + bool set_value(const char_t* rhs); + + /** + * Add attribute with specified name (for element nodes) + * + * \param name - attribute name + * \return added attribute, or empty attribute if there was an error (wrong node type) + */ + xml_attribute append_attribute(const char_t* name); + + /** + * Insert attribute with specified name after \a attr (for element nodes) + * + * \param name - attribute name + * \param attr - attribute to insert a new one after + * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node) + */ + xml_attribute insert_attribute_after(const char_t* name, const xml_attribute& attr); + + /** + * Insert attribute with specified name before \a attr (for element nodes) + * + * \param name - attribute name + * \param attr - attribute to insert a new one before + * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node) + */ + xml_attribute insert_attribute_before(const char_t* name, const xml_attribute& attr); + + /** + * Add a copy of the specified attribute (for element nodes) + * + * \param proto - attribute prototype which is to be copied + * \return inserted attribute, or empty attribute if there was an error (wrong node type) + */ + xml_attribute append_copy(const xml_attribute& proto); + + /** + * Insert a copy of the specified attribute after \a attr (for element nodes) + * + * \param proto - attribute prototype which is to be copied + * \param attr - attribute to insert a new one after + * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node) + */ + xml_attribute insert_copy_after(const xml_attribute& proto, const xml_attribute& attr); + + /** + * Insert a copy of the specified attribute before \a attr (for element nodes) + * + * \param proto - attribute prototype which is to be copied + * \param attr - attribute to insert a new one before + * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node) + */ + xml_attribute insert_copy_before(const xml_attribute& proto, const xml_attribute& attr); + + /** + * Add child node with specified type (for element nodes) + * + * \param type - node type + * \return added node, or empty node if there was an error (wrong node type) + */ + xml_node append_child(xml_node_type type = node_element); + + /** + * Insert child node with specified type after \a node (for element nodes) + * + * \param type - node type + * \param node - node to insert a new one after + * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node) + */ + xml_node insert_child_after(xml_node_type type, const xml_node& node); + + /** + * Insert child node with specified type before \a node (for element nodes) + * + * \param type - node type + * \param node - node to insert a new one before + * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node) + */ + xml_node insert_child_before(xml_node_type type, const xml_node& node); + + /** + * Add a copy of the specified node as a child (for element nodes) + * + * \param proto - node prototype which is to be copied + * \return inserted node, or empty node if there was an error (wrong node type) + */ + xml_node append_copy(const xml_node& proto); + + /** + * Insert a copy of the specified node after \a node (for element nodes) + * + * \param proto - node prototype which is to be copied + * \param node - node to insert a new one after + * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node) + */ + xml_node insert_copy_after(const xml_node& proto, const xml_node& node); + + /** + * Insert a copy of the specified node before \a node (for element nodes) + * + * \param proto - node prototype which is to be copied + * \param node - node to insert a new one before + * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node) + */ + xml_node insert_copy_before(const xml_node& proto, const xml_node& node); + + /** + * Remove specified attribute + * + * \param a - attribute to be removed + * \return success flag + */ + bool remove_attribute(const xml_attribute& a); + + /** + * Remove attribute with the specified name, if any + * + * \param name - attribute name + * \return success flag + */ + bool remove_attribute(const char_t* name); + + /** + * Remove specified child + * + * \param n - child node to be removed + * \return success flag + */ + bool remove_child(const xml_node& n); + + /** + * Remove child with the specified name, if any + * + * \param name - child name + * \return success flag + */ + bool remove_child(const char_t* name); + + public: + /** + * Get first attribute + * + * \return first attribute, if any; empty attribute otherwise + */ + xml_attribute first_attribute() const; + + /** + * Get last attribute + * + * \return last attribute, if any; empty attribute otherwise + */ + xml_attribute last_attribute() const; + + /** + * Get all elements from subtree with given name + * + * \param name - node name + * \param it - output iterator (for example, std::back_insert_iterator (result of std::back_inserter)) + * + * \deprecated This function is deprecated + */ + template PUGIXML_DEPRECATED void all_elements_by_name(const char_t* name, OutputIterator it) const + { + all_elements_by_name_helper(name, it); + } + + /** + * Get all elements from subtree with name that matches given pattern + * + * \param name - node name pattern + * \param it - output iterator (for example, std::back_insert_iterator (result of std::back_inserter)) + * + * \deprecated This function is deprecated + */ + template PUGIXML_DEPRECATED void all_elements_by_name_w(const char_t* name, OutputIterator it) const + { + all_elements_by_name_w_helper(name, it); + } + + /** + * Get first child + * + * \return first child, if any; empty node otherwise + */ + xml_node first_child() const; + + /** + * Get last child + * + * \return last child, if any; empty node otherwise + */ + xml_node last_child() const; + + /** + * Find attribute using predicate + * + * \param pred - predicate, that takes xml_attribute and returns bool + * \return first attribute for which predicate returned true, or empty attribute + */ + template xml_attribute find_attribute(Predicate pred) const + { + if (!_root) return xml_attribute(); + + for (xml_attribute attrib = first_attribute(); attrib; attrib = attrib.next_attribute()) + if (pred(attrib)) + return attrib; + + return xml_attribute(); + } + + /** + * Find child node using predicate + * + * \param pred - predicate, that takes xml_node and returns bool + * \return first child node for which predicate returned true, or empty node + */ + template xml_node find_child(Predicate pred) const + { + if (!_root) return xml_node(); + + for (xml_node node = first_child(); node; node = node.next_sibling()) + if (pred(node)) + return node; + + return xml_node(); + } + + /** + * Find node from subtree using predicate + * + * \param pred - predicate, that takes xml_node and returns bool + * \return first node from subtree for which predicate returned true, or empty node + */ + template xml_node find_node(Predicate pred) const + { + if (!_root) return xml_node(); + + for (xml_node node = first_child(); node; node = node.next_sibling()) + { + if (pred(node)) + return node; + + if (node.first_child()) + { + xml_node found = node.find_node(pred); + if (found) return found; + } + } + + return xml_node(); + } + + /** + * Find child node with the specified name that has specified attribute + * + * \param name - child node name + * \param attr_name - attribute name of child node + * \param attr_value - attribute value of child node + * \return first matching child node, or empty node + */ + xml_node find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const; + + /** + * Find child node with the specified name that has specified attribute (use pattern matching for node name and attribute name/value) + * + * \param name - pattern for child node name + * \param attr_name - pattern for attribute name of child node + * \param attr_value - pattern for attribute value of child node + * \return first matching child node, or empty node + * + * \deprecated This function is deprecated + */ + PUGIXML_DEPRECATED xml_node find_child_by_attribute_w(const char_t* name, const char_t* attr_name, const char_t* attr_value) const; + + /** + * Find child node that has specified attribute + * + * \param attr_name - attribute name of child node + * \param attr_value - attribute value of child node + * \return first matching child node, or empty node + */ + xml_node find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const; + + /** + * Find child node that has specified attribute (use pattern matching for attribute name/value) + * + * \param attr_name - pattern for attribute name of child node + * \param attr_value - pattern for attribute value of child node + * \return first matching child node, or empty node + * + * \deprecated This function is deprecated + */ + PUGIXML_DEPRECATED xml_node find_child_by_attribute_w(const char_t* attr_name, const char_t* attr_value) const; + + #ifndef PUGIXML_NO_STL + /** + * Get the absolute node path from root as a text string. + * + * \param delimiter - delimiter character to insert between element names + * \return path string (e.g. '/bookstore/book/author'). + */ + string_t path(char_t delimiter = '/') const; + #endif + + /** + * Search for a node by path. + * \param path - path string; e.g. './foo/bar' (relative to node), '/foo/bar' (relative + * to root), '../foo/bar'. + * \param delimiter - delimiter character to use while tokenizing path + * \return matching node, if any; empty node otherwise + */ + xml_node first_element_by_path(const char_t* path, char_t delimiter = '/') const; + + /** + * Recursively traverse subtree with xml_tree_walker + * \see xml_tree_walker::begin + * \see xml_tree_walker::for_each + * \see xml_tree_walker::end + * + * \param walker - tree walker to traverse subtree with + * \return traversal result + */ + bool traverse(xml_tree_walker& walker); + + #ifndef PUGIXML_NO_XPATH + /** + * Select single node by evaluating XPath query + * + * \param query - query string + * \return first node from the resulting node set by document order, or empty node if none found + */ + xpath_node select_single_node(const char_t* query) const; + + /** + * Select single node by evaluating XPath query + * + * \param query - compiled query + * \return first node from the resulting node set by document order, or empty node if none found + */ + xpath_node select_single_node(const xpath_query& query) const; + + /** + * Select node set by evaluating XPath query + * + * \param query - query string + * \return resulting node set + */ + xpath_node_set select_nodes(const char_t* query) const; + + /** + * Select node set by evaluating XPath query + * + * \param query - compiled query + * \return resulting node set + */ + xpath_node_set select_nodes(const xpath_query& query) const; + #endif + + /// \internal Document order or 0 if not set + /// \deprecated This function is deprecated + PUGIXML_DEPRECATED unsigned int document_order() const; + + /** + * Print subtree to writer + * + * \param writer - writer object + * \param indent - indentation string + * \param flags - formatting flags + * \param encoding - encoding used for writing + * \param depth - starting depth (used for indentation) + */ + void print(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; + + #ifndef PUGIXML_NO_STL + /** + * Print subtree to stream + * + * \param os - output stream + * \param indent - indentation string + * \param flags - formatting flags + * \param encoding - encoding used for writing + * \param depth - starting depth (used for indentation) + */ + void print(std::basic_ostream >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; + + /** + * Print subtree to stream + * + * \param os - output stream + * \param indent - indentation string + * \param flags - formatting flags + * \param encoding - encoding used for writing + * \param depth - starting depth (used for indentation) + */ + void print(std::basic_ostream >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, unsigned int depth = 0) const; + #endif + + /** + * Get node offset in parsed file/string (in bytes) for debugging purposes + * + * \return offset in bytes to start of node data, or -1 in case of error + * \note This will return -1 if node information changed to the extent that it's no longer possible to calculate offset, for example + * if element node name has significantly changed; this is guaranteed to return correct offset only for nodes that have not changed + * since parsing. + */ + ptrdiff_t offset_debug() const; + }; + +#ifdef __BORLANDC__ + // Borland C++ workaround + bool PUGIXML_FUNCTION operator&&(const xml_node& lhs, bool rhs); + bool PUGIXML_FUNCTION operator||(const xml_node& lhs, bool rhs); +#endif + + /** + * Child node iterator. + * It's a bidirectional iterator with value type 'xml_node'. + */ + class PUGIXML_CLASS xml_node_iterator + { + friend class xml_node; + + private: + xml_node _wrap; + xml_node _parent; + + /// \internal Initializing constructor + xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent); + + public: + /** + * Iterator traits + */ + typedef ptrdiff_t difference_type; + typedef xml_node value_type; + typedef xml_node* pointer; + typedef xml_node& reference; + + #ifndef PUGIXML_NO_STL + typedef std::bidirectional_iterator_tag iterator_category; + #endif + + /** + * Default constructor + */ + xml_node_iterator(); + + /** + * Initializing constructor + * + * \param node - node that iterator will point at + */ + xml_node_iterator(const xml_node& node); + + /** + * Check if this iterator is equal to \a rhs + * + * \param rhs - other iterator + * \return comparison result + */ + bool operator==(const xml_node_iterator& rhs) const; + + /** + * Check if this iterator is not equal to \a rhs + * + * \param rhs - other iterator + * \return comparison result + */ + bool operator!=(const xml_node_iterator& rhs) const; + + /** + * Dereferencing operator + * + * \return reference to the node iterator points at + */ + xml_node& operator*(); + + /** + * Member access operator + * + * \return pointer to the node iterator points at + */ + xml_node* operator->(); + + /** + * Pre-increment operator + * + * \return self + */ + const xml_node_iterator& operator++(); + + /** + * Post-increment operator + * + * \return old value + */ + xml_node_iterator operator++(int); + + /** + * Pre-decrement operator + * + * \return self + */ + const xml_node_iterator& operator--(); + + /** + * Post-decrement operator + * + * \return old value + */ + xml_node_iterator operator--(int); + }; + + /** + * Attribute iterator. + * It's a bidirectional iterator with value type 'xml_attribute'. + */ + class PUGIXML_CLASS xml_attribute_iterator + { + friend class xml_node; + + private: + xml_attribute _wrap; + xml_node _parent; + + /// \internal Initializing constructor + xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent); + + public: + /** + * Iterator traits + */ + typedef ptrdiff_t difference_type; + typedef xml_attribute value_type; + typedef xml_attribute* pointer; + typedef xml_attribute& reference; + + #ifndef PUGIXML_NO_STL + typedef std::bidirectional_iterator_tag iterator_category; + #endif + + /** + * Default constructor + */ + xml_attribute_iterator(); + + /** + * Initializing constructor + * + * \param attr - attribute that iterator will point at + * \param parent - parent node of the attribute + */ + xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent); + + /** + * Check if this iterator is equal to \a rhs + * + * \param rhs - other iterator + * \return comparison result + */ + bool operator==(const xml_attribute_iterator& rhs) const; + + /** + * Check if this iterator is not equal to \a rhs + * + * \param rhs - other iterator + * \return comparison result + */ + bool operator!=(const xml_attribute_iterator& rhs) const; + + /** + * Dereferencing operator + * + * \return reference to the node iterator points at + */ + xml_attribute& operator*(); + + /** + * Member access operator + * + * \return pointer to the node iterator points at + */ + xml_attribute* operator->(); + + /** + * Pre-increment operator + * + * \return self + */ + const xml_attribute_iterator& operator++(); + + /** + * Post-increment operator + * + * \return old value + */ + xml_attribute_iterator operator++(int); + + /** + * Pre-decrement operator + * + * \return self + */ + const xml_attribute_iterator& operator--(); + + /** + * Post-decrement operator + * + * \return old value + */ + xml_attribute_iterator operator--(int); + }; + + /** + * Abstract tree walker class + * \see xml_node::traverse + */ + class PUGIXML_CLASS xml_tree_walker + { + friend class xml_node; + + private: + int _depth; + + protected: + /** + * Get node depth + * + * \return node depth + */ + int depth() const; + + public: + /** + * Default constructor + */ + xml_tree_walker(); + + /** + * Virtual destructor + */ + virtual ~xml_tree_walker(); + + public: + /** + * Callback that is called when traversal of node begins. + * + * \return returning false will abort the traversal + */ + virtual bool begin(xml_node&); + + /** + * Callback that is called for each node traversed + * + * \return returning false will abort the traversal + */ + virtual bool for_each(xml_node&) = 0; + + /** + * Callback that is called when traversal of node ends. + * + * \return returning false will abort the traversal + */ + virtual bool end(xml_node&); + }; + + /** + * Struct used to distinguish parsing with ownership transfer from parsing without it. + * \see xml_document::parse + */ + struct transfer_ownership_tag {}; + + /** + * Parsing status enumeration, returned as part of xml_parse_result struct + */ + enum xml_parse_status + { + status_ok = 0, ///< No error + + status_file_not_found, ///< File was not found during load_file() + status_io_error, ///< Error reading from file/stream + status_out_of_memory, ///< Could not allocate memory + status_internal_error, ///< Internal error occurred + + status_unrecognized_tag, ///< Parser could not determine tag type + + status_bad_pi, ///< Parsing error occurred while parsing document declaration/processing instruction () + status_bad_comment, ///< Parsing error occurred while parsing comment () + status_bad_cdata, ///< Parsing error occurred while parsing CDATA section () + status_bad_doctype, ///< Parsing error occurred while parsing document type declaration + status_bad_pcdata, ///< Parsing error occurred while parsing PCDATA section (>...<) + status_bad_start_element, ///< Parsing error occurred while parsing start element tag () + status_bad_attribute, ///< Parsing error occurred while parsing element attribute + status_bad_end_element, ///< Parsing error occurred while parsing end element tag () + status_end_element_mismatch ///< There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or there was an excessive closing tag) + }; + + /** + * Parser result + */ + struct PUGIXML_CLASS xml_parse_result + { + /// Parsing status (\see xml_parse_status) + xml_parse_status status; + + /// Last parsed offset (in bytes from file/string start) + ptrdiff_t offset; + + /// Source document encoding + xml_encoding encoding; + + /// Cast to bool operator + operator bool() const + { + return status == status_ok; + } + + /// Get error description + const char* description() const; + }; + + /** + * Document class (DOM tree root). + * This class has non-copyable semantics (private copy constructor/assignment operator). + */ + class PUGIXML_CLASS xml_document: public xml_node + { + private: + char_t* _buffer; + + char _memory[192]; + + xml_document(const xml_document&); + const xml_document& operator=(const xml_document&); + + void create(); + void destroy(); + + xml_parse_result load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own); + + public: + /** + * Default constructor, makes empty document + */ + xml_document(); + + /** + * Destructor + */ + ~xml_document(); + + public: + #ifndef PUGIXML_NO_STL + /** + * Load document from stream. + * + * \param stream - stream with XML data + * \param options - parsing options + * \param encoding - source data encoding + * \return parsing result + */ + xml_parse_result load(std::basic_istream >& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + + /** + * Load document from stream. + * + * \param stream - stream with XML data + * \param options - parsing options + * \return parsing result + */ + xml_parse_result load(std::basic_istream >& stream, unsigned int options = parse_default); + #endif + + /** + * Load document from string. String has to be zero-terminated. No encoding conversions are applied. + * + * \param contents - input string + * \param options - parsing options + * \return parsing result + */ + xml_parse_result load(const char_t* contents, unsigned int options = parse_default); + + /** + * Parse the given XML string in-situ. + * The string is modified; you should ensure that string data will persist throughout the + * document's lifetime. Although, document does not gain ownership over the string, so you + * should free the memory occupied by it manually. + * + * \param xmlstr - read/write string with XML data + * \param options - parsing options + * \return parsing result + * + * \deprecated This function is deprecated and will be removed in future versions; use xml_document::load_buffer_inplace instead + */ + PUGIXML_DEPRECATED xml_parse_result parse(char* xmlstr, unsigned int options = parse_default); + + /** + * Parse the given XML string in-situ (gains ownership). + * The string is modified; document gains ownership over the string, so you don't have to worry + * about it's lifetime. + * Call example: doc.parse(transfer_ownership_tag(), string, options); + * + * \param xmlstr - read/write string with XML data + * \param options - parsing options + * \return parsing result + * + * \deprecated This function is deprecated and will be removed in future versions; use xml_document::load_buffer_inplace_own instead + */ + PUGIXML_DEPRECATED xml_parse_result parse(const transfer_ownership_tag&, char* xmlstr, unsigned int options = parse_default); + + /** + * Load document from file + * + * \param path - file path + * \param options - parsing options + * \param encoding - source data encoding + * \return parsing result + */ + xml_parse_result load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + + /** + * Load document from buffer + * + * \param contents - buffer contents + * \param size - buffer size in bytes + * \param options - parsing options + * \param encoding - source data encoding + * \return parsing result + */ + xml_parse_result load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + + /** + * Load document from buffer in-situ. + * The buffer is modified; you should ensure that buffer data will persist throughout the document's + * lifetime. Document does not gain ownership over the buffer, so you should free the buffer memory manually. + * + * \param contents - buffer contents + * \param size - buffer size in bytes + * \param options - parsing options + * \param encoding - source data encoding + * \return parsing result + */ + xml_parse_result load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + + /** + * Load document from buffer in-situ (gains buffer ownership). + * The buffer is modified; you should ensure that buffer data will persist throughout the document's + * lifetime. Document gains ownership over the buffer, so you should allocate the buffer with pugixml + * allocation function. + * + * \param contents - buffer contents + * \param size - buffer size in bytes + * \param options - parsing options + * \param encoding - source data encoding + * \return parsing result + */ + xml_parse_result load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + + /** + * Save XML to writer + * + * \param writer - writer object + * \param indent - indentation string + * \param flags - formatting flags + * \param encoding - encoding used for writing + */ + void save(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; + + #ifndef PUGIXML_NO_STL + /** + * Save XML to stream + * + * \param stream - output stream + * \param indent - indentation string + * \param flags - formatting flags + * \param encoding - encoding used for writing + */ + void save(std::basic_ostream >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; + + /** + * Save XML to stream + * + * \param stream - output stream + * \param indent - indentation string + * \param flags - formatting flags + */ + void save(std::basic_ostream >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default) const; + #endif + + /** + * Save XML to file + * + * \param path - file path + * \param indent - indentation string + * \param flags - formatting flags + * \param encoding - encoding used for writing + * \return success flag + */ + bool save_file(const char* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; + + /** + * Compute document order for the whole tree + * Sometimes this makes evaluation of XPath queries faster. + */ + PUGIXML_DEPRECATED void precompute_document_order(); + }; + +#ifndef PUGIXML_NO_XPATH + /** + * XPath exception class. + */ + class PUGIXML_CLASS xpath_exception: public std::exception + { + private: + const char* m_message; + + public: + /** + * Construct exception from static error string + * + * \param message - error string + */ + explicit xpath_exception(const char* message); + + /** + * Return error message + * + * \return error message + */ + virtual const char* what() const throw(); + }; + + /** + * XPath node class. + * + * XPath defines node to be either xml_node or xml_attribute in pugixml terminology, so xpath_node + * is either xml_node or xml_attribute. + */ + class PUGIXML_CLASS xpath_node + { + private: + xml_node m_node; + xml_attribute m_attribute; + + /// \internal Safe bool type + typedef xml_node xpath_node::*unspecified_bool_type; + + public: + /** + * Construct empty XPath node + */ + xpath_node(); + + /** + * Construct XPath node from XML node + * + * \param node - XML node + */ + xpath_node(const xml_node& node); + + /** + * Construct XPath node from XML attribute + * + * \param attribute - XML attribute + * \param parent - attribute's parent node + */ + xpath_node(const xml_attribute& attribute, const xml_node& parent); + + /** + * Get XML node, if any + * + * \return contained XML node, empty node otherwise + */ + xml_node node() const; + + /** + * Get XML attribute, if any + * + * \return contained XML attribute, if any, empty attribute otherwise + */ + xml_attribute attribute() const; + + /** + * Get parent of contained XML attribute, if any + * + * \return parent of contained XML attribute, if any, empty node otherwise + */ + xml_node parent() const; + + /** + * Safe bool conversion. + * Allows xpath_node to be used in a context where boolean variable is expected, such as 'if (node)'. + */ + operator unspecified_bool_type() const; + + // Borland C++ workaround + bool operator!() const; + + /** + * Compares two XPath nodes + * + * \param n - XPath node to compare to + * \return comparison result + */ + bool operator==(const xpath_node& n) const; + + /** + * Compares two XPath nodes + * + * \param n - XPath node to compare to + * \return comparison result + */ + bool operator!=(const xpath_node& n) const; + }; + +#ifdef __BORLANDC__ + // Borland C++ workaround + bool PUGIXML_FUNCTION operator&&(const xpath_node& lhs, bool rhs); + bool PUGIXML_FUNCTION operator||(const xpath_node& lhs, bool rhs); +#endif + + /** + * Not necessarily ordered constant collection of XPath nodes + */ + class PUGIXML_CLASS xpath_node_set + { + friend class xpath_ast_node; + + public: + /// Collection type + enum type_t + { + type_unsorted, ///< Not ordered + type_sorted, ///< Sorted by document order (ascending) + type_sorted_reverse ///< Sorted by document order (descending) + }; + + /// Constant iterator type + typedef const xpath_node* const_iterator; + + private: + type_t m_type; + + xpath_node m_storage; + + xpath_node* m_begin; + xpath_node* m_end; + xpath_node* m_eos; + + typedef xpath_node* iterator; + + iterator mut_begin(); + + void push_back(const xpath_node& n); + + void append(const_iterator begin, const_iterator end); + + void truncate(iterator it); + + void remove_duplicates(); + + public: + /** + * Default constructor + * Constructs empty set + */ + xpath_node_set(); + + /** + * Destructor + */ + ~xpath_node_set(); + + /** + * Copy constructor + * + * \param ns - set to copy + */ + xpath_node_set(const xpath_node_set& ns); + + /** + * Assignment operator + * + * \param ns - set to assign + * \return self + */ + xpath_node_set& operator=(const xpath_node_set& ns); + + /** + * Get collection type + * + * \return collection type + */ + type_t type() const; + + /** + * Get collection size + * + * \return collection size + */ + size_t size() const; + + /** + * Get element with the specified index + * + * \param index - requested index + * \return element + */ + const xpath_node& operator[](size_t index) const; + + /** + * Get begin constant iterator for collection + * + * \return begin constant iterator + */ + const_iterator begin() const; + + /** + * Get end iterator for collection + * + * \return end iterator + */ + const_iterator end() const; + + /** + * Sort the collection in ascending/descending order by document order + * + * \param reverse - whether to sort in ascending (false) or descending (true) order + */ + void sort(bool reverse = false); + + /** + * Get first node in the collection by document order + * + * \return first node by document order + * \note set.first() is not equal to set[0], since operator[] does not take document order into account + */ + xpath_node first() const; + + /** + * Return true if collection is empty + * + * \return true if collection is empty, false otherwise + */ + bool empty() const; + }; +#endif + +#ifndef PUGIXML_NO_STL + /** + * Convert wide string to UTF8 + * + * \param str - input wide string string + * \return output UTF8 string + */ + std::basic_string, std::allocator > PUGIXML_FUNCTION as_utf8(const wchar_t* str); + + /** + * Convert UTF8 to wide string + * + * \param str - input UTF8 string + * \return output wide string string + * + * \deprecated This function is deprecated and will be removed in future versions; use as_wide instead + */ + PUGIXML_DEPRECATED std::basic_string, std::allocator > PUGIXML_FUNCTION as_utf16(const char* str); + + /** + * Convert UTF8 to wide string + * + * \param str - input UTF8 string + * \return output wide string string + */ + std::basic_string, std::allocator > PUGIXML_FUNCTION as_wide(const char* str); +#endif + + /** + * Memory allocation function + * + * \param size - allocation size + * \return pointer to allocated memory on success, NULL on failure + */ + typedef void* (*allocation_function)(size_t size); + + /** + * Memory deallocation function + * + * \param ptr - pointer to memory previously allocated by allocation function + */ + typedef void (*deallocation_function)(void* ptr); + + /** + * Override default memory management functions + * + * All subsequent allocations/deallocations will be performed via supplied functions. Take care not to + * change memory management functions if any xml_document instances are still alive - this is considered + * undefined behaviour (expect crashes/memory damages/etc.). + * + * \param allocate - allocation function + * \param deallocate - deallocation function + * + * \note XPath-related allocations, as well as allocations in functions that return std::string (xml_node::path, as_utf8, as_wide) + * are not performed via these functions. + * \note If you're using parse() with ownership transfer, you have to allocate the buffer you pass to parse() with allocation + * function you set via this function. + */ + void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate); + + /** + * Get current memory allocation function + * + * \return memory allocation function + * \see set_memory_management_functions + */ + allocation_function PUGIXML_FUNCTION get_memory_allocation_function(); + + /** + * Get current memory deallocation function + * + * \return memory deallocation function + * \see set_memory_management_functions + */ + deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function(); +} + +#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) +namespace std +{ + // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) + std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&); + std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&); +} +#endif + +#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) +namespace std +{ + // Workarounds for (non-standard) iterator category detection + std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&); + std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&); +} +#endif + +#endif + +/** + * Copyright (c) 2006-2010 Arseny Kapoulkine + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ diff --git a/src/pugixpath.cpp b/src/pugixpath.cpp index 8ebc8c8..775706c 100644 --- a/src/pugixpath.cpp +++ b/src/pugixpath.cpp @@ -1,3500 +1,3500 @@ -/** - * pugixml parser - version 0.9 - * -------------------------------------------------------- - * Copyright (C) 2006-2010, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) - * Report bugs and download new versions at http://code.google.com/p/pugixml/ - * - * This library is distributed under the MIT License. See notice at the end - * of this file. - * - * This work is based on the pugxml parser, which is: - * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) - */ - -#include "pugixml.hpp" - -#ifndef PUGIXML_NO_XPATH - -#include - -#include -#include -#include -#include -#include -#include - -#ifdef PUGIXML_WCHAR_MODE -# include -#endif - -// int32_t -#if !defined(_MSC_VER) || _MSC_VER >= 1600 -# include -#else -typedef __int32 int32_t; -#endif - -#if defined(_MSC_VER) -# pragma warning(disable: 4127) // conditional expression is constant -# pragma warning(disable: 4996) // this function or variable may be unsafe -#endif - -#ifdef __INTEL_COMPILER -# pragma warning(disable: 1478 1786) // function was declared "deprecated" -#endif - -#ifdef __SNC__ -# pragma diag_suppress=237 // controlling expression is constant -#endif - -#include -#include - -// String utilities prototypes -namespace pugi -{ - namespace impl - { - bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count); - void widen_ascii(wchar_t* dest, const char* source); - } -} - -namespace -{ - using namespace pugi; - - enum chartypex - { - ctx_space = 1, // \r, \n, space, tab - ctx_start_symbol = 2, // Any symbol > 127, a-z, A-Z, _ - ctx_digit = 4, // 0-9 - ctx_symbol = 8 // Any symbol > 127, a-z, A-Z, 0-9, _, -, . - }; - - const unsigned char chartypex_table[256] = - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 0-15 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 0, // 32-47 - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 0, 0, 0, 0, 0, // 48-63 - 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 64-79 - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 10, // 80-95 - 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 96-111 - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, // 112-127 - - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 128+ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 - }; - -#ifdef PUGIXML_WCHAR_MODE - #define IS_CHARTYPEX(c, ct) ((static_cast(c) < 128 ? chartypex_table[static_cast(c)] : chartypex_table[128]) & (ct)) -#else - #define IS_CHARTYPEX(c, ct) (chartypex_table[static_cast(c)] & (ct)) -#endif - - bool starts_with(const char_t* string, const char_t* pattern) - { - while (*pattern && *string == *pattern) - { - string++; - pattern++; - } - - return *pattern == 0; - } - - const char_t* find_char(const char_t* s, char_t c) - { - #ifdef PUGIXML_WCHAR_MODE - return wcschr(s, c); - #else - return ::strchr(s, c); - #endif - } - - string_t string_value(const xpath_node& na) - { - if (na.attribute()) - return na.attribute().value(); - else - { - const xml_node& n = na.node(); - - switch (n.type()) - { - case node_pcdata: - case node_cdata: - case node_comment: - case node_pi: - return n.value(); - - case node_document: - case node_element: - { - string_t result; - - xml_node cur = n.first_child(); - - if (cur) - { - do - { - if (cur.type() == node_pcdata || cur.type() == node_cdata) - result += cur.value(); - - if (cur.first_child()) - cur = cur.first_child(); - else if (cur.next_sibling()) - cur = cur.next_sibling(); - else - { - // Borland C++ workaround - while (!cur.next_sibling() && cur != n && (bool)cur.parent()) - cur = cur.parent(); - - if (cur != n) - cur = cur.next_sibling(); - } - } - while (cur && cur != n); - } - - return result; - } - - default: - return string_t(); - } - } - } - - unsigned int node_height(xml_node n) - { - unsigned int result = 0; - - while (n) - { - ++result; - n = n.parent(); - } - - return result; - } - - // precondition: node_height of ln is <= node_height of rn, ln != rn - bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh) - { - assert(lh <= rh); - - while (lh < rh) - { - --rh; - rn = rn.parent(); - } - - if (ln == rn) return true; - - while (ln.parent() != rn.parent()) - { - ln = ln.parent(); - rn = rn.parent(); - } - - for (; ln; ln = ln.next_sibling()) - if (ln == rn) - return true; - - return false; - } - - bool node_is_ancestor(xml_node parent, xml_node node) - { - while (node && node != parent) node = node.parent(); - - return parent && node == parent; - } - - struct document_order_comparator - { - bool operator()(const xpath_node& lhs, const xpath_node& rhs) const - { - xml_node ln = lhs.node(), rn = rhs.node(); - - if (lhs.attribute() && rhs.attribute()) - { - if (lhs.parent() == rhs.parent()) - { - for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute()) - if (a == rhs.attribute()) - return true; - - return false; - } - - ln = lhs.parent(); - rn = rhs.parent(); - } - else if (lhs.attribute()) - { - if (lhs.parent() == rhs.node()) return false; - - ln = lhs.parent(); - } - else if (rhs.attribute()) - { - if (rhs.parent() == lhs.node()) return true; - - rn = rhs.parent(); - } - - if (ln == rn) return false; - - unsigned int lh = node_height(ln); - unsigned int rh = node_height(rn); - - return (lh <= rh) ? node_is_before(ln, lh, rn, rh) : !node_is_before(rn, rh, ln, lh); - } - }; - - struct duplicate_comparator - { - bool operator()(const xpath_node& lhs, const xpath_node& rhs) const - { - if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true; - else return rhs.attribute() ? false : lhs.node() < rhs.node(); - } - }; - - double gen_nan() - { - #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24)) - union { float f; int32_t i; } u[sizeof(float) == sizeof(int32_t) ? 1 : -1]; - u[0].i = 0x7fc00000; - return u[0].f; - #else - // fallback - const volatile double zero = 0.0; - return zero / zero; - #endif - } - - bool is_nan(double value) - { - #if defined(_MSC_VER) || defined(__BORLANDC__) - return !!_isnan(value); - #elif defined(fpclassify) && defined(FP_NAN) - return fpclassify(value) == FP_NAN; - #else - // fallback - const volatile double v = value; - return v != v; - #endif - } - - const char_t* convert_number_to_string_special(double value) - { - #if defined(_MSC_VER) || defined(__BORLANDC__) - if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0; - if (_isnan(value)) return PUGIXML_TEXT("NaN"); - return PUGIXML_TEXT("-Infinity") + (value > 0); - #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO) - switch (fpclassify(value)) - { - case FP_NAN: - return PUGIXML_TEXT("NaN"); - - case FP_INFINITE: - return PUGIXML_TEXT("-Infinity") + (value > 0); - - case FP_ZERO: - return PUGIXML_TEXT("0"); - - default: - return 0; - } - #else - // fallback - const volatile double v = value; - - if (v == 0) return PUGIXML_TEXT("0"); - if (v != v) return PUGIXML_TEXT("NaN"); - if (v * 2 == v) return PUGIXML_TEXT("-Infinity") + (value > 0); - return 0; - #endif - } - - bool convert_number_to_boolean(double value) - { - return (value != 0 && !is_nan(value)); - } - - // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent - void convert_number_to_mantissa_exponent(double value, char* buffer, char** out_mantissa, int* out_exponent) - { - // get a scientific notation value with IEEE DBL_DIG decimals - sprintf(buffer, "%.15e", value); - - // get the exponent (possibly negative) - char* exponent_string = strchr(buffer, 'e'); - assert(exponent_string); - - int exponent = atoi(exponent_string + 1); - - // extract mantissa string: skip sign - char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer; - assert(mantissa[0] != '0' && mantissa[1] == '.'); - - // divide mantissa by 10 to eliminate integer part - mantissa[1] = mantissa[0]; - mantissa++; - exponent++; - - // remove extra mantissa digits and zero-terminate mantissa - char* mantissa_end = exponent_string; - - while (mantissa != mantissa_end && *(mantissa_end - 1) == '0') --mantissa_end; - - *mantissa_end = 0; - - // fill results - *out_mantissa = mantissa; - *out_exponent = exponent; - } - - string_t convert_number_to_string(double value) - { - // try special number conversion - const char_t* special = convert_number_to_string_special(value); - if (special) return special; - - // get mantissa + exponent form - char mantissa_buffer[64]; - - char* mantissa; - int exponent; - convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent); - - // make the number! - char_t result[512]; - char_t* s = result; - - // sign - if (value < 0) *s++ = '-'; - - // integer part - if (exponent <= 0) - { - *s++ = '0'; - } - else - { - while (exponent > 0) - { - assert(*mantissa == 0 || (unsigned)(*mantissa - '0') <= 9); - *s++ = *mantissa ? *mantissa++ : '0'; - exponent--; - } - } - - // fractional part - if (*mantissa) - { - // decimal point - *s++ = '.'; - - // extra zeroes from negative exponent - while (exponent < 0) - { - *s++ = '0'; - exponent++; - } - - // extra mantissa digits - while (*mantissa) - { - assert((unsigned)(*mantissa - '0') <= 9); - *s++ = *mantissa++; - } - } - - // zero-terminate - assert(s < result + sizeof(result) / sizeof(result[0])); - *s = 0; - - return string_t(result); - } - - bool check_string_to_number_format(const char_t* string) - { - // parse leading whitespace - while (IS_CHARTYPEX(*string, ctx_space)) ++string; - - // parse sign - if (*string == '-') ++string; - - if (!*string) return false; - - // if there is no integer part, there should be a decimal part with at least one digit - if (!IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !IS_CHARTYPEX(string[1], ctx_digit))) return false; - - // parse integer part - while (IS_CHARTYPEX(*string, ctx_digit)) ++string; - - // parse decimal part - if (*string == '.') - { - ++string; - - while (IS_CHARTYPEX(*string, ctx_digit)) ++string; - } - - // parse trailing whitespace - while (IS_CHARTYPEX(*string, ctx_space)) ++string; - - return *string == 0; - } - - double convert_string_to_number(const char_t* string) - { - // check string format - if (!check_string_to_number_format(string)) return gen_nan(); - - // parse string - #ifdef PUGIXML_WCHAR_MODE - return wcstod(string, 0); - #else - return atof(string); - #endif - } - - double convert_string_to_number(const char_t* begin, const char_t* end) - { - char_t buffer[32]; - - size_t length = static_cast(end - begin); - - if (length < sizeof(buffer) / sizeof(buffer[0])) - { - // optimized on-stack conversion - memcpy(buffer, begin, length * sizeof(char_t)); - buffer[length] = 0; - - return convert_string_to_number(buffer); - } - else - { - // need to make dummy on-heap copy - string_t copy(begin, end); - - return convert_string_to_number(copy.c_str()); - } - } - - double round_nearest(double value) - { - return floor(value + 0.5); - } - - double round_nearest_nzero(double value) - { - // same as round_nearest, but returns -0 for [-0.5, -0] - // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0) - return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5); - } - - const char_t* local_name(const char_t* name) - { - const char_t* p = find_char(name, ':'); - - return p ? p + 1 : name; - } - - const char_t* namespace_uri(const xml_node& node) - { - const char_t* pos = find_char(node.name(), ':'); - - string_t ns = PUGIXML_TEXT("xmlns"); - - if (pos) - { - ns += ':'; - ns.append(node.name(), pos); - } - - xml_node p = node; - - while (p) - { - xml_attribute a = p.attribute(ns.c_str()); - - if (a) return a.value(); - - p = p.parent(); - } - - return PUGIXML_TEXT(""); - } - - const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent) - { - const char_t* pos = find_char(attr.name(), ':'); - - // Default namespace does not apply to attributes - if (!pos) return PUGIXML_TEXT(""); - - string_t ns = PUGIXML_TEXT("xmlns:"); - ns.append(attr.name(), pos); - - xml_node p = parent; - - while (p) - { - xml_attribute a = p.attribute(ns.c_str()); - - if (a) return a.value(); - - p = p.parent(); - } - - return PUGIXML_TEXT(""); - } - - struct equal_to - { - template bool operator()(const T& lhs, const T& rhs) const - { - return lhs == rhs; - } - }; - - struct not_equal_to - { - template bool operator()(const T& lhs, const T& rhs) const - { - return lhs != rhs; - } - }; - - struct less - { - template bool operator()(const T& lhs, const T& rhs) const - { - return lhs < rhs; - } - }; - - struct less_equal - { - template bool operator()(const T& lhs, const T& rhs) const - { - return lhs <= rhs; - } - }; -} - -namespace pugi -{ - xpath_exception::xpath_exception(const char* message): m_message(message) - { - } - - const char* xpath_exception::what() const throw() - { - return m_message; - } - - const size_t xpath_memory_block_size = 4096; ///< Memory block size, 4 kb - - class xpath_allocator - { - // disable copy ctor and assignment - xpath_allocator(const xpath_allocator&); - xpath_allocator& operator=(const xpath_allocator&); - - struct memory_block - { - memory_block(): next(0), size(0) - { - } - - memory_block* next; - size_t size; - - char data[xpath_memory_block_size]; - }; - - memory_block* m_root; - - public: - xpath_allocator(): m_root(0) - { - m_root = new memory_block; - } - - ~xpath_allocator() - { - while (m_root) - { - memory_block* cur = m_root->next; - delete m_root; - m_root = cur; - } - } - - void* alloc(size_t size) - { - // align size so that we're able to store pointers in subsequent blocks - size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1); - - if (m_root->size + size <= xpath_memory_block_size) - { - void* buf = m_root->data + m_root->size; - m_root->size += size; - return buf; - } - else - { - memory_block* block; - - if (size > xpath_memory_block_size) - block = static_cast(operator new(size + sizeof(memory_block) - xpath_memory_block_size)); - else - block = new memory_block; - - block->next = m_root; - block->size = size; - - m_root = block; - - return block->data; - } - } - - void* node(); - }; - - xpath_node::xpath_node() - { - } - - xpath_node::xpath_node(const xml_node& node): m_node(node) - { - } - - xpath_node::xpath_node(const xml_attribute& attribute, const xml_node& parent): m_node(parent), m_attribute(attribute) - { - } - - xml_node xpath_node::node() const - { - return m_attribute ? xml_node() : m_node; - } - - xml_attribute xpath_node::attribute() const - { - return m_attribute; - } - - xml_node xpath_node::parent() const - { - return m_attribute ? m_node : m_node.parent(); - } - - xpath_node::operator xpath_node::unspecified_bool_type() const - { - return (m_node || m_attribute) ? &xpath_node::m_node : 0; - } - - bool xpath_node::operator!() const - { - return !(m_node || m_attribute); - } - - bool xpath_node::operator==(const xpath_node& n) const - { - return m_node == n.m_node && m_attribute == n.m_attribute; - } - - bool xpath_node::operator!=(const xpath_node& n) const - { - return m_node != n.m_node || m_attribute != n.m_attribute; - } - -#ifdef __BORLANDC__ - bool operator&&(const xpath_node& lhs, bool rhs) - { - return (bool)lhs && rhs; - } - - bool operator||(const xpath_node& lhs, bool rhs) - { - return (bool)lhs || rhs; - } -#endif - - xpath_node_set::xpath_node_set(): m_type(type_unsorted), m_begin(&m_storage), m_end(&m_storage), m_eos(&m_storage + 1) - { - } - - xpath_node_set::~xpath_node_set() - { - if (m_begin != &m_storage) delete[] m_begin; - } - - xpath_node_set::xpath_node_set(const xpath_node_set& ns): m_type(type_unsorted), m_begin(&m_storage), m_end(&m_storage), m_eos(&m_storage + 1) - { - *this = ns; - } - - xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns) - { - if (&ns == this) return *this; - - if (m_begin != &m_storage) delete[] m_begin; - - m_begin = m_end = m_eos = 0; - m_type = ns.m_type; - - if (ns.size() == 1) - { - m_storage = *ns.m_begin; - m_begin = &m_storage; - m_end = m_eos = &m_storage + 1; - } - else - { - append(ns.begin(), ns.end()); - } - - return *this; - } - - xpath_node_set::type_t xpath_node_set::type() const - { - return m_type; - } - - size_t xpath_node_set::size() const - { - return m_end - m_begin; - } - - bool xpath_node_set::empty() const - { - return size() == 0; - } - - const xpath_node& xpath_node_set::operator[](size_t index) const - { - assert(index < size()); - return m_begin[index]; - } - - xpath_node_set::iterator xpath_node_set::mut_begin() - { - return m_begin; - } - - xpath_node_set::const_iterator xpath_node_set::begin() const - { - return m_begin; - } - - xpath_node_set::const_iterator xpath_node_set::end() const - { - return m_end; - } - - void xpath_node_set::sort(bool reverse) - { - std::sort(m_begin, m_end, document_order_comparator()); - - if (reverse) - std::reverse(m_begin, m_end); - - m_type = reverse ? type_sorted_reverse : type_sorted; - } - - void xpath_node_set::push_back(const xpath_node& n) - { - if (m_end == m_eos) - append(&n, &n + 1); - else - { - *m_end = n; - ++m_end; - } - } - - void xpath_node_set::append(const_iterator begin, const_iterator end) - { - if (begin == end) return; - - size_t count = end - begin; - size_t size = m_end - m_begin; - size_t capacity = m_eos - m_begin; - - if (capacity < size + count) - { - if (capacity < 2) capacity = 2; - - while (capacity < size + count) capacity += capacity / 2; - - xpath_node* storage = new xpath_node[capacity]; - std::copy(m_begin, m_end, storage); - - if (m_begin != &m_storage) delete[] m_begin; - - m_begin = storage; - m_end = storage + size; - m_eos = storage + capacity; - } - - std::copy(begin, end, m_end); - m_end += count; - } - - void xpath_node_set::truncate(iterator it) - { - m_end = it; - } - - xpath_node xpath_node_set::first() const - { - if (empty()) return xpath_node(); - - switch (m_type) - { - case type_sorted: return *m_begin; - case type_sorted_reverse: return *(m_end - 1); - case type_unsorted: return *std::min_element(begin(), end(), document_order_comparator()); - default: return xpath_node(); - } - } - - void xpath_node_set::remove_duplicates() - { - if (m_type == type_unsorted) - { - std::sort(m_begin, m_end, duplicate_comparator()); - } - - truncate(std::unique(m_begin, m_end)); - } - - struct xpath_context - { - xml_node root; - xpath_node n; - size_t position, size; - }; - - enum lexeme_t - { - lex_none = 0, - lex_equal, - lex_not_equal, - lex_less, - lex_greater, - lex_less_or_equal, - lex_greater_or_equal, - lex_plus, - lex_minus, - lex_multiply, - lex_union, - lex_var_ref, - lex_open_brace, - lex_close_brace, - lex_quoted_string, - lex_number, - lex_slash, - lex_double_slash, - lex_open_square_brace, - lex_close_square_brace, - lex_string, - lex_comma, - lex_axis_attribute, - lex_dot, - lex_double_dot, - lex_double_colon, - lex_eof - }; - - struct xpath_lexer_string - { - const char_t* begin; - const char_t* end; - - xpath_lexer_string(): begin(0), end(0) - { - } - - bool operator==(const char_t* other) const - { - size_t length = static_cast(end - begin); - - return impl::strequalrange(other, begin, length); - } - }; - - class xpath_lexer - { - // disable copy ctor and assignment - xpath_lexer(const xpath_lexer&); - xpath_lexer& operator=(const xpath_lexer&); - - private: - const char_t* m_cur; - xpath_lexer_string m_cur_lexeme_contents; - - lexeme_t m_cur_lexeme; - - void contents_clear() - { - m_cur_lexeme_contents = xpath_lexer_string(); - } - - public: - explicit xpath_lexer(const char_t* query): m_cur(query) - { - next(); - } - - const char_t* state() const - { - return m_cur; - } - - void next() - { - contents_clear(); - - const char_t* cur = m_cur; - - while (IS_CHARTYPEX(*cur, ctx_space)) ++cur; - - switch (*cur) - { - case 0: - m_cur_lexeme = lex_eof; - break; - - case '>': - if (*(cur+1) == '=') - { - cur += 2; - m_cur_lexeme = lex_greater_or_equal; - } - else - { - cur += 1; - m_cur_lexeme = lex_greater; - } - break; - - case '<': - if (*(cur+1) == '=') - { - cur += 2; - m_cur_lexeme = lex_less_or_equal; - } - else - { - cur += 1; - m_cur_lexeme = lex_less; - } - break; - - case '!': - if (*(cur+1) == '=') - { - cur += 2; - m_cur_lexeme = lex_not_equal; - } - else - { - m_cur_lexeme = lex_none; - } - break; - - case '=': - cur += 1; - m_cur_lexeme = lex_equal; - - break; - - case '+': - cur += 1; - m_cur_lexeme = lex_plus; - - break; - - case '-': - cur += 1; - m_cur_lexeme = lex_minus; - - break; - - case '*': - cur += 1; - m_cur_lexeme = lex_multiply; - - break; - - case '|': - cur += 1; - m_cur_lexeme = lex_union; - - break; - - case '$': - cur += 1; - m_cur_lexeme = lex_var_ref; - - break; - - case '(': - cur += 1; - m_cur_lexeme = lex_open_brace; - - break; - - case ')': - cur += 1; - m_cur_lexeme = lex_close_brace; - - break; - - case '[': - cur += 1; - m_cur_lexeme = lex_open_square_brace; - - break; - - case ']': - cur += 1; - m_cur_lexeme = lex_close_square_brace; - - break; - - case ',': - cur += 1; - m_cur_lexeme = lex_comma; - - break; - - case '/': - if (*(cur+1) == '/') - { - cur += 2; - m_cur_lexeme = lex_double_slash; - } - else - { - cur += 1; - m_cur_lexeme = lex_slash; - } - break; - - case '.': - if (*(cur+1) == '.') - { - cur += 2; - m_cur_lexeme = lex_double_dot; - } - else if (IS_CHARTYPEX(*(cur+1), ctx_digit)) - { - m_cur_lexeme_contents.begin = cur; // . - - ++cur; - - while (IS_CHARTYPEX(*cur, ctx_digit)) cur++; - - m_cur_lexeme_contents.end = cur; - - m_cur_lexeme = lex_number; - } - else - { - cur += 1; - m_cur_lexeme = lex_dot; - } - break; - - case '@': - cur += 1; - m_cur_lexeme = lex_axis_attribute; - - break; - - case '"': - case '\'': - { - char_t terminator = *cur; - - ++cur; - - m_cur_lexeme_contents.begin = cur; - while (*cur && *cur != terminator) cur++; - m_cur_lexeme_contents.end = cur; - - if (!*cur) - m_cur_lexeme = lex_none; - else - { - cur += 1; - m_cur_lexeme = lex_quoted_string; - } - - break; - } - - case ':': - if (*(cur+1) == ':') - { - cur += 2; - m_cur_lexeme = lex_double_colon; - } - else - { - m_cur_lexeme = lex_none; - } - break; - - default: - if (IS_CHARTYPEX(*cur, ctx_digit)) - { - m_cur_lexeme_contents.begin = cur; - - while (IS_CHARTYPEX(*cur, ctx_digit)) cur++; - - if (*cur == '.') - { - cur++; - - while (IS_CHARTYPEX(*cur, ctx_digit)) cur++; - } - - m_cur_lexeme_contents.end = cur; - - m_cur_lexeme = lex_number; - } - else if (IS_CHARTYPEX(*cur, ctx_start_symbol)) - { - m_cur_lexeme_contents.begin = cur; - - while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++; - - if (cur[0] == ':') - { - if (cur[1] == '*') // namespace test ncname:* - { - cur += 2; // :* - } - else if (IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname - { - cur++; // : - - while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++; - } - } - - m_cur_lexeme_contents.end = cur; - - while (IS_CHARTYPEX(*cur, ctx_space)) ++cur; - - m_cur_lexeme = lex_string; - } - else - { - throw xpath_exception("Unrecognized token"); - } - } - - m_cur = cur; - } - - lexeme_t current() const - { - return m_cur_lexeme; - } - - const xpath_lexer_string& contents() const - { - return m_cur_lexeme_contents; - } - }; - - enum ast_type_t - { - ast_none, - ast_op_or, // left or right - ast_op_and, // left and right - ast_op_equal, // left = right - ast_op_not_equal, // left != right - ast_op_less, // left < right - ast_op_greater, // left > right - ast_op_less_or_equal, // left <= right - ast_op_greater_or_equal, // left >= right - ast_op_add, // left + right - ast_op_subtract, // left - right - ast_op_multiply, // left * right - ast_op_divide, // left / right - ast_op_mod, // left % right - ast_op_negate, // left - right - ast_op_union, // left | right - ast_predicate, // apply predicate to set; next points to next predicate - ast_filter, // select * from left where right - ast_filter_posinv, // select * from left where right; proximity position invariant - ast_string_constant, // string constant - ast_number_constant, // number constant - ast_func_last, // last() - ast_func_position, // position() - ast_func_count, // count(left) - ast_func_id, // id(left) - ast_func_local_name_0, // local-name() - ast_func_local_name_1, // local-name(left) - ast_func_namespace_uri_0, // namespace-uri() - ast_func_namespace_uri_1, // namespace-uri(left) - ast_func_name_0, // name() - ast_func_name_1, // name(left) - ast_func_string_0, // string() - ast_func_string_1, // string(left) - ast_func_concat, // concat(left, right, siblings) - ast_func_starts_with, // starts_with(left, right) - ast_func_contains, // contains(left, right) - ast_func_substring_before, // substring-before(left, right) - ast_func_substring_after, // substring-after(left, right) - ast_func_substring_2, // substring(left, right) - ast_func_substring_3, // substring(left, right, third) - ast_func_string_length_0, // string-length() - ast_func_string_length_1, // string-length(left) - ast_func_normalize_space_0, // normalize-space() - ast_func_normalize_space_1, // normalize-space(left) - ast_func_translate, // translate(left, right, third) - ast_func_boolean, // boolean(left) - ast_func_not, // not(left) - ast_func_true, // true() - ast_func_false, // false() - ast_func_lang, // lang(left) - ast_func_number_0, // number() - ast_func_number_1, // number(left) - ast_func_sum, // sum(left) - ast_func_floor, // floor(left) - ast_func_ceiling, // ceiling(left) - ast_func_round, // round(left) - ast_step, // process set left with step - ast_step_root // select root node - }; - - enum axis_t - { - axis_ancestor, - axis_ancestor_or_self, - axis_attribute, - axis_child, - axis_descendant, - axis_descendant_or_self, - axis_following, - axis_following_sibling, - axis_namespace, - axis_parent, - axis_preceding, - axis_preceding_sibling, - axis_self - }; - - enum nodetest_t - { - nodetest_none, - nodetest_name, - nodetest_type_node, - nodetest_type_comment, - nodetest_type_pi, - nodetest_type_text, - nodetest_pi, - nodetest_all, - nodetest_all_in_namespace - }; - - template struct axis_to_type - { - static const axis_t axis; - }; - - template const axis_t axis_to_type::axis = N; - - class xpath_ast_node - { - private: - // node type - char m_type; - char m_rettype; - - // for ast_step / ast_predicate - char m_axis; - char m_test; - - // tree node structure - xpath_ast_node* m_left; - xpath_ast_node* m_right; - xpath_ast_node* m_next; - - union - { - // value for ast_string_constant - const char_t* string; - // value for ast_number_constant - double number; - // node test for ast_step (node name/namespace/node type/pi target) - const char_t* nodetest; - } m_data; - - xpath_ast_node(const xpath_ast_node&); - xpath_ast_node& operator=(const xpath_ast_node&); - - template static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const Comp& comp) - { - xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); - - if (lt != xpath_type_node_set && rt != xpath_type_node_set) - { - if (lt == xpath_type_boolean || rt == xpath_type_boolean) - return comp(lhs->eval_boolean(c), rhs->eval_boolean(c)); - else if (lt == xpath_type_number || rt == xpath_type_number) - return comp(lhs->eval_number(c), rhs->eval_number(c)); - else if (lt == xpath_type_string || rt == xpath_type_string) - return comp(lhs->eval_string(c), rhs->eval_string(c)); - } - else if (lt == xpath_type_node_set && rt == xpath_type_node_set) - { - xpath_node_set ls = lhs->eval_node_set(c); - xpath_node_set rs = rhs->eval_node_set(c); - - for (xpath_node_set::const_iterator li = ls.begin(); li != ls.end(); ++li) - for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) - { - if (comp(string_value(*li), string_value(*ri))) - return true; - } - - return false; - } - else - { - if (lt == xpath_type_node_set) - { - std::swap(lhs, rhs); - std::swap(lt, rt); - } - - if (lt == xpath_type_boolean) - return comp(lhs->eval_boolean(c), rhs->eval_boolean(c)); - else if (lt == xpath_type_number) - { - double l = lhs->eval_number(c); - xpath_node_set rs = rhs->eval_node_set(c); - - for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) - { - if (comp(l, convert_string_to_number(string_value(*ri).c_str()))) - return true; - } - - return false; - } - else if (lt == xpath_type_string) - { - string_t l = lhs->eval_string(c); - xpath_node_set rs = rhs->eval_node_set(c); - - for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) - { - if (comp(l, string_value(*ri))) - return true; - } - - return false; - } - } - - assert(!"Wrong types"); - return false; - } - - template static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const Comp& comp) - { - xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); - - if (lt != xpath_type_node_set && rt != xpath_type_node_set) - return comp(lhs->eval_number(c), rhs->eval_number(c)); - else if (lt == xpath_type_node_set && rt == xpath_type_node_set) - { - xpath_node_set ls = lhs->eval_node_set(c); - xpath_node_set rs = rhs->eval_node_set(c); - - for (xpath_node_set::const_iterator li = ls.begin(); li != ls.end(); ++li) - { - double l = convert_string_to_number(string_value(*li).c_str()); - - for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) - { - if (comp(l, convert_string_to_number(string_value(*ri).c_str()))) - return true; - } - } - - return false; - } - else if (lt != xpath_type_node_set && rt == xpath_type_node_set) - { - double l = lhs->eval_number(c); - xpath_node_set rs = rhs->eval_node_set(c); - - for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) - { - if (comp(l, convert_string_to_number(string_value(*ri).c_str()))) - return true; - } - - return false; - } - else if (lt == xpath_type_node_set && rt != xpath_type_node_set) - { - xpath_node_set ls = lhs->eval_node_set(c); - double r = rhs->eval_number(c); - - for (xpath_node_set::const_iterator li = ls.begin(); li != ls.end(); ++li) - { - if (comp(convert_string_to_number(string_value(*li).c_str()), r)) - return true; - } - - return false; - } - else - { - assert(!"Wrong types"); - return false; - } - } - - void apply_predicate(xpath_node_set& ns, size_t first, xpath_ast_node* expr, const xpath_context& context) - { - xpath_context c; - c.root = context.root; - - size_t i = 1; - size_t size = ns.size() - first; - - xpath_node_set::iterator last = ns.mut_begin() + first; - - // remove_if... or well, sort of - for (xpath_node_set::iterator it = last; it != ns.end(); ++it, ++i) - { - c.n = *it; - c.position = i; - c.size = size; - - if (expr->rettype() == xpath_type_number) - { - if (expr->eval_number(c) == i) - *last++ = *it; - } - else if (expr->eval_boolean(c)) - *last++ = *it; - } - - ns.truncate(last); - } - - void apply_predicates(xpath_node_set& ns, size_t first, const xpath_context& context) - { - if (ns.size() <= first) return; - - for (xpath_ast_node* pred = m_right; pred; pred = pred->m_next) - { - apply_predicate(ns, first, pred->m_left, context); - } - } - - void step_push(xpath_node_set& ns, const xml_attribute& a, const xml_node& parent) - { - if (!a) return; - - const char_t* name = a.name(); - - // There are no attribute nodes corresponding to attributes that declare namespaces - // That is, "xmlns:..." or "xmlns" - if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return; - - switch (m_test) - { - case nodetest_name: - if (impl::strequal(name, m_data.nodetest)) ns.push_back(xpath_node(a, parent)); - break; - - case nodetest_type_node: - case nodetest_all: - ns.push_back(xpath_node(a, parent)); - break; - - case nodetest_all_in_namespace: - if (starts_with(name, m_data.nodetest)) - ns.push_back(xpath_node(a, parent)); - break; - - default: - ; - } - } - - void step_push(xpath_node_set& ns, const xml_node& n) - { - if (!n) return; - - switch (m_test) - { - case nodetest_name: - if (n.type() == node_element && impl::strequal(n.name(), m_data.nodetest)) ns.push_back(n); - break; - - case nodetest_type_node: - ns.push_back(n); - break; - - case nodetest_type_comment: - if (n.type() == node_comment) - ns.push_back(n); - break; - - case nodetest_type_text: - if (n.type() == node_pcdata || n.type() == node_cdata) - ns.push_back(n); - break; - - case nodetest_type_pi: - if (n.type() == node_pi) - ns.push_back(n); - break; - - case nodetest_pi: - if (n.type() == node_pi && impl::strequal(n.name(), m_data.nodetest)) - ns.push_back(n); - break; - - case nodetest_all: - if (n.type() == node_element) - ns.push_back(n); - break; - - case nodetest_all_in_namespace: - if (n.type() == node_element && starts_with(n.name(), m_data.nodetest)) - ns.push_back(n); - break; - - default: - assert(!"Unknown axis"); - } - } - - template void step_fill(xpath_node_set& ns, const xml_node& n, T) - { - const axis_t axis = T::axis; - - switch (axis) - { - case axis_attribute: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; - - for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute()) - step_push(ns, a, n); - - break; - } - - case axis_child: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; - - for (xml_node c = n.first_child(); c; c = c.next_sibling()) - step_push(ns, c); - - break; - } - - case axis_descendant: - case axis_descendant_or_self: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; - - if (axis == axis_descendant_or_self) - step_push(ns, n); - - xml_node cur = n.first_child(); - - if (cur) - { - do - { - step_push(ns, cur); - - if (cur.first_child()) - cur = cur.first_child(); - else if (cur.next_sibling()) - cur = cur.next_sibling(); - else - { - // Borland C++ workaround - while (!cur.next_sibling() && cur != n && (bool)cur.parent()) - cur = cur.parent(); - - if (cur != n) - cur = cur.next_sibling(); - } - } - while (cur && cur != n); - } - - break; - } - - case axis_following_sibling: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; - - for (xml_node c = n.next_sibling(); c; c = c.next_sibling()) - step_push(ns, c); - - break; - } - - case axis_preceding_sibling: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted; - - for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling()) - step_push(ns, c); - - break; - } - - case axis_following: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; - - xml_node cur = n; - - // exit from this node so that we don't include descendants - while (cur && !cur.next_sibling()) cur = cur.parent(); - cur = cur.next_sibling(); - - if (cur) - { - for (;;) - { - step_push(ns, cur); - - if (cur.first_child()) - cur = cur.first_child(); - else if (cur.next_sibling()) - cur = cur.next_sibling(); - else - { - while (cur && !cur.next_sibling()) cur = cur.parent(); - cur = cur.next_sibling(); - - if (!cur) break; - } - } - } - - break; - } - - case axis_preceding: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted; - - xml_node cur = n; - - while (cur && !cur.previous_sibling()) cur = cur.parent(); - cur = cur.previous_sibling(); - - if (cur) - { - for (;;) - { - if (cur.last_child()) - cur = cur.last_child(); - else - { - // leaf node, can't be ancestor - step_push(ns, cur); - - if (cur.previous_sibling()) - cur = cur.previous_sibling(); - else - { - do - { - cur = cur.parent(); - if (!cur) break; - - if (!node_is_ancestor(cur, n)) step_push(ns, cur); - } - while (!cur.previous_sibling()); - - cur = cur.previous_sibling(); - - if (!cur) break; - } - } - } - } - - break; - } - - case axis_ancestor: - case axis_ancestor_or_self: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted; - - if (axis == axis_ancestor_or_self) - step_push(ns, n); - - xml_node cur = n.parent(); - - while (cur) - { - step_push(ns, cur); - - cur = cur.parent(); - } - - break; - } - - case axis_self: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; - - step_push(ns, n); - - break; - } - - case axis_parent: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; - - if (n.parent()) step_push(ns, n.parent()); - - break; - } - - default: - assert(!"Unimplemented axis"); - } - } - - template void step_fill(xpath_node_set& ns, const xml_attribute& a, const xml_node& p, T v) - { - const axis_t axis = T::axis; - - switch (axis) - { - case axis_ancestor: - case axis_ancestor_or_self: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted; - - if (axis == axis_ancestor_or_self && m_test == nodetest_type_node) // reject attributes based on principal node type test - step_push(ns, a, p); - - xml_node cur = p; - - while (cur) - { - step_push(ns, cur); - - cur = cur.parent(); - } - - break; - } - - case axis_descendant_or_self: - case axis_self: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; - - if (m_test == nodetest_type_node) // reject attributes based on principal node type test - step_push(ns, a, p); - - break; - } - - case axis_following: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; - - xml_node cur = p; - - for (;;) - { - if (cur.first_child()) - cur = cur.first_child(); - else if (cur.next_sibling()) - cur = cur.next_sibling(); - else - { - while (cur && !cur.next_sibling()) cur = cur.parent(); - cur = cur.next_sibling(); - - if (!cur) break; - } - - step_push(ns, cur); - } - - break; - } - - case axis_parent: - { - ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; - - step_push(ns, p); - - break; - } - - case axis_preceding: - { - // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding - step_fill(ns, p, v); - break; - } - - default: - assert(!"Unimplemented axis"); - } - } - - template void step_do(xpath_node_set& ns, const xpath_context& c, T v) - { - const axis_t axis = T::axis; - - assert(ns.empty()); - - switch (axis) - { - case axis_ancestor: - case axis_ancestor_or_self: - case axis_descendant_or_self: - case axis_following: - case axis_parent: - case axis_preceding: - case axis_self: - if (m_left) - { - xpath_node_set s = m_left->eval_node_set(c); - - for (xpath_node_set::const_iterator it = s.begin(); it != s.end(); ++it) - { - size_t size = ns.size(); - - if (it->node()) - step_fill(ns, it->node(), v); - else - step_fill(ns, it->attribute(), it->parent(), v); - - apply_predicates(ns, size, c); - } - } - else - { - if (c.n.node()) step_fill(ns, c.n.node(), v); - else step_fill(ns, c.n.attribute(), c.n.parent(), v); - - apply_predicates(ns, 0, c); - } - - break; - - case axis_following_sibling: - case axis_preceding_sibling: - case axis_attribute: - case axis_child: - case axis_descendant: - if (m_left) - { - xpath_node_set s = m_left->eval_node_set(c); - - for (xpath_node_set::const_iterator it = s.begin(); it != s.end(); ++it) - { - size_t size = ns.size(); - - if (it->node()) - step_fill(ns, it->node(), v); - - apply_predicates(ns, size, c); - } - } - else if (c.n.node()) - { - step_fill(ns, c.n.node(), v); - - apply_predicates(ns, 0, c); - } - - break; - - case axis_namespace: - break; - - default: - assert(!"Unimplemented axis"); - } - } - - static const char_t* duplicate_string(const xpath_lexer_string& value, xpath_allocator& a) - { - if (value.begin) - { - size_t length = static_cast(value.end - value.begin); - - char_t* c = static_cast(a.alloc((length + 1) * sizeof(char_t))); - memcpy(c, value.begin, length * sizeof(char_t)); - c[length] = 0; - - return c; - } - else return 0; - } - public: - xpath_ast_node(ast_type_t type, xpath_value_type rettype, const xpath_lexer_string& value, xpath_allocator& a): - m_type((char)type), m_rettype((char)rettype), m_axis(0), m_test(0), m_left(0), m_right(0), m_next(0) - { - assert(type == ast_string_constant); - m_data.string = duplicate_string(value, a); - } - - xpath_ast_node(ast_type_t type, xpath_value_type rettype, double value): - m_type((char)type), m_rettype((char)rettype), m_axis(0), m_test(0), m_left(0), m_right(0), m_next(0) - { - assert(type == ast_number_constant); - m_data.number = value; - } - - xpath_ast_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0): - m_type((char)type), m_rettype((char)rettype), m_axis(0), m_test(0), m_left(left), m_right(right), m_next(0) - { - } - - xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const xpath_lexer_string& contents, xpath_allocator& a): - m_type((char)type), m_rettype(xpath_type_node_set), m_axis((char)axis), m_test((char)test), m_left(left), m_right(0), m_next(0) - { - m_data.nodetest = duplicate_string(contents, a); - } - - void set_next(xpath_ast_node* value) - { - m_next = value; - } - - void set_right(xpath_ast_node* value) - { - m_right = value; - } - - bool eval_boolean(const xpath_context& c) - { - switch (m_type) - { - case ast_op_or: - if (m_left->eval_boolean(c)) return true; - else return m_right->eval_boolean(c); - - case ast_op_and: - if (!m_left->eval_boolean(c)) return false; - else return m_right->eval_boolean(c); - - case ast_op_equal: - return compare_eq(m_left, m_right, c, equal_to()); - - case ast_op_not_equal: - return compare_eq(m_left, m_right, c, not_equal_to()); - - case ast_op_less: - return compare_rel(m_left, m_right, c, less()); - - case ast_op_greater: - return compare_rel(m_right, m_left, c, less()); - - case ast_op_less_or_equal: - return compare_rel(m_left, m_right, c, less_equal()); - - case ast_op_greater_or_equal: - return compare_rel(m_right, m_left, c, less_equal()); - - case ast_func_starts_with: - return starts_with(m_left->eval_string(c).c_str(), m_right->eval_string(c).c_str()); - - case ast_func_contains: - { - string_t lr = m_left->eval_string(c); - string_t rr = m_right->eval_string(c); - - return rr.empty() || lr.find(rr) != string_t::npos; - } - - case ast_func_boolean: - return m_left->eval_boolean(c); - - case ast_func_not: - return !m_left->eval_boolean(c); - - case ast_func_true: - return true; - - case ast_func_false: - return false; - - case ast_func_lang: - { - if (c.n.attribute()) return false; - - string_t lang = m_left->eval_string(c); - - for (xml_node n = c.n.node(); n; n = n.parent()) - { - xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang")); - - if (a) - { - const char_t* value = a.value(); - - // strnicmp / strncasecmp is not portable - for (const char_t* lit = lang.c_str(); *lit; ++lit) - { - if (tolower(*lit) != tolower(*value)) return false; - ++value; - } - - return *value == 0 || *value == '-'; - } - } - - return false; - } - - default: - { - switch (m_rettype) - { - case xpath_type_number: - return convert_number_to_boolean(eval_number(c)); - - case xpath_type_string: - return !eval_string(c).empty(); - - case xpath_type_node_set: - return !eval_node_set(c).empty(); - - default: - assert(!"Wrong expression for return type boolean"); - return false; - } - } - } - } - - double eval_number(const xpath_context& c) - { - switch (m_type) - { - case ast_op_add: - return m_left->eval_number(c) + m_right->eval_number(c); - - case ast_op_subtract: - return m_left->eval_number(c) - m_right->eval_number(c); - - case ast_op_multiply: - return m_left->eval_number(c) * m_right->eval_number(c); - - case ast_op_divide: - return m_left->eval_number(c) / m_right->eval_number(c); - - case ast_op_mod: - return fmod(m_left->eval_number(c), m_right->eval_number(c)); - - case ast_op_negate: - return -m_left->eval_number(c); - - case ast_number_constant: - return m_data.number; - - case ast_func_last: - return (double)c.size; - - case ast_func_position: - return (double)c.position; - - case ast_func_count: - return (double)m_left->eval_node_set(c).size(); - - case ast_func_string_length_0: - return (double)string_value(c.n).size(); - - case ast_func_string_length_1: - return (double)m_left->eval_string(c).size(); - - case ast_func_number_0: - return convert_string_to_number(string_value(c.n).c_str()); - - case ast_func_number_1: - return m_left->eval_number(c); - - case ast_func_sum: - { - double r = 0; - - xpath_node_set ns = m_left->eval_node_set(c); - - for (xpath_node_set::const_iterator it = ns.begin(); it != ns.end(); ++it) - r += convert_string_to_number(string_value(*it).c_str()); - - return r; - } - - case ast_func_floor: - { - double r = m_left->eval_number(c); - - return r == r ? floor(r) : r; - } - - case ast_func_ceiling: - { - double r = m_left->eval_number(c); - - return r == r ? ceil(r) : r; - } - - case ast_func_round: - return round_nearest_nzero(m_left->eval_number(c)); - - default: - { - switch (m_rettype) - { - case xpath_type_boolean: - return eval_boolean(c) ? 1 : 0; - - case xpath_type_string: - return convert_string_to_number(eval_string(c).c_str()); - - case xpath_type_node_set: - return convert_string_to_number(eval_string(c).c_str()); - - default: - assert(!"Wrong expression for return type number"); - return 0; - } - - } - } - } - - string_t eval_string(const xpath_context& c) - { - switch (m_type) - { - case ast_string_constant: - return m_data.string; - - case ast_func_local_name_0: - { - xpath_node na = c.n; - - if (na.attribute()) return local_name(na.attribute().name()); - else return local_name(na.node().name()); - } - - case ast_func_local_name_1: - { - xpath_node_set ns = m_left->eval_node_set(c); - if (ns.empty()) return string_t(); - - xpath_node na = ns.first(); - - if (na.attribute()) return local_name(na.attribute().name()); - else return local_name(na.node().name()); - } - - case ast_func_name_0: - { - xpath_node na = c.n; - - if (na.attribute()) return na.attribute().name(); - else return na.node().name(); - } - - case ast_func_name_1: - { - xpath_node_set ns = m_left->eval_node_set(c); - if (ns.empty()) return string_t(); - - xpath_node na = ns.first(); - - if (na.attribute()) return na.attribute().name(); - else return na.node().name(); - } - - case ast_func_namespace_uri_0: - { - xpath_node na = c.n; - - if (na.attribute()) return namespace_uri(na.attribute(), na.parent()); - else return namespace_uri(na.node()); - } - - case ast_func_namespace_uri_1: - { - xpath_node_set ns = m_left->eval_node_set(c); - if (ns.empty()) return string_t(); - - xpath_node na = ns.first(); - - if (na.attribute()) return namespace_uri(na.attribute(), na.parent()); - else return namespace_uri(na.node()); - } - - case ast_func_string_0: - return string_value(c.n); - - case ast_func_string_1: - return m_left->eval_string(c); - - case ast_func_concat: - { - string_t r = m_left->eval_string(c); - - for (xpath_ast_node* n = m_right; n; n = n->m_next) - r += n->eval_string(c); - - return r; - } - - case ast_func_substring_before: - { - string_t s = m_left->eval_string(c); - string_t::size_type pos = s.find(m_right->eval_string(c)); - - if (pos == string_t::npos) return string_t(); - else return string_t(s.begin(), s.begin() + pos); - } - - case ast_func_substring_after: - { - string_t s = m_left->eval_string(c); - string_t p = m_right->eval_string(c); - - string_t::size_type pos = s.find(p); - - if (pos == string_t::npos) return string_t(); - else return string_t(s.begin() + pos + p.length(), s.end()); - } - - case ast_func_substring_2: - { - string_t s = m_left->eval_string(c); - double first = round_nearest(m_right->eval_number(c)); - - if (is_nan(first)) return string_t(); // NaN - else if (first >= s.length() + 1) return string_t(); - - size_t pos = first < 1 ? 1 : (size_t)first; - - return s.substr(pos - 1); - } - - case ast_func_substring_3: - { - string_t s = m_left->eval_string(c); - double first = round_nearest(m_right->eval_number(c)); - double last = first + round_nearest(m_right->m_next->eval_number(c)); - - if (is_nan(first) || is_nan(last)) return string_t(); - else if (first >= s.length() + 1) return string_t(); - else if (first >= last) return string_t(); - - size_t pos = first < 1 ? 1 : (size_t)first; - size_t end = last >= s.length() + 1 ? s.length() + 1 : (size_t)last; - - size_t size_requested = end - pos; - size_t size_to_end = s.length() - pos + 1; - - return s.substr(pos - 1, size_requested < size_to_end ? size_requested : size_to_end); - } - - case ast_func_normalize_space_0: - case ast_func_normalize_space_1: - { - string_t s = m_type == ast_func_normalize_space_0 ? string_value(c.n) : m_left->eval_string(c); - - string_t r; - r.reserve(s.size()); - - for (string_t::const_iterator it = s.begin(); it != s.end(); ++it) - { - if (IS_CHARTYPEX(*it, ctx_space)) - { - if (!r.empty() && r[r.size() - 1] != ' ') - r += ' '; - } - else r += *it; - } - - string_t::size_type pos = r.find_last_not_of(' '); - if (pos == string_t::npos) r = string_t(); - else r.erase(r.begin() + pos + 1, r.end()); - - return r; - } - - case ast_func_translate: - { - string_t s = m_left->eval_string(c); - string_t from = m_right->eval_string(c); - string_t to = m_right->m_next->eval_string(c); - - for (string_t::iterator it = s.begin(); it != s.end(); ) - { - string_t::size_type pos = from.find(*it); - - if (pos == string_t::npos) - ++it; - else if (pos >= to.length()) - it = s.erase(it); - else - *it++ = to[pos]; - } - - return s; - } - - default: - { - switch (m_rettype) - { - case xpath_type_boolean: - return eval_boolean(c) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"); - - case xpath_type_number: - return convert_number_to_string(eval_number(c)); - - case xpath_type_node_set: - { - xpath_node_set ns = eval_node_set(c); - return ns.empty() ? string_t() : string_value(ns.first()); - } - - default: - assert(!"Wrong expression for return type string"); - return string_t(); - } - } - } - } - - xpath_node_set eval_node_set(const xpath_context& c) - { - switch (m_type) - { - case ast_op_union: - { - xpath_node_set ls = m_left->eval_node_set(c); - xpath_node_set rs = m_right->eval_node_set(c); - - // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother - ls.m_type = xpath_node_set::type_unsorted; - - ls.append(rs.begin(), rs.end()); - - ls.remove_duplicates(); - - return ls; - } - - case ast_filter: - case ast_filter_posinv: - { - xpath_node_set set = m_left->eval_node_set(c); - - // either expression is a number or it contains position() call; sort by document order - if (m_type == ast_filter) set.sort(); - - apply_predicate(set, 0, m_right, c); - - return set; - } - - case ast_func_id: - return xpath_node_set(); - - case ast_step: - { - xpath_node_set ns; - - switch (m_axis) - { - case axis_ancestor: - step_do(ns, c, axis_to_type()); - break; - - case axis_ancestor_or_self: - step_do(ns, c, axis_to_type()); - break; - - case axis_attribute: - step_do(ns, c, axis_to_type()); - break; - - case axis_child: - step_do(ns, c, axis_to_type()); - break; - - case axis_descendant: - step_do(ns, c, axis_to_type()); - break; - - case axis_descendant_or_self: - step_do(ns, c, axis_to_type()); - break; - - case axis_following: - step_do(ns, c, axis_to_type()); - break; - - case axis_following_sibling: - step_do(ns, c, axis_to_type()); - break; - - case axis_namespace: - step_do(ns, c, axis_to_type()); - break; - - case axis_parent: - step_do(ns, c, axis_to_type()); - break; - - case axis_preceding: - step_do(ns, c, axis_to_type()); - break; - - case axis_preceding_sibling: - step_do(ns, c, axis_to_type()); - break; - - case axis_self: - step_do(ns, c, axis_to_type()); - break; - } - - ns.remove_duplicates(); - - return ns; - } - - case ast_step_root: - { - xpath_node_set ns; - - if (c.root) - { - ns.push_back(c.root); - - apply_predicates(ns, 0, c); - } - - return ns; - } - - default: - assert(!"Wrong expression for return type node set"); - return xpath_node_set(); - } - } - - bool is_posinv() - { - switch (m_type) - { - case ast_func_position: - return false; - - case ast_string_constant: - case ast_number_constant: - // $$ case ast_variable: - return true; - - case ast_step: - case ast_step_root: - return true; - - case ast_predicate: - case ast_filter: - case ast_filter_posinv: - return true; - - default: - if (m_left && !m_left->is_posinv()) return false; - - for (xpath_ast_node* n = m_right; n; n = n->m_next) - if (!n->is_posinv()) return false; - - return true; - } - } - - xpath_value_type rettype() const - { - return static_cast(m_rettype); - } - }; - - void* xpath_allocator::node() - { - return alloc(sizeof(xpath_ast_node)); - } - - class xpath_parser - { - private: - xpath_allocator& m_alloc; - xpath_lexer m_lexer; - - xpath_parser(const xpath_parser&); - xpath_parser& operator=(const xpath_parser&); - - xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2]) - { - assert(argc <= 1); - - if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw xpath_exception("Function has to be applied to node set"); - - return new (m_alloc.node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]); - } - - xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) - { - switch (name.begin[0]) - { - case 'b': - if (name == PUGIXML_TEXT("boolean") && argc == 1) - return new (m_alloc.node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]); - - break; - - case 'c': - if (name == PUGIXML_TEXT("count") && argc == 1) - { - if (args[0]->rettype() != xpath_type_node_set) throw xpath_exception("count() has to be applied to node set"); - return new (m_alloc.node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]); - } - else if (name == PUGIXML_TEXT("contains") && argc == 2) - return new (m_alloc.node()) xpath_ast_node(ast_func_contains, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("concat") && argc >= 2) - return new (m_alloc.node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("ceiling") && argc == 1) - return new (m_alloc.node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]); - - break; - - case 'f': - if (name == PUGIXML_TEXT("false") && argc == 0) - return new (m_alloc.node()) xpath_ast_node(ast_func_false, xpath_type_boolean); - else if (name == PUGIXML_TEXT("floor") && argc == 1) - return new (m_alloc.node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]); - - break; - - case 'i': - if (name == PUGIXML_TEXT("id") && argc == 1) - return new (m_alloc.node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]); - - break; - - case 'l': - if (name == PUGIXML_TEXT("last") && argc == 0) - return new (m_alloc.node()) xpath_ast_node(ast_func_last, xpath_type_number); - else if (name == PUGIXML_TEXT("lang") && argc == 1) - return new (m_alloc.node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]); - else if (name == PUGIXML_TEXT("local-name") && argc <= 1) - return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args); - - break; - - case 'n': - if (name == PUGIXML_TEXT("name") && argc <= 1) - return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args); - else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) - return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args); - else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) - return new (m_alloc.node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("not") && argc == 1) - return new (m_alloc.node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]); - else if (name == PUGIXML_TEXT("number") && argc <= 1) - return new (m_alloc.node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); - - break; - - case 'p': - if (name == PUGIXML_TEXT("position") && argc == 0) - return new (m_alloc.node()) xpath_ast_node(ast_func_position, xpath_type_number); - - break; - - case 'r': - if (name == PUGIXML_TEXT("round") && argc == 1) - return new (m_alloc.node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]); - - break; - - case 's': - if (name == PUGIXML_TEXT("string") && argc <= 1) - return new (m_alloc.node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); - else if (name == PUGIXML_TEXT("string-length") && argc <= 1) - return new (m_alloc.node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_string, args[0]); - else if (name == PUGIXML_TEXT("starts-with") && argc == 2) - return new (m_alloc.node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); - else if (name == PUGIXML_TEXT("substring-before") && argc == 2) - return new (m_alloc.node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("substring-after") && argc == 2) - return new (m_alloc.node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) - return new (m_alloc.node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("sum") && argc == 1) - { - if (args[0]->rettype() != xpath_type_node_set) throw xpath_exception("sum() has to be applied to node set"); - return new (m_alloc.node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]); - } - - break; - - case 't': - if (name == PUGIXML_TEXT("translate") && argc == 3) - return new (m_alloc.node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("true") && argc == 0) - return new (m_alloc.node()) xpath_ast_node(ast_func_true, xpath_type_boolean); - - break; - } - - throw xpath_exception("Unrecognized function or wrong parameter count"); - - #ifdef __DMC__ - return 0; // Digital Mars C++ - #endif - } - - axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) - { - specified = true; - - switch (name.begin[0]) - { - case 'a': - if (name == PUGIXML_TEXT("ancestor")) - return axis_ancestor; - else if (name == PUGIXML_TEXT("ancestor-or-self")) - return axis_ancestor_or_self; - else if (name == PUGIXML_TEXT("attribute")) - return axis_attribute; - - break; - - case 'c': - if (name == PUGIXML_TEXT("child")) - return axis_child; - - break; - - case 'd': - if (name == PUGIXML_TEXT("descendant")) - return axis_descendant; - else if (name == PUGIXML_TEXT("descendant-or-self")) - return axis_descendant_or_self; - - break; - - case 'f': - if (name == PUGIXML_TEXT("following")) - return axis_following; - else if (name == PUGIXML_TEXT("following-sibling")) - return axis_following_sibling; - - break; - - case 'n': - if (name == PUGIXML_TEXT("namespace")) - return axis_namespace; - - break; - - case 'p': - if (name == PUGIXML_TEXT("parent")) - return axis_parent; - else if (name == PUGIXML_TEXT("preceding")) - return axis_preceding; - else if (name == PUGIXML_TEXT("preceding-sibling")) - return axis_preceding_sibling; - - break; - - case 's': - if (name == PUGIXML_TEXT("self")) - return axis_self; - - break; - } - - specified = false; - return axis_child; - } - - nodetest_t parse_node_test_type(const xpath_lexer_string& name) - { - switch (name.begin[0]) - { - case 'c': - if (name == PUGIXML_TEXT("comment")) - return nodetest_type_comment; - - break; - - case 'n': - if (name == PUGIXML_TEXT("node")) - return nodetest_type_node; - - break; - - case 'p': - if (name == PUGIXML_TEXT("processing-instruction")) - return nodetest_type_pi; - - break; - - case 't': - if (name == PUGIXML_TEXT("text")) - return nodetest_type_text; - - break; - } - - return nodetest_none; - } - - // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall - xpath_ast_node* parse_primary_expression() - { - switch (m_lexer.current()) - { - case lex_var_ref: - { - throw xpath_exception("Variables are not supported"); - - #ifdef __DMC__ - return 0; // Digital Mars C++ - #endif - } - - case lex_open_brace: - { - m_lexer.next(); - - xpath_ast_node* n = parse_expression(); - - if (m_lexer.current() != lex_close_brace) - throw xpath_exception("Unmatched braces"); - - m_lexer.next(); - - return n; - } - - case lex_quoted_string: - { - xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_string_constant, xpath_type_string, m_lexer.contents(), m_alloc); - m_lexer.next(); - - return n; - } - - case lex_number: - { - double value = convert_string_to_number(m_lexer.contents().begin, m_lexer.contents().end); - - xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_number_constant, xpath_type_number, value); - m_lexer.next(); - - return n; - } - - case lex_string: - { - xpath_ast_node* args[2] = {0}; - size_t argc = 0; - - xpath_lexer_string function = m_lexer.contents(); - m_lexer.next(); - - xpath_ast_node* last_arg = 0; - - if (m_lexer.current() != lex_open_brace) - throw xpath_exception("Unrecognized function call"); - m_lexer.next(); - - if (m_lexer.current() != lex_close_brace) - args[argc++] = parse_expression(); - - while (m_lexer.current() != lex_close_brace) - { - if (m_lexer.current() != lex_comma) - throw xpath_exception("No comma between function arguments"); - m_lexer.next(); - - xpath_ast_node* n = parse_expression(); - - if (argc < 2) args[argc] = n; - else last_arg->set_next(n); - - argc++; - last_arg = n; - } - - m_lexer.next(); - - return parse_function(function, argc, args); - } - - default: - throw xpath_exception("Unrecognizable primary expression"); - #ifdef __DMC__ - return 0; // Digital Mars C++ - #endif - } - } - - // FilterExpr ::= PrimaryExpr | FilterExpr Predicate - // Predicate ::= '[' PredicateExpr ']' - // PredicateExpr ::= Expr - xpath_ast_node* parse_filter_expression() - { - xpath_ast_node* n = parse_primary_expression(); - - while (m_lexer.current() == lex_open_square_brace) - { - m_lexer.next(); - - xpath_ast_node* expr = parse_expression(); - - if (n->rettype() != xpath_type_node_set) throw xpath_exception("Predicate has to be applied to node set"); - - bool posinv = expr->rettype() != xpath_type_number && expr->is_posinv(); - - n = new (m_alloc.node()) xpath_ast_node(posinv ? ast_filter_posinv : ast_filter, xpath_type_node_set, n, expr); - - if (m_lexer.current() != lex_close_square_brace) - throw xpath_exception("Unmatched square brace"); - - m_lexer.next(); - } - - return n; - } - - // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep - // AxisSpecifier ::= AxisName '::' | '@'? - // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')' - // NameTest ::= '*' | NCName ':' '*' | QName - // AbbreviatedStep ::= '.' | '..' - xpath_ast_node* parse_step(xpath_ast_node* set) - { - if (set && set->rettype() != xpath_type_node_set) - throw xpath_exception("Step has to be applied to node set"); - - bool axis_specified = false; - axis_t axis = axis_child; // implied child axis - - if (m_lexer.current() == lex_axis_attribute) - { - axis = axis_attribute; - axis_specified = true; - - m_lexer.next(); - } - else if (m_lexer.current() == lex_dot) - { - m_lexer.next(); - - return new (m_alloc.node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, xpath_lexer_string(), m_alloc); - } - else if (m_lexer.current() == lex_double_dot) - { - m_lexer.next(); - - return new (m_alloc.node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, xpath_lexer_string(), m_alloc); - } - - nodetest_t nt_type = nodetest_none; - xpath_lexer_string nt_name; - - if (m_lexer.current() == lex_string) - { - // node name test - nt_name = m_lexer.contents(); - m_lexer.next(); - - // was it an axis name? - if (m_lexer.current() == lex_double_colon) - { - // parse axis name - if (axis_specified) throw xpath_exception("Two axis specifiers in one step"); - - axis = parse_axis_name(nt_name, axis_specified); - - if (!axis_specified) throw xpath_exception("Unknown axis"); - - // read actual node test - m_lexer.next(); - - if (m_lexer.current() == lex_multiply) - { - nt_type = nodetest_all; - nt_name = xpath_lexer_string(); - m_lexer.next(); - } - else if (m_lexer.current() == lex_string) - { - nt_name = m_lexer.contents(); - m_lexer.next(); - } - else throw xpath_exception("Unrecognized node test"); - } - - if (nt_type == nodetest_none) - { - // node type test or processing-instruction - if (m_lexer.current() == lex_open_brace) - { - m_lexer.next(); - - if (m_lexer.current() == lex_close_brace) - { - m_lexer.next(); - - nt_type = parse_node_test_type(nt_name); - - if (nt_type == nodetest_none) throw xpath_exception("Unrecognized node type"); - - nt_name = xpath_lexer_string(); - } - else if (nt_name == PUGIXML_TEXT("processing-instruction")) - { - if (m_lexer.current() != lex_quoted_string) - throw xpath_exception("Only literals are allowed as arguments to processing-instruction()"); - - nt_type = nodetest_pi; - nt_name = m_lexer.contents(); - m_lexer.next(); - - if (m_lexer.current() != lex_close_brace) - throw xpath_exception("Unmatched brace near processing-instruction()"); - m_lexer.next(); - } - else - throw xpath_exception("Unmatched brace near node type test"); - - } - // QName or NCName:* - else - { - const char_t* colon_pos = std::char_traits::find(nt_name.begin, static_cast(nt_name.end - nt_name.begin), ':'); - - if (colon_pos && colon_pos + 2 == nt_name.end && colon_pos[1] == '*') // NCName:* - { - nt_name.end--; // erase * - - nt_type = nodetest_all_in_namespace; - } - else nt_type = nodetest_name; - } - } - } - else if (m_lexer.current() == lex_multiply) - { - nt_type = nodetest_all; - m_lexer.next(); - } - else throw xpath_exception("Unrecognized node test"); - - xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_step, set, axis, nt_type, nt_name, m_alloc); - - xpath_ast_node* last = 0; - - while (m_lexer.current() == lex_open_square_brace) - { - m_lexer.next(); - - xpath_ast_node* expr = parse_expression(); - - xpath_ast_node* pred = new (m_alloc.node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr); - - if (m_lexer.current() != lex_close_square_brace) - throw xpath_exception("Unmatched square brace"); - m_lexer.next(); - - if (last) last->set_next(pred); - else n->set_right(pred); - - last = pred; - } - - return n; - } - - // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step - xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) - { - xpath_ast_node* n = parse_step(set); - - while (m_lexer.current() == lex_slash || m_lexer.current() == lex_double_slash) - { - lexeme_t l = m_lexer.current(); - m_lexer.next(); - - if (l == lex_double_slash) - n = new (m_alloc.node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, xpath_lexer_string(), m_alloc); - - n = parse_step(n); - } - - return n; - } - - // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath - // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath - xpath_ast_node* parse_location_path() - { - if (m_lexer.current() == lex_slash) - { - m_lexer.next(); - - xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_step_root, xpath_type_node_set); - - // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path - lexeme_t l = m_lexer.current(); - - if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply) - return parse_relative_location_path(n); - else - return n; - } - else if (m_lexer.current() == lex_double_slash) - { - m_lexer.next(); - - xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_step_root, xpath_type_node_set); - n = new (m_alloc.node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, xpath_lexer_string(), m_alloc); - - return parse_relative_location_path(n); - } - else - { - return parse_relative_location_path(0); - } - } - - // PathExpr ::= LocationPath - // | FilterExpr - // | FilterExpr '/' RelativeLocationPath - // | FilterExpr '//' RelativeLocationPath - xpath_ast_node* parse_path_expression() - { - // Clarification. - // PathExpr begins with either LocationPath or FilterExpr. - // FilterExpr begins with PrimaryExpr - // PrimaryExpr begins with '$' in case of it being a variable reference, - // '(' in case of it being an expression, string literal, number constant or - // function call. - - if (m_lexer.current() == lex_var_ref || m_lexer.current() == lex_open_brace || - m_lexer.current() == lex_quoted_string || m_lexer.current() == lex_number || - m_lexer.current() == lex_string) - { - if (m_lexer.current() == lex_string) - { - // This is either a function call, or not - if not, we shall proceed with location path - const char_t* state = m_lexer.state(); - - while (IS_CHARTYPEX(*state, ctx_space)) ++state; - - if (*state != '(') return parse_location_path(); - - // This looks like a function call; however this still can be a node-test. Check it. - if (parse_node_test_type(m_lexer.contents()) != nodetest_none) return parse_location_path(); - } - - xpath_ast_node* n = parse_filter_expression(); - - if (m_lexer.current() == lex_slash || m_lexer.current() == lex_double_slash) - { - lexeme_t l = m_lexer.current(); - m_lexer.next(); - - if (l == lex_double_slash) - { - if (n->rettype() != xpath_type_node_set) throw xpath_exception("Step has to be applied to node set"); - - n = new (m_alloc.node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, xpath_lexer_string(), m_alloc); - } - - // select from location path - return parse_relative_location_path(n); - } - - return n; - } - else return parse_location_path(); - } - - // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr - xpath_ast_node* parse_union_expression() - { - xpath_ast_node* n = parse_path_expression(); - - while (m_lexer.current() == lex_union) - { - m_lexer.next(); - - xpath_ast_node* expr = parse_union_expression(); - - if (n->rettype() != xpath_type_node_set || expr->rettype() != xpath_type_node_set) - throw xpath_exception("Union operator has to be applied to node sets"); - - n = new (m_alloc.node()) xpath_ast_node(ast_op_union, xpath_type_node_set, n, expr); - } - - return n; - } - - // UnaryExpr ::= UnionExpr | '-' UnaryExpr - xpath_ast_node* parse_unary_expression() - { - if (m_lexer.current() == lex_minus) - { - m_lexer.next(); - - xpath_ast_node* expr = parse_unary_expression(); - - return new (m_alloc.node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr); - } - else return parse_union_expression(); - } - - // MultiplicativeExpr ::= UnaryExpr - // | MultiplicativeExpr '*' UnaryExpr - // | MultiplicativeExpr 'div' UnaryExpr - // | MultiplicativeExpr 'mod' UnaryExpr - xpath_ast_node* parse_multiplicative_expression() - { - xpath_ast_node* n = parse_unary_expression(); - - while (m_lexer.current() == lex_multiply || (m_lexer.current() == lex_string && - (m_lexer.contents() == PUGIXML_TEXT("mod") || m_lexer.contents() == PUGIXML_TEXT("div")))) - { - ast_type_t op = m_lexer.current() == lex_multiply ? ast_op_multiply : - m_lexer.contents().begin[0] == 'd' ? ast_op_divide : ast_op_mod; - m_lexer.next(); - - xpath_ast_node* expr = parse_unary_expression(); - - n = new (m_alloc.node()) xpath_ast_node(op, xpath_type_number, n, expr); - } - - return n; - } - - // AdditiveExpr ::= MultiplicativeExpr - // | AdditiveExpr '+' MultiplicativeExpr - // | AdditiveExpr '-' MultiplicativeExpr - xpath_ast_node* parse_additive_expression() - { - xpath_ast_node* n = parse_multiplicative_expression(); - - while (m_lexer.current() == lex_plus || m_lexer.current() == lex_minus) - { - lexeme_t l = m_lexer.current(); - - m_lexer.next(); - - xpath_ast_node* expr = parse_multiplicative_expression(); - - n = new (m_alloc.node()) xpath_ast_node(l == lex_plus ? ast_op_add : ast_op_subtract, xpath_type_number, n, expr); - } - - return n; - } - - // RelationalExpr ::= AdditiveExpr - // | RelationalExpr '<' AdditiveExpr - // | RelationalExpr '>' AdditiveExpr - // | RelationalExpr '<=' AdditiveExpr - // | RelationalExpr '>=' AdditiveExpr - xpath_ast_node* parse_relational_expression() - { - xpath_ast_node* n = parse_additive_expression(); - - while (m_lexer.current() == lex_less || m_lexer.current() == lex_less_or_equal || - m_lexer.current() == lex_greater || m_lexer.current() == lex_greater_or_equal) - { - lexeme_t l = m_lexer.current(); - m_lexer.next(); - - xpath_ast_node* expr = parse_additive_expression(); - - n = new (m_alloc.node()) xpath_ast_node(l == lex_less ? ast_op_less : l == lex_greater ? ast_op_greater : - l == lex_less_or_equal ? ast_op_less_or_equal : ast_op_greater_or_equal, xpath_type_boolean, n, expr); - } - - return n; - } - - // EqualityExpr ::= RelationalExpr - // | EqualityExpr '=' RelationalExpr - // | EqualityExpr '!=' RelationalExpr - xpath_ast_node* parse_equality_expression() - { - xpath_ast_node* n = parse_relational_expression(); - - while (m_lexer.current() == lex_equal || m_lexer.current() == lex_not_equal) - { - lexeme_t l = m_lexer.current(); - - m_lexer.next(); - - xpath_ast_node* expr = parse_relational_expression(); - - n = new (m_alloc.node()) xpath_ast_node(l == lex_equal ? ast_op_equal : ast_op_not_equal, xpath_type_boolean, n, expr); - } - - return n; - } - - // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr - xpath_ast_node* parse_and_expression() - { - xpath_ast_node* n = parse_equality_expression(); - - while (m_lexer.current() == lex_string && m_lexer.contents() == PUGIXML_TEXT("and")) - { - m_lexer.next(); - - xpath_ast_node* expr = parse_equality_expression(); - - n = new (m_alloc.node()) xpath_ast_node(ast_op_and, xpath_type_boolean, n, expr); - } - - return n; - } - - // OrExpr ::= AndExpr | OrExpr 'or' AndExpr - xpath_ast_node* parse_or_expression() - { - xpath_ast_node* n = parse_and_expression(); - - while (m_lexer.current() == lex_string && m_lexer.contents() == PUGIXML_TEXT("or")) - { - m_lexer.next(); - - xpath_ast_node* expr = parse_and_expression(); - - n = new (m_alloc.node()) xpath_ast_node(ast_op_or, xpath_type_boolean, n, expr); - } - - return n; - } - - // Expr ::= OrExpr - xpath_ast_node* parse_expression() - { - return parse_or_expression(); - } - - public: - explicit xpath_parser(const char_t* query, xpath_allocator& alloc): m_alloc(alloc), m_lexer(query) - { - } - - xpath_ast_node* parse() - { - xpath_ast_node* result = parse_expression(); - - if (m_lexer.current() != lex_eof) - { - // there are still unparsed tokens left, error - throw xpath_exception("Incorrect query"); - } - - return result; - } - }; - - xpath_query::xpath_query(const char_t* query): m_alloc(0), m_root(0) - { - compile(query); - } - - xpath_query::~xpath_query() - { - delete m_alloc; - } - - void xpath_query::compile(const char_t* query) - { - delete m_alloc; - m_alloc = new xpath_allocator; - - xpath_parser p(query, *m_alloc); - - m_root = p.parse(); - } - - xpath_value_type xpath_query::return_type() const - { - if (!m_root) return xpath_type_none; - - return m_root->rettype(); - } - - bool xpath_query::evaluate_boolean(const xml_node& n) const - { - if (!m_root) return false; - - xpath_context c; - - c.root = n.root(); - c.n = n; - c.position = 1; - c.size = 1; - - return m_root->eval_boolean(c); - } - - double xpath_query::evaluate_number(const xml_node& n) const - { - if (!m_root) return gen_nan(); - - xpath_context c; - - c.root = n.root(); - c.n = n; - c.position = 1; - c.size = 1; - - return m_root->eval_number(c); - } - - string_t xpath_query::evaluate_string(const xml_node& n) const - { - if (!m_root) return string_t(); - - xpath_context c; - - c.root = n.root(); - c.n = n; - c.position = 1; - c.size = 1; - - return m_root->eval_string(c); - } - - xpath_node_set xpath_query::evaluate_node_set(const xml_node& n) const - { - if (!m_root) return xpath_node_set(); - if (m_root->rettype() != xpath_type_node_set) throw xpath_exception("Expression does not evaluate to node set"); - - xpath_context c; - - c.root = n.root(); - c.n = n; - c.position = 1; - c.size = 1; - - return m_root->eval_node_set(c); - } - - xpath_node xml_node::select_single_node(const char_t* query) const - { - xpath_query q(query); - return select_single_node(q); - } - - xpath_node xml_node::select_single_node(const xpath_query& query) const - { - xpath_node_set s = query.evaluate_node_set(*this); - return s.empty() ? xpath_node() : s.first(); - } - - xpath_node_set xml_node::select_nodes(const char_t* query) const - { - xpath_query q(query); - return select_nodes(q); - } - - xpath_node_set xml_node::select_nodes(const xpath_query& query) const - { - return query.evaluate_node_set(*this); - } -} - -#endif - -/** - * Copyright (c) 2006-2010 Arseny Kapoulkine - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ +/** + * pugixml parser - version 0.9 + * -------------------------------------------------------- + * Copyright (C) 2006-2010, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Report bugs and download new versions at http://code.google.com/p/pugixml/ + * + * This library is distributed under the MIT License. See notice at the end + * of this file. + * + * This work is based on the pugxml parser, which is: + * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) + */ + +#include "pugixml.hpp" + +#ifndef PUGIXML_NO_XPATH + +#include + +#include +#include +#include +#include +#include +#include + +#ifdef PUGIXML_WCHAR_MODE +# include +#endif + +// int32_t +#if !defined(_MSC_VER) || _MSC_VER >= 1600 +# include +#else +typedef __int32 int32_t; +#endif + +#if defined(_MSC_VER) +# pragma warning(disable: 4127) // conditional expression is constant +# pragma warning(disable: 4996) // this function or variable may be unsafe +#endif + +#ifdef __INTEL_COMPILER +# pragma warning(disable: 1478 1786) // function was declared "deprecated" +#endif + +#ifdef __SNC__ +# pragma diag_suppress=237 // controlling expression is constant +#endif + +#include +#include + +// String utilities prototypes +namespace pugi +{ + namespace impl + { + bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count); + void widen_ascii(wchar_t* dest, const char* source); + } +} + +namespace +{ + using namespace pugi; + + enum chartypex + { + ctx_space = 1, // \r, \n, space, tab + ctx_start_symbol = 2, // Any symbol > 127, a-z, A-Z, _ + ctx_digit = 4, // 0-9 + ctx_symbol = 8 // Any symbol > 127, a-z, A-Z, 0-9, _, -, . + }; + + const unsigned char chartypex_table[256] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 0-15 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 0, // 32-47 + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 0, 0, 0, 0, 0, // 48-63 + 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 64-79 + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 10, // 80-95 + 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 96-111 + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, // 112-127 + + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 128+ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 + }; + +#ifdef PUGIXML_WCHAR_MODE + #define IS_CHARTYPEX(c, ct) ((static_cast(c) < 128 ? chartypex_table[static_cast(c)] : chartypex_table[128]) & (ct)) +#else + #define IS_CHARTYPEX(c, ct) (chartypex_table[static_cast(c)] & (ct)) +#endif + + bool starts_with(const char_t* string, const char_t* pattern) + { + while (*pattern && *string == *pattern) + { + string++; + pattern++; + } + + return *pattern == 0; + } + + const char_t* find_char(const char_t* s, char_t c) + { + #ifdef PUGIXML_WCHAR_MODE + return wcschr(s, c); + #else + return ::strchr(s, c); + #endif + } + + string_t string_value(const xpath_node& na) + { + if (na.attribute()) + return na.attribute().value(); + else + { + const xml_node& n = na.node(); + + switch (n.type()) + { + case node_pcdata: + case node_cdata: + case node_comment: + case node_pi: + return n.value(); + + case node_document: + case node_element: + { + string_t result; + + xml_node cur = n.first_child(); + + if (cur) + { + do + { + if (cur.type() == node_pcdata || cur.type() == node_cdata) + result += cur.value(); + + if (cur.first_child()) + cur = cur.first_child(); + else if (cur.next_sibling()) + cur = cur.next_sibling(); + else + { + // Borland C++ workaround + while (!cur.next_sibling() && cur != n && (bool)cur.parent()) + cur = cur.parent(); + + if (cur != n) + cur = cur.next_sibling(); + } + } + while (cur && cur != n); + } + + return result; + } + + default: + return string_t(); + } + } + } + + unsigned int node_height(xml_node n) + { + unsigned int result = 0; + + while (n) + { + ++result; + n = n.parent(); + } + + return result; + } + + // precondition: node_height of ln is <= node_height of rn, ln != rn + bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh) + { + assert(lh <= rh); + + while (lh < rh) + { + --rh; + rn = rn.parent(); + } + + if (ln == rn) return true; + + while (ln.parent() != rn.parent()) + { + ln = ln.parent(); + rn = rn.parent(); + } + + for (; ln; ln = ln.next_sibling()) + if (ln == rn) + return true; + + return false; + } + + bool node_is_ancestor(xml_node parent, xml_node node) + { + while (node && node != parent) node = node.parent(); + + return parent && node == parent; + } + + struct document_order_comparator + { + bool operator()(const xpath_node& lhs, const xpath_node& rhs) const + { + xml_node ln = lhs.node(), rn = rhs.node(); + + if (lhs.attribute() && rhs.attribute()) + { + if (lhs.parent() == rhs.parent()) + { + for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute()) + if (a == rhs.attribute()) + return true; + + return false; + } + + ln = lhs.parent(); + rn = rhs.parent(); + } + else if (lhs.attribute()) + { + if (lhs.parent() == rhs.node()) return false; + + ln = lhs.parent(); + } + else if (rhs.attribute()) + { + if (rhs.parent() == lhs.node()) return true; + + rn = rhs.parent(); + } + + if (ln == rn) return false; + + unsigned int lh = node_height(ln); + unsigned int rh = node_height(rn); + + return (lh <= rh) ? node_is_before(ln, lh, rn, rh) : !node_is_before(rn, rh, ln, lh); + } + }; + + struct duplicate_comparator + { + bool operator()(const xpath_node& lhs, const xpath_node& rhs) const + { + if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true; + else return rhs.attribute() ? false : lhs.node() < rhs.node(); + } + }; + + double gen_nan() + { + #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24)) + union { float f; int32_t i; } u[sizeof(float) == sizeof(int32_t) ? 1 : -1]; + u[0].i = 0x7fc00000; + return u[0].f; + #else + // fallback + const volatile double zero = 0.0; + return zero / zero; + #endif + } + + bool is_nan(double value) + { + #if defined(_MSC_VER) || defined(__BORLANDC__) + return !!_isnan(value); + #elif defined(fpclassify) && defined(FP_NAN) + return fpclassify(value) == FP_NAN; + #else + // fallback + const volatile double v = value; + return v != v; + #endif + } + + const char_t* convert_number_to_string_special(double value) + { + #if defined(_MSC_VER) || defined(__BORLANDC__) + if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0; + if (_isnan(value)) return PUGIXML_TEXT("NaN"); + return PUGIXML_TEXT("-Infinity") + (value > 0); + #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO) + switch (fpclassify(value)) + { + case FP_NAN: + return PUGIXML_TEXT("NaN"); + + case FP_INFINITE: + return PUGIXML_TEXT("-Infinity") + (value > 0); + + case FP_ZERO: + return PUGIXML_TEXT("0"); + + default: + return 0; + } + #else + // fallback + const volatile double v = value; + + if (v == 0) return PUGIXML_TEXT("0"); + if (v != v) return PUGIXML_TEXT("NaN"); + if (v * 2 == v) return PUGIXML_TEXT("-Infinity") + (value > 0); + return 0; + #endif + } + + bool convert_number_to_boolean(double value) + { + return (value != 0 && !is_nan(value)); + } + + // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent + void convert_number_to_mantissa_exponent(double value, char* buffer, char** out_mantissa, int* out_exponent) + { + // get a scientific notation value with IEEE DBL_DIG decimals + sprintf(buffer, "%.15e", value); + + // get the exponent (possibly negative) + char* exponent_string = strchr(buffer, 'e'); + assert(exponent_string); + + int exponent = atoi(exponent_string + 1); + + // extract mantissa string: skip sign + char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer; + assert(mantissa[0] != '0' && mantissa[1] == '.'); + + // divide mantissa by 10 to eliminate integer part + mantissa[1] = mantissa[0]; + mantissa++; + exponent++; + + // remove extra mantissa digits and zero-terminate mantissa + char* mantissa_end = exponent_string; + + while (mantissa != mantissa_end && *(mantissa_end - 1) == '0') --mantissa_end; + + *mantissa_end = 0; + + // fill results + *out_mantissa = mantissa; + *out_exponent = exponent; + } + + string_t convert_number_to_string(double value) + { + // try special number conversion + const char_t* special = convert_number_to_string_special(value); + if (special) return special; + + // get mantissa + exponent form + char mantissa_buffer[64]; + + char* mantissa; + int exponent; + convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent); + + // make the number! + char_t result[512]; + char_t* s = result; + + // sign + if (value < 0) *s++ = '-'; + + // integer part + if (exponent <= 0) + { + *s++ = '0'; + } + else + { + while (exponent > 0) + { + assert(*mantissa == 0 || (unsigned)(*mantissa - '0') <= 9); + *s++ = *mantissa ? *mantissa++ : '0'; + exponent--; + } + } + + // fractional part + if (*mantissa) + { + // decimal point + *s++ = '.'; + + // extra zeroes from negative exponent + while (exponent < 0) + { + *s++ = '0'; + exponent++; + } + + // extra mantissa digits + while (*mantissa) + { + assert((unsigned)(*mantissa - '0') <= 9); + *s++ = *mantissa++; + } + } + + // zero-terminate + assert(s < result + sizeof(result) / sizeof(result[0])); + *s = 0; + + return string_t(result); + } + + bool check_string_to_number_format(const char_t* string) + { + // parse leading whitespace + while (IS_CHARTYPEX(*string, ctx_space)) ++string; + + // parse sign + if (*string == '-') ++string; + + if (!*string) return false; + + // if there is no integer part, there should be a decimal part with at least one digit + if (!IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !IS_CHARTYPEX(string[1], ctx_digit))) return false; + + // parse integer part + while (IS_CHARTYPEX(*string, ctx_digit)) ++string; + + // parse decimal part + if (*string == '.') + { + ++string; + + while (IS_CHARTYPEX(*string, ctx_digit)) ++string; + } + + // parse trailing whitespace + while (IS_CHARTYPEX(*string, ctx_space)) ++string; + + return *string == 0; + } + + double convert_string_to_number(const char_t* string) + { + // check string format + if (!check_string_to_number_format(string)) return gen_nan(); + + // parse string + #ifdef PUGIXML_WCHAR_MODE + return wcstod(string, 0); + #else + return atof(string); + #endif + } + + double convert_string_to_number(const char_t* begin, const char_t* end) + { + char_t buffer[32]; + + size_t length = static_cast(end - begin); + + if (length < sizeof(buffer) / sizeof(buffer[0])) + { + // optimized on-stack conversion + memcpy(buffer, begin, length * sizeof(char_t)); + buffer[length] = 0; + + return convert_string_to_number(buffer); + } + else + { + // need to make dummy on-heap copy + string_t copy(begin, end); + + return convert_string_to_number(copy.c_str()); + } + } + + double round_nearest(double value) + { + return floor(value + 0.5); + } + + double round_nearest_nzero(double value) + { + // same as round_nearest, but returns -0 for [-0.5, -0] + // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0) + return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5); + } + + const char_t* local_name(const char_t* name) + { + const char_t* p = find_char(name, ':'); + + return p ? p + 1 : name; + } + + const char_t* namespace_uri(const xml_node& node) + { + const char_t* pos = find_char(node.name(), ':'); + + string_t ns = PUGIXML_TEXT("xmlns"); + + if (pos) + { + ns += ':'; + ns.append(node.name(), pos); + } + + xml_node p = node; + + while (p) + { + xml_attribute a = p.attribute(ns.c_str()); + + if (a) return a.value(); + + p = p.parent(); + } + + return PUGIXML_TEXT(""); + } + + const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent) + { + const char_t* pos = find_char(attr.name(), ':'); + + // Default namespace does not apply to attributes + if (!pos) return PUGIXML_TEXT(""); + + string_t ns = PUGIXML_TEXT("xmlns:"); + ns.append(attr.name(), pos); + + xml_node p = parent; + + while (p) + { + xml_attribute a = p.attribute(ns.c_str()); + + if (a) return a.value(); + + p = p.parent(); + } + + return PUGIXML_TEXT(""); + } + + struct equal_to + { + template bool operator()(const T& lhs, const T& rhs) const + { + return lhs == rhs; + } + }; + + struct not_equal_to + { + template bool operator()(const T& lhs, const T& rhs) const + { + return lhs != rhs; + } + }; + + struct less + { + template bool operator()(const T& lhs, const T& rhs) const + { + return lhs < rhs; + } + }; + + struct less_equal + { + template bool operator()(const T& lhs, const T& rhs) const + { + return lhs <= rhs; + } + }; +} + +namespace pugi +{ + xpath_exception::xpath_exception(const char* message): m_message(message) + { + } + + const char* xpath_exception::what() const throw() + { + return m_message; + } + + const size_t xpath_memory_block_size = 4096; ///< Memory block size, 4 kb + + class xpath_allocator + { + // disable copy ctor and assignment + xpath_allocator(const xpath_allocator&); + xpath_allocator& operator=(const xpath_allocator&); + + struct memory_block + { + memory_block(): next(0), size(0) + { + } + + memory_block* next; + size_t size; + + char data[xpath_memory_block_size]; + }; + + memory_block* m_root; + + public: + xpath_allocator(): m_root(0) + { + m_root = new memory_block; + } + + ~xpath_allocator() + { + while (m_root) + { + memory_block* cur = m_root->next; + delete m_root; + m_root = cur; + } + } + + void* alloc(size_t size) + { + // align size so that we're able to store pointers in subsequent blocks + size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1); + + if (m_root->size + size <= xpath_memory_block_size) + { + void* buf = m_root->data + m_root->size; + m_root->size += size; + return buf; + } + else + { + memory_block* block; + + if (size > xpath_memory_block_size) + block = static_cast(operator new(size + sizeof(memory_block) - xpath_memory_block_size)); + else + block = new memory_block; + + block->next = m_root; + block->size = size; + + m_root = block; + + return block->data; + } + } + + void* node(); + }; + + xpath_node::xpath_node() + { + } + + xpath_node::xpath_node(const xml_node& node): m_node(node) + { + } + + xpath_node::xpath_node(const xml_attribute& attribute, const xml_node& parent): m_node(parent), m_attribute(attribute) + { + } + + xml_node xpath_node::node() const + { + return m_attribute ? xml_node() : m_node; + } + + xml_attribute xpath_node::attribute() const + { + return m_attribute; + } + + xml_node xpath_node::parent() const + { + return m_attribute ? m_node : m_node.parent(); + } + + xpath_node::operator xpath_node::unspecified_bool_type() const + { + return (m_node || m_attribute) ? &xpath_node::m_node : 0; + } + + bool xpath_node::operator!() const + { + return !(m_node || m_attribute); + } + + bool xpath_node::operator==(const xpath_node& n) const + { + return m_node == n.m_node && m_attribute == n.m_attribute; + } + + bool xpath_node::operator!=(const xpath_node& n) const + { + return m_node != n.m_node || m_attribute != n.m_attribute; + } + +#ifdef __BORLANDC__ + bool operator&&(const xpath_node& lhs, bool rhs) + { + return (bool)lhs && rhs; + } + + bool operator||(const xpath_node& lhs, bool rhs) + { + return (bool)lhs || rhs; + } +#endif + + xpath_node_set::xpath_node_set(): m_type(type_unsorted), m_begin(&m_storage), m_end(&m_storage), m_eos(&m_storage + 1) + { + } + + xpath_node_set::~xpath_node_set() + { + if (m_begin != &m_storage) delete[] m_begin; + } + + xpath_node_set::xpath_node_set(const xpath_node_set& ns): m_type(type_unsorted), m_begin(&m_storage), m_end(&m_storage), m_eos(&m_storage + 1) + { + *this = ns; + } + + xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns) + { + if (&ns == this) return *this; + + if (m_begin != &m_storage) delete[] m_begin; + + m_begin = m_end = m_eos = 0; + m_type = ns.m_type; + + if (ns.size() == 1) + { + m_storage = *ns.m_begin; + m_begin = &m_storage; + m_end = m_eos = &m_storage + 1; + } + else + { + append(ns.begin(), ns.end()); + } + + return *this; + } + + xpath_node_set::type_t xpath_node_set::type() const + { + return m_type; + } + + size_t xpath_node_set::size() const + { + return m_end - m_begin; + } + + bool xpath_node_set::empty() const + { + return size() == 0; + } + + const xpath_node& xpath_node_set::operator[](size_t index) const + { + assert(index < size()); + return m_begin[index]; + } + + xpath_node_set::iterator xpath_node_set::mut_begin() + { + return m_begin; + } + + xpath_node_set::const_iterator xpath_node_set::begin() const + { + return m_begin; + } + + xpath_node_set::const_iterator xpath_node_set::end() const + { + return m_end; + } + + void xpath_node_set::sort(bool reverse) + { + std::sort(m_begin, m_end, document_order_comparator()); + + if (reverse) + std::reverse(m_begin, m_end); + + m_type = reverse ? type_sorted_reverse : type_sorted; + } + + void xpath_node_set::push_back(const xpath_node& n) + { + if (m_end == m_eos) + append(&n, &n + 1); + else + { + *m_end = n; + ++m_end; + } + } + + void xpath_node_set::append(const_iterator begin, const_iterator end) + { + if (begin == end) return; + + size_t count = end - begin; + size_t size = m_end - m_begin; + size_t capacity = m_eos - m_begin; + + if (capacity < size + count) + { + if (capacity < 2) capacity = 2; + + while (capacity < size + count) capacity += capacity / 2; + + xpath_node* storage = new xpath_node[capacity]; + std::copy(m_begin, m_end, storage); + + if (m_begin != &m_storage) delete[] m_begin; + + m_begin = storage; + m_end = storage + size; + m_eos = storage + capacity; + } + + std::copy(begin, end, m_end); + m_end += count; + } + + void xpath_node_set::truncate(iterator it) + { + m_end = it; + } + + xpath_node xpath_node_set::first() const + { + if (empty()) return xpath_node(); + + switch (m_type) + { + case type_sorted: return *m_begin; + case type_sorted_reverse: return *(m_end - 1); + case type_unsorted: return *std::min_element(begin(), end(), document_order_comparator()); + default: return xpath_node(); + } + } + + void xpath_node_set::remove_duplicates() + { + if (m_type == type_unsorted) + { + std::sort(m_begin, m_end, duplicate_comparator()); + } + + truncate(std::unique(m_begin, m_end)); + } + + struct xpath_context + { + xml_node root; + xpath_node n; + size_t position, size; + }; + + enum lexeme_t + { + lex_none = 0, + lex_equal, + lex_not_equal, + lex_less, + lex_greater, + lex_less_or_equal, + lex_greater_or_equal, + lex_plus, + lex_minus, + lex_multiply, + lex_union, + lex_var_ref, + lex_open_brace, + lex_close_brace, + lex_quoted_string, + lex_number, + lex_slash, + lex_double_slash, + lex_open_square_brace, + lex_close_square_brace, + lex_string, + lex_comma, + lex_axis_attribute, + lex_dot, + lex_double_dot, + lex_double_colon, + lex_eof + }; + + struct xpath_lexer_string + { + const char_t* begin; + const char_t* end; + + xpath_lexer_string(): begin(0), end(0) + { + } + + bool operator==(const char_t* other) const + { + size_t length = static_cast(end - begin); + + return impl::strequalrange(other, begin, length); + } + }; + + class xpath_lexer + { + // disable copy ctor and assignment + xpath_lexer(const xpath_lexer&); + xpath_lexer& operator=(const xpath_lexer&); + + private: + const char_t* m_cur; + xpath_lexer_string m_cur_lexeme_contents; + + lexeme_t m_cur_lexeme; + + void contents_clear() + { + m_cur_lexeme_contents = xpath_lexer_string(); + } + + public: + explicit xpath_lexer(const char_t* query): m_cur(query) + { + next(); + } + + const char_t* state() const + { + return m_cur; + } + + void next() + { + contents_clear(); + + const char_t* cur = m_cur; + + while (IS_CHARTYPEX(*cur, ctx_space)) ++cur; + + switch (*cur) + { + case 0: + m_cur_lexeme = lex_eof; + break; + + case '>': + if (*(cur+1) == '=') + { + cur += 2; + m_cur_lexeme = lex_greater_or_equal; + } + else + { + cur += 1; + m_cur_lexeme = lex_greater; + } + break; + + case '<': + if (*(cur+1) == '=') + { + cur += 2; + m_cur_lexeme = lex_less_or_equal; + } + else + { + cur += 1; + m_cur_lexeme = lex_less; + } + break; + + case '!': + if (*(cur+1) == '=') + { + cur += 2; + m_cur_lexeme = lex_not_equal; + } + else + { + m_cur_lexeme = lex_none; + } + break; + + case '=': + cur += 1; + m_cur_lexeme = lex_equal; + + break; + + case '+': + cur += 1; + m_cur_lexeme = lex_plus; + + break; + + case '-': + cur += 1; + m_cur_lexeme = lex_minus; + + break; + + case '*': + cur += 1; + m_cur_lexeme = lex_multiply; + + break; + + case '|': + cur += 1; + m_cur_lexeme = lex_union; + + break; + + case '$': + cur += 1; + m_cur_lexeme = lex_var_ref; + + break; + + case '(': + cur += 1; + m_cur_lexeme = lex_open_brace; + + break; + + case ')': + cur += 1; + m_cur_lexeme = lex_close_brace; + + break; + + case '[': + cur += 1; + m_cur_lexeme = lex_open_square_brace; + + break; + + case ']': + cur += 1; + m_cur_lexeme = lex_close_square_brace; + + break; + + case ',': + cur += 1; + m_cur_lexeme = lex_comma; + + break; + + case '/': + if (*(cur+1) == '/') + { + cur += 2; + m_cur_lexeme = lex_double_slash; + } + else + { + cur += 1; + m_cur_lexeme = lex_slash; + } + break; + + case '.': + if (*(cur+1) == '.') + { + cur += 2; + m_cur_lexeme = lex_double_dot; + } + else if (IS_CHARTYPEX(*(cur+1), ctx_digit)) + { + m_cur_lexeme_contents.begin = cur; // . + + ++cur; + + while (IS_CHARTYPEX(*cur, ctx_digit)) cur++; + + m_cur_lexeme_contents.end = cur; + + m_cur_lexeme = lex_number; + } + else + { + cur += 1; + m_cur_lexeme = lex_dot; + } + break; + + case '@': + cur += 1; + m_cur_lexeme = lex_axis_attribute; + + break; + + case '"': + case '\'': + { + char_t terminator = *cur; + + ++cur; + + m_cur_lexeme_contents.begin = cur; + while (*cur && *cur != terminator) cur++; + m_cur_lexeme_contents.end = cur; + + if (!*cur) + m_cur_lexeme = lex_none; + else + { + cur += 1; + m_cur_lexeme = lex_quoted_string; + } + + break; + } + + case ':': + if (*(cur+1) == ':') + { + cur += 2; + m_cur_lexeme = lex_double_colon; + } + else + { + m_cur_lexeme = lex_none; + } + break; + + default: + if (IS_CHARTYPEX(*cur, ctx_digit)) + { + m_cur_lexeme_contents.begin = cur; + + while (IS_CHARTYPEX(*cur, ctx_digit)) cur++; + + if (*cur == '.') + { + cur++; + + while (IS_CHARTYPEX(*cur, ctx_digit)) cur++; + } + + m_cur_lexeme_contents.end = cur; + + m_cur_lexeme = lex_number; + } + else if (IS_CHARTYPEX(*cur, ctx_start_symbol)) + { + m_cur_lexeme_contents.begin = cur; + + while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++; + + if (cur[0] == ':') + { + if (cur[1] == '*') // namespace test ncname:* + { + cur += 2; // :* + } + else if (IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname + { + cur++; // : + + while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++; + } + } + + m_cur_lexeme_contents.end = cur; + + while (IS_CHARTYPEX(*cur, ctx_space)) ++cur; + + m_cur_lexeme = lex_string; + } + else + { + throw xpath_exception("Unrecognized token"); + } + } + + m_cur = cur; + } + + lexeme_t current() const + { + return m_cur_lexeme; + } + + const xpath_lexer_string& contents() const + { + return m_cur_lexeme_contents; + } + }; + + enum ast_type_t + { + ast_none, + ast_op_or, // left or right + ast_op_and, // left and right + ast_op_equal, // left = right + ast_op_not_equal, // left != right + ast_op_less, // left < right + ast_op_greater, // left > right + ast_op_less_or_equal, // left <= right + ast_op_greater_or_equal, // left >= right + ast_op_add, // left + right + ast_op_subtract, // left - right + ast_op_multiply, // left * right + ast_op_divide, // left / right + ast_op_mod, // left % right + ast_op_negate, // left - right + ast_op_union, // left | right + ast_predicate, // apply predicate to set; next points to next predicate + ast_filter, // select * from left where right + ast_filter_posinv, // select * from left where right; proximity position invariant + ast_string_constant, // string constant + ast_number_constant, // number constant + ast_func_last, // last() + ast_func_position, // position() + ast_func_count, // count(left) + ast_func_id, // id(left) + ast_func_local_name_0, // local-name() + ast_func_local_name_1, // local-name(left) + ast_func_namespace_uri_0, // namespace-uri() + ast_func_namespace_uri_1, // namespace-uri(left) + ast_func_name_0, // name() + ast_func_name_1, // name(left) + ast_func_string_0, // string() + ast_func_string_1, // string(left) + ast_func_concat, // concat(left, right, siblings) + ast_func_starts_with, // starts_with(left, right) + ast_func_contains, // contains(left, right) + ast_func_substring_before, // substring-before(left, right) + ast_func_substring_after, // substring-after(left, right) + ast_func_substring_2, // substring(left, right) + ast_func_substring_3, // substring(left, right, third) + ast_func_string_length_0, // string-length() + ast_func_string_length_1, // string-length(left) + ast_func_normalize_space_0, // normalize-space() + ast_func_normalize_space_1, // normalize-space(left) + ast_func_translate, // translate(left, right, third) + ast_func_boolean, // boolean(left) + ast_func_not, // not(left) + ast_func_true, // true() + ast_func_false, // false() + ast_func_lang, // lang(left) + ast_func_number_0, // number() + ast_func_number_1, // number(left) + ast_func_sum, // sum(left) + ast_func_floor, // floor(left) + ast_func_ceiling, // ceiling(left) + ast_func_round, // round(left) + ast_step, // process set left with step + ast_step_root // select root node + }; + + enum axis_t + { + axis_ancestor, + axis_ancestor_or_self, + axis_attribute, + axis_child, + axis_descendant, + axis_descendant_or_self, + axis_following, + axis_following_sibling, + axis_namespace, + axis_parent, + axis_preceding, + axis_preceding_sibling, + axis_self + }; + + enum nodetest_t + { + nodetest_none, + nodetest_name, + nodetest_type_node, + nodetest_type_comment, + nodetest_type_pi, + nodetest_type_text, + nodetest_pi, + nodetest_all, + nodetest_all_in_namespace + }; + + template struct axis_to_type + { + static const axis_t axis; + }; + + template const axis_t axis_to_type::axis = N; + + class xpath_ast_node + { + private: + // node type + char m_type; + char m_rettype; + + // for ast_step / ast_predicate + char m_axis; + char m_test; + + // tree node structure + xpath_ast_node* m_left; + xpath_ast_node* m_right; + xpath_ast_node* m_next; + + union + { + // value for ast_string_constant + const char_t* string; + // value for ast_number_constant + double number; + // node test for ast_step (node name/namespace/node type/pi target) + const char_t* nodetest; + } m_data; + + xpath_ast_node(const xpath_ast_node&); + xpath_ast_node& operator=(const xpath_ast_node&); + + template static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const Comp& comp) + { + xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); + + if (lt != xpath_type_node_set && rt != xpath_type_node_set) + { + if (lt == xpath_type_boolean || rt == xpath_type_boolean) + return comp(lhs->eval_boolean(c), rhs->eval_boolean(c)); + else if (lt == xpath_type_number || rt == xpath_type_number) + return comp(lhs->eval_number(c), rhs->eval_number(c)); + else if (lt == xpath_type_string || rt == xpath_type_string) + return comp(lhs->eval_string(c), rhs->eval_string(c)); + } + else if (lt == xpath_type_node_set && rt == xpath_type_node_set) + { + xpath_node_set ls = lhs->eval_node_set(c); + xpath_node_set rs = rhs->eval_node_set(c); + + for (xpath_node_set::const_iterator li = ls.begin(); li != ls.end(); ++li) + for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) + { + if (comp(string_value(*li), string_value(*ri))) + return true; + } + + return false; + } + else + { + if (lt == xpath_type_node_set) + { + std::swap(lhs, rhs); + std::swap(lt, rt); + } + + if (lt == xpath_type_boolean) + return comp(lhs->eval_boolean(c), rhs->eval_boolean(c)); + else if (lt == xpath_type_number) + { + double l = lhs->eval_number(c); + xpath_node_set rs = rhs->eval_node_set(c); + + for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) + { + if (comp(l, convert_string_to_number(string_value(*ri).c_str()))) + return true; + } + + return false; + } + else if (lt == xpath_type_string) + { + string_t l = lhs->eval_string(c); + xpath_node_set rs = rhs->eval_node_set(c); + + for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) + { + if (comp(l, string_value(*ri))) + return true; + } + + return false; + } + } + + assert(!"Wrong types"); + return false; + } + + template static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const Comp& comp) + { + xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); + + if (lt != xpath_type_node_set && rt != xpath_type_node_set) + return comp(lhs->eval_number(c), rhs->eval_number(c)); + else if (lt == xpath_type_node_set && rt == xpath_type_node_set) + { + xpath_node_set ls = lhs->eval_node_set(c); + xpath_node_set rs = rhs->eval_node_set(c); + + for (xpath_node_set::const_iterator li = ls.begin(); li != ls.end(); ++li) + { + double l = convert_string_to_number(string_value(*li).c_str()); + + for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) + { + if (comp(l, convert_string_to_number(string_value(*ri).c_str()))) + return true; + } + } + + return false; + } + else if (lt != xpath_type_node_set && rt == xpath_type_node_set) + { + double l = lhs->eval_number(c); + xpath_node_set rs = rhs->eval_node_set(c); + + for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) + { + if (comp(l, convert_string_to_number(string_value(*ri).c_str()))) + return true; + } + + return false; + } + else if (lt == xpath_type_node_set && rt != xpath_type_node_set) + { + xpath_node_set ls = lhs->eval_node_set(c); + double r = rhs->eval_number(c); + + for (xpath_node_set::const_iterator li = ls.begin(); li != ls.end(); ++li) + { + if (comp(convert_string_to_number(string_value(*li).c_str()), r)) + return true; + } + + return false; + } + else + { + assert(!"Wrong types"); + return false; + } + } + + void apply_predicate(xpath_node_set& ns, size_t first, xpath_ast_node* expr, const xpath_context& context) + { + xpath_context c; + c.root = context.root; + + size_t i = 1; + size_t size = ns.size() - first; + + xpath_node_set::iterator last = ns.mut_begin() + first; + + // remove_if... or well, sort of + for (xpath_node_set::iterator it = last; it != ns.end(); ++it, ++i) + { + c.n = *it; + c.position = i; + c.size = size; + + if (expr->rettype() == xpath_type_number) + { + if (expr->eval_number(c) == i) + *last++ = *it; + } + else if (expr->eval_boolean(c)) + *last++ = *it; + } + + ns.truncate(last); + } + + void apply_predicates(xpath_node_set& ns, size_t first, const xpath_context& context) + { + if (ns.size() <= first) return; + + for (xpath_ast_node* pred = m_right; pred; pred = pred->m_next) + { + apply_predicate(ns, first, pred->m_left, context); + } + } + + void step_push(xpath_node_set& ns, const xml_attribute& a, const xml_node& parent) + { + if (!a) return; + + const char_t* name = a.name(); + + // There are no attribute nodes corresponding to attributes that declare namespaces + // That is, "xmlns:..." or "xmlns" + if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return; + + switch (m_test) + { + case nodetest_name: + if (impl::strequal(name, m_data.nodetest)) ns.push_back(xpath_node(a, parent)); + break; + + case nodetest_type_node: + case nodetest_all: + ns.push_back(xpath_node(a, parent)); + break; + + case nodetest_all_in_namespace: + if (starts_with(name, m_data.nodetest)) + ns.push_back(xpath_node(a, parent)); + break; + + default: + ; + } + } + + void step_push(xpath_node_set& ns, const xml_node& n) + { + if (!n) return; + + switch (m_test) + { + case nodetest_name: + if (n.type() == node_element && impl::strequal(n.name(), m_data.nodetest)) ns.push_back(n); + break; + + case nodetest_type_node: + ns.push_back(n); + break; + + case nodetest_type_comment: + if (n.type() == node_comment) + ns.push_back(n); + break; + + case nodetest_type_text: + if (n.type() == node_pcdata || n.type() == node_cdata) + ns.push_back(n); + break; + + case nodetest_type_pi: + if (n.type() == node_pi) + ns.push_back(n); + break; + + case nodetest_pi: + if (n.type() == node_pi && impl::strequal(n.name(), m_data.nodetest)) + ns.push_back(n); + break; + + case nodetest_all: + if (n.type() == node_element) + ns.push_back(n); + break; + + case nodetest_all_in_namespace: + if (n.type() == node_element && starts_with(n.name(), m_data.nodetest)) + ns.push_back(n); + break; + + default: + assert(!"Unknown axis"); + } + } + + template void step_fill(xpath_node_set& ns, const xml_node& n, T) + { + const axis_t axis = T::axis; + + switch (axis) + { + case axis_attribute: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; + + for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute()) + step_push(ns, a, n); + + break; + } + + case axis_child: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; + + for (xml_node c = n.first_child(); c; c = c.next_sibling()) + step_push(ns, c); + + break; + } + + case axis_descendant: + case axis_descendant_or_self: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; + + if (axis == axis_descendant_or_self) + step_push(ns, n); + + xml_node cur = n.first_child(); + + if (cur) + { + do + { + step_push(ns, cur); + + if (cur.first_child()) + cur = cur.first_child(); + else if (cur.next_sibling()) + cur = cur.next_sibling(); + else + { + // Borland C++ workaround + while (!cur.next_sibling() && cur != n && (bool)cur.parent()) + cur = cur.parent(); + + if (cur != n) + cur = cur.next_sibling(); + } + } + while (cur && cur != n); + } + + break; + } + + case axis_following_sibling: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; + + for (xml_node c = n.next_sibling(); c; c = c.next_sibling()) + step_push(ns, c); + + break; + } + + case axis_preceding_sibling: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted; + + for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling()) + step_push(ns, c); + + break; + } + + case axis_following: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; + + xml_node cur = n; + + // exit from this node so that we don't include descendants + while (cur && !cur.next_sibling()) cur = cur.parent(); + cur = cur.next_sibling(); + + if (cur) + { + for (;;) + { + step_push(ns, cur); + + if (cur.first_child()) + cur = cur.first_child(); + else if (cur.next_sibling()) + cur = cur.next_sibling(); + else + { + while (cur && !cur.next_sibling()) cur = cur.parent(); + cur = cur.next_sibling(); + + if (!cur) break; + } + } + } + + break; + } + + case axis_preceding: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted; + + xml_node cur = n; + + while (cur && !cur.previous_sibling()) cur = cur.parent(); + cur = cur.previous_sibling(); + + if (cur) + { + for (;;) + { + if (cur.last_child()) + cur = cur.last_child(); + else + { + // leaf node, can't be ancestor + step_push(ns, cur); + + if (cur.previous_sibling()) + cur = cur.previous_sibling(); + else + { + do + { + cur = cur.parent(); + if (!cur) break; + + if (!node_is_ancestor(cur, n)) step_push(ns, cur); + } + while (!cur.previous_sibling()); + + cur = cur.previous_sibling(); + + if (!cur) break; + } + } + } + } + + break; + } + + case axis_ancestor: + case axis_ancestor_or_self: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted; + + if (axis == axis_ancestor_or_self) + step_push(ns, n); + + xml_node cur = n.parent(); + + while (cur) + { + step_push(ns, cur); + + cur = cur.parent(); + } + + break; + } + + case axis_self: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; + + step_push(ns, n); + + break; + } + + case axis_parent: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; + + if (n.parent()) step_push(ns, n.parent()); + + break; + } + + default: + assert(!"Unimplemented axis"); + } + } + + template void step_fill(xpath_node_set& ns, const xml_attribute& a, const xml_node& p, T v) + { + const axis_t axis = T::axis; + + switch (axis) + { + case axis_ancestor: + case axis_ancestor_or_self: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted; + + if (axis == axis_ancestor_or_self && m_test == nodetest_type_node) // reject attributes based on principal node type test + step_push(ns, a, p); + + xml_node cur = p; + + while (cur) + { + step_push(ns, cur); + + cur = cur.parent(); + } + + break; + } + + case axis_descendant_or_self: + case axis_self: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; + + if (m_test == nodetest_type_node) // reject attributes based on principal node type test + step_push(ns, a, p); + + break; + } + + case axis_following: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; + + xml_node cur = p; + + for (;;) + { + if (cur.first_child()) + cur = cur.first_child(); + else if (cur.next_sibling()) + cur = cur.next_sibling(); + else + { + while (cur && !cur.next_sibling()) cur = cur.parent(); + cur = cur.next_sibling(); + + if (!cur) break; + } + + step_push(ns, cur); + } + + break; + } + + case axis_parent: + { + ns.m_type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted; + + step_push(ns, p); + + break; + } + + case axis_preceding: + { + // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding + step_fill(ns, p, v); + break; + } + + default: + assert(!"Unimplemented axis"); + } + } + + template void step_do(xpath_node_set& ns, const xpath_context& c, T v) + { + const axis_t axis = T::axis; + + assert(ns.empty()); + + switch (axis) + { + case axis_ancestor: + case axis_ancestor_or_self: + case axis_descendant_or_self: + case axis_following: + case axis_parent: + case axis_preceding: + case axis_self: + if (m_left) + { + xpath_node_set s = m_left->eval_node_set(c); + + for (xpath_node_set::const_iterator it = s.begin(); it != s.end(); ++it) + { + size_t size = ns.size(); + + if (it->node()) + step_fill(ns, it->node(), v); + else + step_fill(ns, it->attribute(), it->parent(), v); + + apply_predicates(ns, size, c); + } + } + else + { + if (c.n.node()) step_fill(ns, c.n.node(), v); + else step_fill(ns, c.n.attribute(), c.n.parent(), v); + + apply_predicates(ns, 0, c); + } + + break; + + case axis_following_sibling: + case axis_preceding_sibling: + case axis_attribute: + case axis_child: + case axis_descendant: + if (m_left) + { + xpath_node_set s = m_left->eval_node_set(c); + + for (xpath_node_set::const_iterator it = s.begin(); it != s.end(); ++it) + { + size_t size = ns.size(); + + if (it->node()) + step_fill(ns, it->node(), v); + + apply_predicates(ns, size, c); + } + } + else if (c.n.node()) + { + step_fill(ns, c.n.node(), v); + + apply_predicates(ns, 0, c); + } + + break; + + case axis_namespace: + break; + + default: + assert(!"Unimplemented axis"); + } + } + + static const char_t* duplicate_string(const xpath_lexer_string& value, xpath_allocator& a) + { + if (value.begin) + { + size_t length = static_cast(value.end - value.begin); + + char_t* c = static_cast(a.alloc((length + 1) * sizeof(char_t))); + memcpy(c, value.begin, length * sizeof(char_t)); + c[length] = 0; + + return c; + } + else return 0; + } + public: + xpath_ast_node(ast_type_t type, xpath_value_type rettype, const xpath_lexer_string& value, xpath_allocator& a): + m_type((char)type), m_rettype((char)rettype), m_axis(0), m_test(0), m_left(0), m_right(0), m_next(0) + { + assert(type == ast_string_constant); + m_data.string = duplicate_string(value, a); + } + + xpath_ast_node(ast_type_t type, xpath_value_type rettype, double value): + m_type((char)type), m_rettype((char)rettype), m_axis(0), m_test(0), m_left(0), m_right(0), m_next(0) + { + assert(type == ast_number_constant); + m_data.number = value; + } + + xpath_ast_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0): + m_type((char)type), m_rettype((char)rettype), m_axis(0), m_test(0), m_left(left), m_right(right), m_next(0) + { + } + + xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const xpath_lexer_string& contents, xpath_allocator& a): + m_type((char)type), m_rettype(xpath_type_node_set), m_axis((char)axis), m_test((char)test), m_left(left), m_right(0), m_next(0) + { + m_data.nodetest = duplicate_string(contents, a); + } + + void set_next(xpath_ast_node* value) + { + m_next = value; + } + + void set_right(xpath_ast_node* value) + { + m_right = value; + } + + bool eval_boolean(const xpath_context& c) + { + switch (m_type) + { + case ast_op_or: + if (m_left->eval_boolean(c)) return true; + else return m_right->eval_boolean(c); + + case ast_op_and: + if (!m_left->eval_boolean(c)) return false; + else return m_right->eval_boolean(c); + + case ast_op_equal: + return compare_eq(m_left, m_right, c, equal_to()); + + case ast_op_not_equal: + return compare_eq(m_left, m_right, c, not_equal_to()); + + case ast_op_less: + return compare_rel(m_left, m_right, c, less()); + + case ast_op_greater: + return compare_rel(m_right, m_left, c, less()); + + case ast_op_less_or_equal: + return compare_rel(m_left, m_right, c, less_equal()); + + case ast_op_greater_or_equal: + return compare_rel(m_right, m_left, c, less_equal()); + + case ast_func_starts_with: + return starts_with(m_left->eval_string(c).c_str(), m_right->eval_string(c).c_str()); + + case ast_func_contains: + { + string_t lr = m_left->eval_string(c); + string_t rr = m_right->eval_string(c); + + return rr.empty() || lr.find(rr) != string_t::npos; + } + + case ast_func_boolean: + return m_left->eval_boolean(c); + + case ast_func_not: + return !m_left->eval_boolean(c); + + case ast_func_true: + return true; + + case ast_func_false: + return false; + + case ast_func_lang: + { + if (c.n.attribute()) return false; + + string_t lang = m_left->eval_string(c); + + for (xml_node n = c.n.node(); n; n = n.parent()) + { + xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang")); + + if (a) + { + const char_t* value = a.value(); + + // strnicmp / strncasecmp is not portable + for (const char_t* lit = lang.c_str(); *lit; ++lit) + { + if (tolower(*lit) != tolower(*value)) return false; + ++value; + } + + return *value == 0 || *value == '-'; + } + } + + return false; + } + + default: + { + switch (m_rettype) + { + case xpath_type_number: + return convert_number_to_boolean(eval_number(c)); + + case xpath_type_string: + return !eval_string(c).empty(); + + case xpath_type_node_set: + return !eval_node_set(c).empty(); + + default: + assert(!"Wrong expression for return type boolean"); + return false; + } + } + } + } + + double eval_number(const xpath_context& c) + { + switch (m_type) + { + case ast_op_add: + return m_left->eval_number(c) + m_right->eval_number(c); + + case ast_op_subtract: + return m_left->eval_number(c) - m_right->eval_number(c); + + case ast_op_multiply: + return m_left->eval_number(c) * m_right->eval_number(c); + + case ast_op_divide: + return m_left->eval_number(c) / m_right->eval_number(c); + + case ast_op_mod: + return fmod(m_left->eval_number(c), m_right->eval_number(c)); + + case ast_op_negate: + return -m_left->eval_number(c); + + case ast_number_constant: + return m_data.number; + + case ast_func_last: + return (double)c.size; + + case ast_func_position: + return (double)c.position; + + case ast_func_count: + return (double)m_left->eval_node_set(c).size(); + + case ast_func_string_length_0: + return (double)string_value(c.n).size(); + + case ast_func_string_length_1: + return (double)m_left->eval_string(c).size(); + + case ast_func_number_0: + return convert_string_to_number(string_value(c.n).c_str()); + + case ast_func_number_1: + return m_left->eval_number(c); + + case ast_func_sum: + { + double r = 0; + + xpath_node_set ns = m_left->eval_node_set(c); + + for (xpath_node_set::const_iterator it = ns.begin(); it != ns.end(); ++it) + r += convert_string_to_number(string_value(*it).c_str()); + + return r; + } + + case ast_func_floor: + { + double r = m_left->eval_number(c); + + return r == r ? floor(r) : r; + } + + case ast_func_ceiling: + { + double r = m_left->eval_number(c); + + return r == r ? ceil(r) : r; + } + + case ast_func_round: + return round_nearest_nzero(m_left->eval_number(c)); + + default: + { + switch (m_rettype) + { + case xpath_type_boolean: + return eval_boolean(c) ? 1 : 0; + + case xpath_type_string: + return convert_string_to_number(eval_string(c).c_str()); + + case xpath_type_node_set: + return convert_string_to_number(eval_string(c).c_str()); + + default: + assert(!"Wrong expression for return type number"); + return 0; + } + + } + } + } + + string_t eval_string(const xpath_context& c) + { + switch (m_type) + { + case ast_string_constant: + return m_data.string; + + case ast_func_local_name_0: + { + xpath_node na = c.n; + + if (na.attribute()) return local_name(na.attribute().name()); + else return local_name(na.node().name()); + } + + case ast_func_local_name_1: + { + xpath_node_set ns = m_left->eval_node_set(c); + if (ns.empty()) return string_t(); + + xpath_node na = ns.first(); + + if (na.attribute()) return local_name(na.attribute().name()); + else return local_name(na.node().name()); + } + + case ast_func_name_0: + { + xpath_node na = c.n; + + if (na.attribute()) return na.attribute().name(); + else return na.node().name(); + } + + case ast_func_name_1: + { + xpath_node_set ns = m_left->eval_node_set(c); + if (ns.empty()) return string_t(); + + xpath_node na = ns.first(); + + if (na.attribute()) return na.attribute().name(); + else return na.node().name(); + } + + case ast_func_namespace_uri_0: + { + xpath_node na = c.n; + + if (na.attribute()) return namespace_uri(na.attribute(), na.parent()); + else return namespace_uri(na.node()); + } + + case ast_func_namespace_uri_1: + { + xpath_node_set ns = m_left->eval_node_set(c); + if (ns.empty()) return string_t(); + + xpath_node na = ns.first(); + + if (na.attribute()) return namespace_uri(na.attribute(), na.parent()); + else return namespace_uri(na.node()); + } + + case ast_func_string_0: + return string_value(c.n); + + case ast_func_string_1: + return m_left->eval_string(c); + + case ast_func_concat: + { + string_t r = m_left->eval_string(c); + + for (xpath_ast_node* n = m_right; n; n = n->m_next) + r += n->eval_string(c); + + return r; + } + + case ast_func_substring_before: + { + string_t s = m_left->eval_string(c); + string_t::size_type pos = s.find(m_right->eval_string(c)); + + if (pos == string_t::npos) return string_t(); + else return string_t(s.begin(), s.begin() + pos); + } + + case ast_func_substring_after: + { + string_t s = m_left->eval_string(c); + string_t p = m_right->eval_string(c); + + string_t::size_type pos = s.find(p); + + if (pos == string_t::npos) return string_t(); + else return string_t(s.begin() + pos + p.length(), s.end()); + } + + case ast_func_substring_2: + { + string_t s = m_left->eval_string(c); + double first = round_nearest(m_right->eval_number(c)); + + if (is_nan(first)) return string_t(); // NaN + else if (first >= s.length() + 1) return string_t(); + + size_t pos = first < 1 ? 1 : (size_t)first; + + return s.substr(pos - 1); + } + + case ast_func_substring_3: + { + string_t s = m_left->eval_string(c); + double first = round_nearest(m_right->eval_number(c)); + double last = first + round_nearest(m_right->m_next->eval_number(c)); + + if (is_nan(first) || is_nan(last)) return string_t(); + else if (first >= s.length() + 1) return string_t(); + else if (first >= last) return string_t(); + + size_t pos = first < 1 ? 1 : (size_t)first; + size_t end = last >= s.length() + 1 ? s.length() + 1 : (size_t)last; + + size_t size_requested = end - pos; + size_t size_to_end = s.length() - pos + 1; + + return s.substr(pos - 1, size_requested < size_to_end ? size_requested : size_to_end); + } + + case ast_func_normalize_space_0: + case ast_func_normalize_space_1: + { + string_t s = m_type == ast_func_normalize_space_0 ? string_value(c.n) : m_left->eval_string(c); + + string_t r; + r.reserve(s.size()); + + for (string_t::const_iterator it = s.begin(); it != s.end(); ++it) + { + if (IS_CHARTYPEX(*it, ctx_space)) + { + if (!r.empty() && r[r.size() - 1] != ' ') + r += ' '; + } + else r += *it; + } + + string_t::size_type pos = r.find_last_not_of(' '); + if (pos == string_t::npos) r = string_t(); + else r.erase(r.begin() + pos + 1, r.end()); + + return r; + } + + case ast_func_translate: + { + string_t s = m_left->eval_string(c); + string_t from = m_right->eval_string(c); + string_t to = m_right->m_next->eval_string(c); + + for (string_t::iterator it = s.begin(); it != s.end(); ) + { + string_t::size_type pos = from.find(*it); + + if (pos == string_t::npos) + ++it; + else if (pos >= to.length()) + it = s.erase(it); + else + *it++ = to[pos]; + } + + return s; + } + + default: + { + switch (m_rettype) + { + case xpath_type_boolean: + return eval_boolean(c) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"); + + case xpath_type_number: + return convert_number_to_string(eval_number(c)); + + case xpath_type_node_set: + { + xpath_node_set ns = eval_node_set(c); + return ns.empty() ? string_t() : string_value(ns.first()); + } + + default: + assert(!"Wrong expression for return type string"); + return string_t(); + } + } + } + } + + xpath_node_set eval_node_set(const xpath_context& c) + { + switch (m_type) + { + case ast_op_union: + { + xpath_node_set ls = m_left->eval_node_set(c); + xpath_node_set rs = m_right->eval_node_set(c); + + // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother + ls.m_type = xpath_node_set::type_unsorted; + + ls.append(rs.begin(), rs.end()); + + ls.remove_duplicates(); + + return ls; + } + + case ast_filter: + case ast_filter_posinv: + { + xpath_node_set set = m_left->eval_node_set(c); + + // either expression is a number or it contains position() call; sort by document order + if (m_type == ast_filter) set.sort(); + + apply_predicate(set, 0, m_right, c); + + return set; + } + + case ast_func_id: + return xpath_node_set(); + + case ast_step: + { + xpath_node_set ns; + + switch (m_axis) + { + case axis_ancestor: + step_do(ns, c, axis_to_type()); + break; + + case axis_ancestor_or_self: + step_do(ns, c, axis_to_type()); + break; + + case axis_attribute: + step_do(ns, c, axis_to_type()); + break; + + case axis_child: + step_do(ns, c, axis_to_type()); + break; + + case axis_descendant: + step_do(ns, c, axis_to_type()); + break; + + case axis_descendant_or_self: + step_do(ns, c, axis_to_type()); + break; + + case axis_following: + step_do(ns, c, axis_to_type()); + break; + + case axis_following_sibling: + step_do(ns, c, axis_to_type()); + break; + + case axis_namespace: + step_do(ns, c, axis_to_type()); + break; + + case axis_parent: + step_do(ns, c, axis_to_type()); + break; + + case axis_preceding: + step_do(ns, c, axis_to_type()); + break; + + case axis_preceding_sibling: + step_do(ns, c, axis_to_type()); + break; + + case axis_self: + step_do(ns, c, axis_to_type()); + break; + } + + ns.remove_duplicates(); + + return ns; + } + + case ast_step_root: + { + xpath_node_set ns; + + if (c.root) + { + ns.push_back(c.root); + + apply_predicates(ns, 0, c); + } + + return ns; + } + + default: + assert(!"Wrong expression for return type node set"); + return xpath_node_set(); + } + } + + bool is_posinv() + { + switch (m_type) + { + case ast_func_position: + return false; + + case ast_string_constant: + case ast_number_constant: + // $$ case ast_variable: + return true; + + case ast_step: + case ast_step_root: + return true; + + case ast_predicate: + case ast_filter: + case ast_filter_posinv: + return true; + + default: + if (m_left && !m_left->is_posinv()) return false; + + for (xpath_ast_node* n = m_right; n; n = n->m_next) + if (!n->is_posinv()) return false; + + return true; + } + } + + xpath_value_type rettype() const + { + return static_cast(m_rettype); + } + }; + + void* xpath_allocator::node() + { + return alloc(sizeof(xpath_ast_node)); + } + + class xpath_parser + { + private: + xpath_allocator& m_alloc; + xpath_lexer m_lexer; + + xpath_parser(const xpath_parser&); + xpath_parser& operator=(const xpath_parser&); + + xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2]) + { + assert(argc <= 1); + + if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw xpath_exception("Function has to be applied to node set"); + + return new (m_alloc.node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]); + } + + xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) + { + switch (name.begin[0]) + { + case 'b': + if (name == PUGIXML_TEXT("boolean") && argc == 1) + return new (m_alloc.node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]); + + break; + + case 'c': + if (name == PUGIXML_TEXT("count") && argc == 1) + { + if (args[0]->rettype() != xpath_type_node_set) throw xpath_exception("count() has to be applied to node set"); + return new (m_alloc.node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]); + } + else if (name == PUGIXML_TEXT("contains") && argc == 2) + return new (m_alloc.node()) xpath_ast_node(ast_func_contains, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("concat") && argc >= 2) + return new (m_alloc.node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("ceiling") && argc == 1) + return new (m_alloc.node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]); + + break; + + case 'f': + if (name == PUGIXML_TEXT("false") && argc == 0) + return new (m_alloc.node()) xpath_ast_node(ast_func_false, xpath_type_boolean); + else if (name == PUGIXML_TEXT("floor") && argc == 1) + return new (m_alloc.node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]); + + break; + + case 'i': + if (name == PUGIXML_TEXT("id") && argc == 1) + return new (m_alloc.node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]); + + break; + + case 'l': + if (name == PUGIXML_TEXT("last") && argc == 0) + return new (m_alloc.node()) xpath_ast_node(ast_func_last, xpath_type_number); + else if (name == PUGIXML_TEXT("lang") && argc == 1) + return new (m_alloc.node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]); + else if (name == PUGIXML_TEXT("local-name") && argc <= 1) + return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args); + + break; + + case 'n': + if (name == PUGIXML_TEXT("name") && argc <= 1) + return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args); + else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) + return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args); + else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) + return new (m_alloc.node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("not") && argc == 1) + return new (m_alloc.node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]); + else if (name == PUGIXML_TEXT("number") && argc <= 1) + return new (m_alloc.node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); + + break; + + case 'p': + if (name == PUGIXML_TEXT("position") && argc == 0) + return new (m_alloc.node()) xpath_ast_node(ast_func_position, xpath_type_number); + + break; + + case 'r': + if (name == PUGIXML_TEXT("round") && argc == 1) + return new (m_alloc.node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]); + + break; + + case 's': + if (name == PUGIXML_TEXT("string") && argc <= 1) + return new (m_alloc.node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); + else if (name == PUGIXML_TEXT("string-length") && argc <= 1) + return new (m_alloc.node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_string, args[0]); + else if (name == PUGIXML_TEXT("starts-with") && argc == 2) + return new (m_alloc.node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); + else if (name == PUGIXML_TEXT("substring-before") && argc == 2) + return new (m_alloc.node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("substring-after") && argc == 2) + return new (m_alloc.node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) + return new (m_alloc.node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("sum") && argc == 1) + { + if (args[0]->rettype() != xpath_type_node_set) throw xpath_exception("sum() has to be applied to node set"); + return new (m_alloc.node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]); + } + + break; + + case 't': + if (name == PUGIXML_TEXT("translate") && argc == 3) + return new (m_alloc.node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("true") && argc == 0) + return new (m_alloc.node()) xpath_ast_node(ast_func_true, xpath_type_boolean); + + break; + } + + throw xpath_exception("Unrecognized function or wrong parameter count"); + + #ifdef __DMC__ + return 0; // Digital Mars C++ + #endif + } + + axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) + { + specified = true; + + switch (name.begin[0]) + { + case 'a': + if (name == PUGIXML_TEXT("ancestor")) + return axis_ancestor; + else if (name == PUGIXML_TEXT("ancestor-or-self")) + return axis_ancestor_or_self; + else if (name == PUGIXML_TEXT("attribute")) + return axis_attribute; + + break; + + case 'c': + if (name == PUGIXML_TEXT("child")) + return axis_child; + + break; + + case 'd': + if (name == PUGIXML_TEXT("descendant")) + return axis_descendant; + else if (name == PUGIXML_TEXT("descendant-or-self")) + return axis_descendant_or_self; + + break; + + case 'f': + if (name == PUGIXML_TEXT("following")) + return axis_following; + else if (name == PUGIXML_TEXT("following-sibling")) + return axis_following_sibling; + + break; + + case 'n': + if (name == PUGIXML_TEXT("namespace")) + return axis_namespace; + + break; + + case 'p': + if (name == PUGIXML_TEXT("parent")) + return axis_parent; + else if (name == PUGIXML_TEXT("preceding")) + return axis_preceding; + else if (name == PUGIXML_TEXT("preceding-sibling")) + return axis_preceding_sibling; + + break; + + case 's': + if (name == PUGIXML_TEXT("self")) + return axis_self; + + break; + } + + specified = false; + return axis_child; + } + + nodetest_t parse_node_test_type(const xpath_lexer_string& name) + { + switch (name.begin[0]) + { + case 'c': + if (name == PUGIXML_TEXT("comment")) + return nodetest_type_comment; + + break; + + case 'n': + if (name == PUGIXML_TEXT("node")) + return nodetest_type_node; + + break; + + case 'p': + if (name == PUGIXML_TEXT("processing-instruction")) + return nodetest_type_pi; + + break; + + case 't': + if (name == PUGIXML_TEXT("text")) + return nodetest_type_text; + + break; + } + + return nodetest_none; + } + + // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall + xpath_ast_node* parse_primary_expression() + { + switch (m_lexer.current()) + { + case lex_var_ref: + { + throw xpath_exception("Variables are not supported"); + + #ifdef __DMC__ + return 0; // Digital Mars C++ + #endif + } + + case lex_open_brace: + { + m_lexer.next(); + + xpath_ast_node* n = parse_expression(); + + if (m_lexer.current() != lex_close_brace) + throw xpath_exception("Unmatched braces"); + + m_lexer.next(); + + return n; + } + + case lex_quoted_string: + { + xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_string_constant, xpath_type_string, m_lexer.contents(), m_alloc); + m_lexer.next(); + + return n; + } + + case lex_number: + { + double value = convert_string_to_number(m_lexer.contents().begin, m_lexer.contents().end); + + xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_number_constant, xpath_type_number, value); + m_lexer.next(); + + return n; + } + + case lex_string: + { + xpath_ast_node* args[2] = {0}; + size_t argc = 0; + + xpath_lexer_string function = m_lexer.contents(); + m_lexer.next(); + + xpath_ast_node* last_arg = 0; + + if (m_lexer.current() != lex_open_brace) + throw xpath_exception("Unrecognized function call"); + m_lexer.next(); + + if (m_lexer.current() != lex_close_brace) + args[argc++] = parse_expression(); + + while (m_lexer.current() != lex_close_brace) + { + if (m_lexer.current() != lex_comma) + throw xpath_exception("No comma between function arguments"); + m_lexer.next(); + + xpath_ast_node* n = parse_expression(); + + if (argc < 2) args[argc] = n; + else last_arg->set_next(n); + + argc++; + last_arg = n; + } + + m_lexer.next(); + + return parse_function(function, argc, args); + } + + default: + throw xpath_exception("Unrecognizable primary expression"); + #ifdef __DMC__ + return 0; // Digital Mars C++ + #endif + } + } + + // FilterExpr ::= PrimaryExpr | FilterExpr Predicate + // Predicate ::= '[' PredicateExpr ']' + // PredicateExpr ::= Expr + xpath_ast_node* parse_filter_expression() + { + xpath_ast_node* n = parse_primary_expression(); + + while (m_lexer.current() == lex_open_square_brace) + { + m_lexer.next(); + + xpath_ast_node* expr = parse_expression(); + + if (n->rettype() != xpath_type_node_set) throw xpath_exception("Predicate has to be applied to node set"); + + bool posinv = expr->rettype() != xpath_type_number && expr->is_posinv(); + + n = new (m_alloc.node()) xpath_ast_node(posinv ? ast_filter_posinv : ast_filter, xpath_type_node_set, n, expr); + + if (m_lexer.current() != lex_close_square_brace) + throw xpath_exception("Unmatched square brace"); + + m_lexer.next(); + } + + return n; + } + + // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep + // AxisSpecifier ::= AxisName '::' | '@'? + // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')' + // NameTest ::= '*' | NCName ':' '*' | QName + // AbbreviatedStep ::= '.' | '..' + xpath_ast_node* parse_step(xpath_ast_node* set) + { + if (set && set->rettype() != xpath_type_node_set) + throw xpath_exception("Step has to be applied to node set"); + + bool axis_specified = false; + axis_t axis = axis_child; // implied child axis + + if (m_lexer.current() == lex_axis_attribute) + { + axis = axis_attribute; + axis_specified = true; + + m_lexer.next(); + } + else if (m_lexer.current() == lex_dot) + { + m_lexer.next(); + + return new (m_alloc.node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, xpath_lexer_string(), m_alloc); + } + else if (m_lexer.current() == lex_double_dot) + { + m_lexer.next(); + + return new (m_alloc.node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, xpath_lexer_string(), m_alloc); + } + + nodetest_t nt_type = nodetest_none; + xpath_lexer_string nt_name; + + if (m_lexer.current() == lex_string) + { + // node name test + nt_name = m_lexer.contents(); + m_lexer.next(); + + // was it an axis name? + if (m_lexer.current() == lex_double_colon) + { + // parse axis name + if (axis_specified) throw xpath_exception("Two axis specifiers in one step"); + + axis = parse_axis_name(nt_name, axis_specified); + + if (!axis_specified) throw xpath_exception("Unknown axis"); + + // read actual node test + m_lexer.next(); + + if (m_lexer.current() == lex_multiply) + { + nt_type = nodetest_all; + nt_name = xpath_lexer_string(); + m_lexer.next(); + } + else if (m_lexer.current() == lex_string) + { + nt_name = m_lexer.contents(); + m_lexer.next(); + } + else throw xpath_exception("Unrecognized node test"); + } + + if (nt_type == nodetest_none) + { + // node type test or processing-instruction + if (m_lexer.current() == lex_open_brace) + { + m_lexer.next(); + + if (m_lexer.current() == lex_close_brace) + { + m_lexer.next(); + + nt_type = parse_node_test_type(nt_name); + + if (nt_type == nodetest_none) throw xpath_exception("Unrecognized node type"); + + nt_name = xpath_lexer_string(); + } + else if (nt_name == PUGIXML_TEXT("processing-instruction")) + { + if (m_lexer.current() != lex_quoted_string) + throw xpath_exception("Only literals are allowed as arguments to processing-instruction()"); + + nt_type = nodetest_pi; + nt_name = m_lexer.contents(); + m_lexer.next(); + + if (m_lexer.current() != lex_close_brace) + throw xpath_exception("Unmatched brace near processing-instruction()"); + m_lexer.next(); + } + else + throw xpath_exception("Unmatched brace near node type test"); + + } + // QName or NCName:* + else + { + const char_t* colon_pos = std::char_traits::find(nt_name.begin, static_cast(nt_name.end - nt_name.begin), ':'); + + if (colon_pos && colon_pos + 2 == nt_name.end && colon_pos[1] == '*') // NCName:* + { + nt_name.end--; // erase * + + nt_type = nodetest_all_in_namespace; + } + else nt_type = nodetest_name; + } + } + } + else if (m_lexer.current() == lex_multiply) + { + nt_type = nodetest_all; + m_lexer.next(); + } + else throw xpath_exception("Unrecognized node test"); + + xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_step, set, axis, nt_type, nt_name, m_alloc); + + xpath_ast_node* last = 0; + + while (m_lexer.current() == lex_open_square_brace) + { + m_lexer.next(); + + xpath_ast_node* expr = parse_expression(); + + xpath_ast_node* pred = new (m_alloc.node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr); + + if (m_lexer.current() != lex_close_square_brace) + throw xpath_exception("Unmatched square brace"); + m_lexer.next(); + + if (last) last->set_next(pred); + else n->set_right(pred); + + last = pred; + } + + return n; + } + + // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step + xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) + { + xpath_ast_node* n = parse_step(set); + + while (m_lexer.current() == lex_slash || m_lexer.current() == lex_double_slash) + { + lexeme_t l = m_lexer.current(); + m_lexer.next(); + + if (l == lex_double_slash) + n = new (m_alloc.node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, xpath_lexer_string(), m_alloc); + + n = parse_step(n); + } + + return n; + } + + // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath + // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath + xpath_ast_node* parse_location_path() + { + if (m_lexer.current() == lex_slash) + { + m_lexer.next(); + + xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_step_root, xpath_type_node_set); + + // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path + lexeme_t l = m_lexer.current(); + + if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply) + return parse_relative_location_path(n); + else + return n; + } + else if (m_lexer.current() == lex_double_slash) + { + m_lexer.next(); + + xpath_ast_node* n = new (m_alloc.node()) xpath_ast_node(ast_step_root, xpath_type_node_set); + n = new (m_alloc.node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, xpath_lexer_string(), m_alloc); + + return parse_relative_location_path(n); + } + else + { + return parse_relative_location_path(0); + } + } + + // PathExpr ::= LocationPath + // | FilterExpr + // | FilterExpr '/' RelativeLocationPath + // | FilterExpr '//' RelativeLocationPath + xpath_ast_node* parse_path_expression() + { + // Clarification. + // PathExpr begins with either LocationPath or FilterExpr. + // FilterExpr begins with PrimaryExpr + // PrimaryExpr begins with '$' in case of it being a variable reference, + // '(' in case of it being an expression, string literal, number constant or + // function call. + + if (m_lexer.current() == lex_var_ref || m_lexer.current() == lex_open_brace || + m_lexer.current() == lex_quoted_string || m_lexer.current() == lex_number || + m_lexer.current() == lex_string) + { + if (m_lexer.current() == lex_string) + { + // This is either a function call, or not - if not, we shall proceed with location path + const char_t* state = m_lexer.state(); + + while (IS_CHARTYPEX(*state, ctx_space)) ++state; + + if (*state != '(') return parse_location_path(); + + // This looks like a function call; however this still can be a node-test. Check it. + if (parse_node_test_type(m_lexer.contents()) != nodetest_none) return parse_location_path(); + } + + xpath_ast_node* n = parse_filter_expression(); + + if (m_lexer.current() == lex_slash || m_lexer.current() == lex_double_slash) + { + lexeme_t l = m_lexer.current(); + m_lexer.next(); + + if (l == lex_double_slash) + { + if (n->rettype() != xpath_type_node_set) throw xpath_exception("Step has to be applied to node set"); + + n = new (m_alloc.node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, xpath_lexer_string(), m_alloc); + } + + // select from location path + return parse_relative_location_path(n); + } + + return n; + } + else return parse_location_path(); + } + + // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr + xpath_ast_node* parse_union_expression() + { + xpath_ast_node* n = parse_path_expression(); + + while (m_lexer.current() == lex_union) + { + m_lexer.next(); + + xpath_ast_node* expr = parse_union_expression(); + + if (n->rettype() != xpath_type_node_set || expr->rettype() != xpath_type_node_set) + throw xpath_exception("Union operator has to be applied to node sets"); + + n = new (m_alloc.node()) xpath_ast_node(ast_op_union, xpath_type_node_set, n, expr); + } + + return n; + } + + // UnaryExpr ::= UnionExpr | '-' UnaryExpr + xpath_ast_node* parse_unary_expression() + { + if (m_lexer.current() == lex_minus) + { + m_lexer.next(); + + xpath_ast_node* expr = parse_unary_expression(); + + return new (m_alloc.node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr); + } + else return parse_union_expression(); + } + + // MultiplicativeExpr ::= UnaryExpr + // | MultiplicativeExpr '*' UnaryExpr + // | MultiplicativeExpr 'div' UnaryExpr + // | MultiplicativeExpr 'mod' UnaryExpr + xpath_ast_node* parse_multiplicative_expression() + { + xpath_ast_node* n = parse_unary_expression(); + + while (m_lexer.current() == lex_multiply || (m_lexer.current() == lex_string && + (m_lexer.contents() == PUGIXML_TEXT("mod") || m_lexer.contents() == PUGIXML_TEXT("div")))) + { + ast_type_t op = m_lexer.current() == lex_multiply ? ast_op_multiply : + m_lexer.contents().begin[0] == 'd' ? ast_op_divide : ast_op_mod; + m_lexer.next(); + + xpath_ast_node* expr = parse_unary_expression(); + + n = new (m_alloc.node()) xpath_ast_node(op, xpath_type_number, n, expr); + } + + return n; + } + + // AdditiveExpr ::= MultiplicativeExpr + // | AdditiveExpr '+' MultiplicativeExpr + // | AdditiveExpr '-' MultiplicativeExpr + xpath_ast_node* parse_additive_expression() + { + xpath_ast_node* n = parse_multiplicative_expression(); + + while (m_lexer.current() == lex_plus || m_lexer.current() == lex_minus) + { + lexeme_t l = m_lexer.current(); + + m_lexer.next(); + + xpath_ast_node* expr = parse_multiplicative_expression(); + + n = new (m_alloc.node()) xpath_ast_node(l == lex_plus ? ast_op_add : ast_op_subtract, xpath_type_number, n, expr); + } + + return n; + } + + // RelationalExpr ::= AdditiveExpr + // | RelationalExpr '<' AdditiveExpr + // | RelationalExpr '>' AdditiveExpr + // | RelationalExpr '<=' AdditiveExpr + // | RelationalExpr '>=' AdditiveExpr + xpath_ast_node* parse_relational_expression() + { + xpath_ast_node* n = parse_additive_expression(); + + while (m_lexer.current() == lex_less || m_lexer.current() == lex_less_or_equal || + m_lexer.current() == lex_greater || m_lexer.current() == lex_greater_or_equal) + { + lexeme_t l = m_lexer.current(); + m_lexer.next(); + + xpath_ast_node* expr = parse_additive_expression(); + + n = new (m_alloc.node()) xpath_ast_node(l == lex_less ? ast_op_less : l == lex_greater ? ast_op_greater : + l == lex_less_or_equal ? ast_op_less_or_equal : ast_op_greater_or_equal, xpath_type_boolean, n, expr); + } + + return n; + } + + // EqualityExpr ::= RelationalExpr + // | EqualityExpr '=' RelationalExpr + // | EqualityExpr '!=' RelationalExpr + xpath_ast_node* parse_equality_expression() + { + xpath_ast_node* n = parse_relational_expression(); + + while (m_lexer.current() == lex_equal || m_lexer.current() == lex_not_equal) + { + lexeme_t l = m_lexer.current(); + + m_lexer.next(); + + xpath_ast_node* expr = parse_relational_expression(); + + n = new (m_alloc.node()) xpath_ast_node(l == lex_equal ? ast_op_equal : ast_op_not_equal, xpath_type_boolean, n, expr); + } + + return n; + } + + // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr + xpath_ast_node* parse_and_expression() + { + xpath_ast_node* n = parse_equality_expression(); + + while (m_lexer.current() == lex_string && m_lexer.contents() == PUGIXML_TEXT("and")) + { + m_lexer.next(); + + xpath_ast_node* expr = parse_equality_expression(); + + n = new (m_alloc.node()) xpath_ast_node(ast_op_and, xpath_type_boolean, n, expr); + } + + return n; + } + + // OrExpr ::= AndExpr | OrExpr 'or' AndExpr + xpath_ast_node* parse_or_expression() + { + xpath_ast_node* n = parse_and_expression(); + + while (m_lexer.current() == lex_string && m_lexer.contents() == PUGIXML_TEXT("or")) + { + m_lexer.next(); + + xpath_ast_node* expr = parse_and_expression(); + + n = new (m_alloc.node()) xpath_ast_node(ast_op_or, xpath_type_boolean, n, expr); + } + + return n; + } + + // Expr ::= OrExpr + xpath_ast_node* parse_expression() + { + return parse_or_expression(); + } + + public: + explicit xpath_parser(const char_t* query, xpath_allocator& alloc): m_alloc(alloc), m_lexer(query) + { + } + + xpath_ast_node* parse() + { + xpath_ast_node* result = parse_expression(); + + if (m_lexer.current() != lex_eof) + { + // there are still unparsed tokens left, error + throw xpath_exception("Incorrect query"); + } + + return result; + } + }; + + xpath_query::xpath_query(const char_t* query): m_alloc(0), m_root(0) + { + compile(query); + } + + xpath_query::~xpath_query() + { + delete m_alloc; + } + + void xpath_query::compile(const char_t* query) + { + delete m_alloc; + m_alloc = new xpath_allocator; + + xpath_parser p(query, *m_alloc); + + m_root = p.parse(); + } + + xpath_value_type xpath_query::return_type() const + { + if (!m_root) return xpath_type_none; + + return m_root->rettype(); + } + + bool xpath_query::evaluate_boolean(const xml_node& n) const + { + if (!m_root) return false; + + xpath_context c; + + c.root = n.root(); + c.n = n; + c.position = 1; + c.size = 1; + + return m_root->eval_boolean(c); + } + + double xpath_query::evaluate_number(const xml_node& n) const + { + if (!m_root) return gen_nan(); + + xpath_context c; + + c.root = n.root(); + c.n = n; + c.position = 1; + c.size = 1; + + return m_root->eval_number(c); + } + + string_t xpath_query::evaluate_string(const xml_node& n) const + { + if (!m_root) return string_t(); + + xpath_context c; + + c.root = n.root(); + c.n = n; + c.position = 1; + c.size = 1; + + return m_root->eval_string(c); + } + + xpath_node_set xpath_query::evaluate_node_set(const xml_node& n) const + { + if (!m_root) return xpath_node_set(); + if (m_root->rettype() != xpath_type_node_set) throw xpath_exception("Expression does not evaluate to node set"); + + xpath_context c; + + c.root = n.root(); + c.n = n; + c.position = 1; + c.size = 1; + + return m_root->eval_node_set(c); + } + + xpath_node xml_node::select_single_node(const char_t* query) const + { + xpath_query q(query); + return select_single_node(q); + } + + xpath_node xml_node::select_single_node(const xpath_query& query) const + { + xpath_node_set s = query.evaluate_node_set(*this); + return s.empty() ? xpath_node() : s.first(); + } + + xpath_node_set xml_node::select_nodes(const char_t* query) const + { + xpath_query q(query); + return select_nodes(q); + } + + xpath_node_set xml_node::select_nodes(const xpath_query& query) const + { + return query.evaluate_node_set(*this); + } +} + +#endif + +/** + * Copyright (c) 2006-2010 Arseny Kapoulkine + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ -- cgit v1.2.3