summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorarseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640>2010-08-29 15:31:55 +0000
committerarseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640>2010-08-29 15:31:55 +0000
commit9b337a176f89c2261211188cc398c3e15952d87a (patch)
treeb621aac0c69e313046a3dcf403621e09c1291692
parentfd6b419b2aa5d7f8d7e3941ee2a1d72ae5f3257d (diff)
XPath: Moved implementation to pugixml.cpp
git-svn-id: http://pugixml.googlecode.com/svn/trunk@670 99668b35-9821-0410-8761-19e4c4f06640
-rw-r--r--Jamfile.jam2
-rw-r--r--src/pugixml.cpp3851
-rw-r--r--src/pugixpath.cpp3890
3 files changed, 3852 insertions, 3891 deletions
diff --git a/Jamfile.jam b/Jamfile.jam
index 63dd298..0ca21a4 100644
--- a/Jamfile.jam
+++ b/Jamfile.jam
@@ -75,7 +75,7 @@ for CONFIG in $(CONFIGURATIONS)
# build library
local PUGIXML = $(CFGBUILD)/pugixml.lib ;
- Library $(PUGIXML) : src/pugixml.cpp src/pugixpath.cpp : $(CFGFLAGS) ;
+ Library $(PUGIXML) : src/pugixml.cpp : $(CFGFLAGS) ;
Alias pugixml : $(PUGIXML) ;
# build tests
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index d319069..db9564c 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -4496,6 +4496,3857 @@ namespace std
}
#endif
+#ifndef PUGIXML_NO_XPATH
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <setjmp.h>
+#include <ctype.h>
+#include <math.h>
+#include <float.h>
+
+#ifdef PUGIXML_WCHAR_MODE
+# include <wchar.h>
+#endif
+
+#include <new>
+
+#ifndef PUGIXML_NO_STL
+# include <string>
+#endif
+
+// int32_t
+#if !defined(_MSC_VER) || _MSC_VER >= 1600
+# include <stdint.h>
+#else
+typedef __int32 int32_t;
+#endif
+
+#if defined(_MSC_VER)
+# pragma warning(disable: 4127) // conditional expression is constant
+# pragma warning(disable: 4324) // structure was padded due to __declspec(align())
+# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
+# pragma warning(disable: 4702) // unreachable code
+# pragma warning(disable: 4996) // this function or variable may be unsafe
+#endif
+
+#ifdef __INTEL_COMPILER
+# pragma warning(disable: 1478 1786) // function was declared "deprecated"
+#endif
+
+#ifdef __SNC__
+# pragma diag_suppress=237 // controlling expression is constant
+#endif
+
+// String utilities prototypes
+namespace pugi
+{
+ namespace impl
+ {
+ size_t strlen(const char_t* s);
+ bool strequal(const char_t* src, const char_t* dst);
+ bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count);
+ void widen_ascii(wchar_t* dest, const char* source);
+ }
+}
+
+// STL replacements
+namespace pstd
+{
+ struct equal_to
+ {
+ template <typename T> bool operator()(const T& lhs, const T& rhs) const
+ {
+ return lhs == rhs;
+ }
+ };
+
+ struct not_equal_to
+ {
+ template <typename T> bool operator()(const T& lhs, const T& rhs) const
+ {
+ return lhs != rhs;
+ }
+ };
+
+ struct less
+ {
+ template <typename T> bool operator()(const T& lhs, const T& rhs) const
+ {
+ return lhs < rhs;
+ }
+ };
+
+ struct less_equal
+ {
+ template <typename T> bool operator()(const T& lhs, const T& rhs) const
+ {
+ return lhs <= rhs;
+ }
+ };
+
+ template <typename T> void swap(T& lhs, T& rhs)
+ {
+ T temp = lhs;
+ lhs = rhs;
+ rhs = temp;
+ }
+
+ template <typename I, typename J> void copy(I begin, I end, J target)
+ {
+ while (begin != end) *target++ = *begin++;
+ }
+
+ template <typename I, typename T> I find(I begin, I end, T elem)
+ {
+ for (I it = begin; it != end; ++it)
+ if (*it == elem)
+ return it;
+
+ return end;
+ }
+
+ template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
+ {
+ I result = begin;
+
+ for (I it = begin + 1; it != end; ++it)
+ if (pred(*it, *result))
+ result = it;
+
+ return result;
+ }
+
+ template <typename I> void reverse(I begin, I end)
+ {
+ while (begin + 1 < end) swap(*begin++, *--end);
+ }
+
+ template <typename I> I unique(I begin, I end)
+ {
+ // fast skip head
+ while (begin + 1 < end && *begin != *(begin + 1)) begin++;
+
+ if (begin == end) return begin;
+
+ // last written element
+ I write = begin++;
+
+ // merge unique elements
+ while (begin != end)
+ {
+ if (*begin != *write)
+ *++write = *begin++;
+ else
+ begin++;
+ }
+
+ // past-the-end (write points to live element)
+ return write + 1;
+ }
+
+ template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
+ {
+ while (begin != end) swap(*begin, *min_element(begin, end, pred)), begin++;
+ }
+}
+
+namespace
+{
+ using namespace pugi;
+
+ class xpath_string
+ {
+ const char_t* _buffer;
+ bool _uses_heap;
+
+ static char_t* duplicate_string(const char_t* string, size_t length)
+ {
+ char_t* result = static_cast<char_t*>(get_memory_allocation_function()((length + 1) * sizeof(char_t)));
+ if (!result) return 0; // $$ out of memory
+
+ memcpy(result, string, length * sizeof(char_t));
+ result[length] = 0;
+
+ return result;
+ }
+
+ static char_t* duplicate_string(const char_t* string)
+ {
+ return duplicate_string(string, impl::strlen(string));
+ }
+
+ public:
+ xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false)
+ {
+ }
+
+ ~xpath_string()
+ {
+ if (_uses_heap) get_memory_deallocation_function()(const_cast<char_t*>(_buffer));
+ }
+
+ explicit xpath_string(const char_t* str)
+ {
+ bool empty = (*str == 0);
+
+ _buffer = empty ? PUGIXML_TEXT("") : duplicate_string(str);
+ _uses_heap = !empty;
+ }
+
+ explicit xpath_string(const char_t* str, bool use_heap): _buffer(str), _uses_heap(use_heap)
+ {
+ }
+
+ xpath_string(const char_t* begin, const char_t* end)
+ {
+ assert(begin <= end);
+
+ if (begin != end)
+ {
+ _buffer = duplicate_string(begin, static_cast<size_t>(end - begin));
+ _uses_heap = true;
+ }
+ else
+ {
+ _buffer = PUGIXML_TEXT("");
+ _uses_heap = false;
+ }
+ }
+
+ xpath_string(const xpath_string& o)
+ {
+ _buffer = o._uses_heap ? duplicate_string(o._buffer) : o._buffer;
+ _uses_heap = o._uses_heap;
+ }
+
+ xpath_string& operator=(const xpath_string& o)
+ {
+ xpath_string temp(o);
+ swap(temp);
+
+ return *this;
+ }
+
+ void swap(xpath_string& o)
+ {
+ pstd::swap(_buffer, o._buffer);
+ pstd::swap(_uses_heap, o._uses_heap);
+ }
+
+ void append(const xpath_string& o)
+ {
+ // skip empty sources
+ if (!*o._buffer) return;
+
+ // fast append for empty target and constant source
+ if (!*_buffer && !o._uses_heap)
+ {
+ _buffer = o._buffer;
+ _uses_heap = false;
+ }
+ else
+ {
+ // need to make heap copy
+ size_t target_length = impl::strlen(_buffer);
+ size_t source_length = impl::strlen(o._buffer);
+ size_t length = target_length + source_length;
+
+ // allocate new buffer
+ char_t* result = static_cast<char_t*>(get_memory_allocation_function()((length + 1) * sizeof(char_t)));
+ if (!result) return; // $$ out of memory
+
+ // append both strings in the new buffer
+ memcpy(result, _buffer, target_length * sizeof(char_t));
+ memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
+ result[length] = 0;
+
+ // finalize
+ xpath_string(result, true).swap(*this);
+ }
+ }
+
+ const char_t* c_str() const
+ {
+ return _buffer;
+ }
+
+ size_t length() const
+ {
+ return impl::strlen(_buffer);
+ }
+
+ char_t* data()
+ {
+ // make private heap copy
+ if (!_uses_heap)
+ {
+ _buffer = duplicate_string(_buffer);
+ _uses_heap = true;
+ }
+
+ return const_cast<char_t*>(_buffer);
+ }
+
+ bool empty() const
+ {
+ return *_buffer == 0;
+ }
+
+ bool operator==(const xpath_string& o) const
+ {
+ return impl::strequal(_buffer, o._buffer);
+ }
+
+ bool operator!=(const xpath_string& o) const
+ {
+ return !impl::strequal(_buffer, o._buffer);
+ }
+ };
+
+ xpath_string xpath_string_const(const char_t* str)
+ {
+ return xpath_string(str, false);
+ }
+}
+
+namespace
+{
+ using namespace pugi;
+
+ enum chartypex
+ {
+ ctx_space = 1, // \r, \n, space, tab
+ ctx_start_symbol = 2, // Any symbol > 127, a-z, A-Z, _
+ ctx_digit = 4, // 0-9
+ ctx_symbol = 8 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
+ };
+
+ const unsigned char chartypex_table[256] =
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 0-15
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 0, // 32-47
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 0, 0, 0, 0, 0, // 48-63
+ 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 64-79
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 10, // 80-95
+ 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 96-111
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, // 112-127
+
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 128+
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
+ };
+
+#ifdef PUGIXML_WCHAR_MODE
+ #define IS_CHARTYPEX(c, ct) ((static_cast<unsigned int>(c) < 128 ? chartypex_table[static_cast<unsigned int>(c)] : chartypex_table[128]) & (ct))
+#else
+ #define IS_CHARTYPEX(c, ct) (chartypex_table[static_cast<unsigned char>(c)] & (ct))
+#endif
+
+ bool starts_with(const char_t* string, const char_t* pattern)
+ {
+ while (*pattern && *string == *pattern)
+ {
+ string++;
+ pattern++;
+ }
+
+ return *pattern == 0;
+ }
+
+ const char_t* find_char(const char_t* s, char_t c)
+ {
+ #ifdef PUGIXML_WCHAR_MODE
+ return wcschr(s, c);
+ #else
+ return strchr(s, c);
+ #endif
+ }
+
+ const char_t* find_substring(const char_t* s, const char_t* p)
+ {
+ #ifdef PUGIXML_WCHAR_MODE
+ return wcsstr(s, p);
+ #else
+ return strstr(s, p);
+ #endif
+ }
+
+ xpath_string string_value(const xpath_node& na)
+ {
+ if (na.attribute())
+ return xpath_string_const(na.attribute().value());
+ else
+ {
+ const xml_node& n = na.node();
+
+ switch (n.type())
+ {
+ case node_pcdata:
+ case node_cdata:
+ case node_comment:
+ case node_pi:
+ return xpath_string_const(n.value());
+
+ case node_document:
+ case node_element:
+ {
+ xpath_string result;
+
+ xml_node cur = n.first_child();
+
+ while (cur && cur != n)
+ {
+ if (cur.type() == node_pcdata || cur.type() == node_cdata)
+ result.append(xpath_string_const(cur.value()));
+
+ if (cur.first_child())
+ cur = cur.first_child();
+ else if (cur.next_sibling())
+ cur = cur.next_sibling();
+ else
+ {
+ while (!cur.next_sibling() && cur != n)
+ cur = cur.parent();
+
+ if (cur != n) cur = cur.next_sibling();
+ }
+ }
+
+ return result;
+ }
+
+ default:
+ return xpath_string();
+ }
+ }
+ }
+
+ unsigned int node_height(xml_node n)
+ {
+ unsigned int result = 0;
+
+ while (n)
+ {
+ ++result;
+ n = n.parent();
+ }
+
+ return result;
+ }
+
+ // precondition: node_height of ln is <= node_height of rn, ln != rn
+ bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh)
+ {
+ assert(lh <= rh);
+
+ while (lh < rh)
+ {
+ --rh;
+ rn = rn.parent();
+ }
+
+ if (ln == rn) return true;
+
+ while (ln.parent() != rn.parent())
+ {
+ ln = ln.parent();
+ rn = rn.parent();
+ }
+
+ for (; ln; ln = ln.next_sibling())
+ if (ln == rn)
+ return true;
+
+ return false;
+ }
+
+ bool node_is_ancestor(xml_node parent, xml_node node)
+ {
+ while (node && node != parent) node = node.parent();
+
+ return parent && node == parent;
+ }
+
+ struct document_order_comparator
+ {
+ bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
+ {
+ xml_node ln = lhs.node(), rn = rhs.node();
+
+ const void* lo = lhs.attribute() ? lhs.attribute().document_order() : ln.document_order();
+ const void* ro = rhs.attribute() ? rhs.attribute().document_order() : rn.document_order();
+
+ if (lo && ro) return lo < ro;
+
+ if (lhs.attribute() && rhs.attribute())
+ {
+ if (lhs.parent() == rhs.parent())
+ {
+ for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
+ if (a == rhs.attribute())
+ return true;
+
+ return false;
+ }
+
+ ln = lhs.parent();
+ rn = rhs.parent();
+ }
+ else if (lhs.attribute())
+ {
+ if (lhs.parent() == rhs.node()) return false;
+
+ ln = lhs.parent();
+ }
+ else if (rhs.attribute())
+ {
+ if (rhs.parent() == lhs.node()) return true;
+
+ rn = rhs.parent();
+ }
+
+ if (ln == rn) return false;
+
+ unsigned int lh = node_height(ln);
+ unsigned int rh = node_height(rn);
+
+ return (lh <= rh) ? node_is_before(ln, lh, rn, rh) : !node_is_before(rn, rh, ln, lh);
+ }
+ };
+
+ struct duplicate_comparator
+ {
+ bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
+ {
+ if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
+ else return rhs.attribute() ? false : lhs.node() < rhs.node();
+ }
+ };
+
+ double gen_nan()
+ {
+ #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
+ union { float f; int32_t i; } u[sizeof(float) == sizeof(int32_t) ? 1 : -1];
+ u[0].i = 0x7fc00000;
+ return u[0].f;
+ #else
+ // fallback
+ const volatile double zero = 0.0;
+ return zero / zero;
+ #endif
+ }
+
+ bool is_nan(double value)
+ {
+ #if defined(_MSC_VER) || defined(__BORLANDC__)
+ return !!_isnan(value);
+ #elif defined(fpclassify) && defined(FP_NAN)
+ return fpclassify(value) == FP_NAN;
+ #else
+ // fallback
+ const volatile double v = value;
+ return v != v;
+ #endif
+ }
+
+ const char_t* convert_number_to_string_special(double value)
+ {
+ #if defined(_MSC_VER) || defined(__BORLANDC__)
+ if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
+ if (_isnan(value)) return PUGIXML_TEXT("NaN");
+ return PUGIXML_TEXT("-Infinity") + (value > 0);
+ #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
+ switch (fpclassify(value))
+ {
+ case FP_NAN:
+ return PUGIXML_TEXT("NaN");
+
+ case FP_INFINITE:
+ return PUGIXML_TEXT("-Infinity") + (value > 0);
+
+ case FP_ZERO:
+ return PUGIXML_TEXT("0");
+
+ default:
+ return 0;
+ }
+ #else
+ // fallback
+ const volatile double v = value;
+
+ if (v == 0) return PUGIXML_TEXT("0");
+ if (v != v) return PUGIXML_TEXT("NaN");
+ if (v * 2 == v) return PUGIXML_TEXT("-Infinity") + (value > 0);
+ return 0;
+ #endif
+ }
+
+ bool convert_number_to_boolean(double value)
+ {
+ return (value != 0 && !is_nan(value));
+ }
+
+ void truncate_zeros(char* begin, char* end)
+ {
+ while (begin != end && end[-1] == '0') end--;
+
+ *end = 0;
+ }
+
+ // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+ void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
+ {
+ // get base values
+ int sign, exponent;
+ _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
+
+ // truncate redundant zeros
+ truncate_zeros(buffer, buffer + strlen(buffer));
+
+ // fill results
+ *out_mantissa = buffer;
+ *out_exponent = exponent;
+ }
+#else
+ void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
+ {
+ // get a scientific notation value with IEEE DBL_DIG decimals
+ sprintf(buffer, "%.*e", DBL_DIG, value);
+ assert(strlen(buffer) < buffer_size);
+ (void)!buffer_size;
+
+ // get the exponent (possibly negative)
+ char* exponent_string = strchr(buffer, 'e');
+ assert(exponent_string);
+
+ int exponent = atoi(exponent_string + 1);
+
+ // extract mantissa string: skip sign
+ char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
+ assert(mantissa[0] != '0' && mantissa[1] == '.');
+
+ // divide mantissa by 10 to eliminate integer part
+ mantissa[1] = mantissa[0];
+ mantissa++;
+ exponent++;
+
+ // remove extra mantissa digits and zero-terminate mantissa
+ truncate_zeros(mantissa, exponent_string);
+
+ // fill results
+ *out_mantissa = mantissa;
+ *out_exponent = exponent;
+ }
+#endif
+
+ xpath_string convert_number_to_string(double value)
+ {
+ // try special number conversion
+ const char_t* special = convert_number_to_string_special(value);
+ if (special) return xpath_string_const(special);
+
+ // get mantissa + exponent form
+ char mantissa_buffer[64];
+
+ char* mantissa;
+ int exponent;
+ convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
+
+ // make the number!
+ char_t result[512];
+ char_t* s = result;
+
+ // sign
+ if (value < 0) *s++ = '-';
+
+ // integer part
+ if (exponent <= 0)
+ {
+ *s++ = '0';
+ }
+ else
+ {
+ while (exponent > 0)
+ {
+ assert(*mantissa == 0 || (unsigned)(*mantissa - '0') <= 9);
+ *s++ = *mantissa ? *mantissa++ : '0';
+ exponent--;
+ }
+ }
+
+ // fractional part
+ if (*mantissa)
+ {
+ // decimal point
+ *s++ = '.';
+
+ // extra zeroes from negative exponent
+ while (exponent < 0)
+ {
+ *s++ = '0';
+ exponent++;
+ }
+
+ // extra mantissa digits
+ while (*mantissa)
+ {
+ assert((unsigned)(*mantissa - '0') <= 9);
+ *s++ = *mantissa++;
+ }
+ }
+
+ // zero-terminate
+ assert(s < result + sizeof(result) / sizeof(result[0]));
+ *s = 0;
+
+ return xpath_string(result);
+ }
+
+ bool check_string_to_number_format(const char_t* string)
+ {
+ // parse leading whitespace
+ while (IS_CHARTYPEX(*string, ctx_space)) ++string;
+
+ // parse sign
+ if (*string == '-') ++string;
+
+ if (!*string) return false;
+
+ // if there is no integer part, there should be a decimal part with at least one digit
+ if (!IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !IS_CHARTYPEX(string[1], ctx_digit))) return false;
+
+ // parse integer part
+ while (IS_CHARTYPEX(*string, ctx_digit)) ++string;
+
+ // parse decimal part
+ if (*string == '.')
+ {
+ ++string;
+
+ while (IS_CHARTYPEX(*string, ctx_digit)) ++string;
+ }
+
+ // parse trailing whitespace
+ while (IS_CHARTYPEX(*string, ctx_space)) ++string;
+
+ return *string == 0;
+ }
+
+ double convert_string_to_number(const char_t* string)
+ {
+ // check string format
+ if (!check_string_to_number_format(string)) return gen_nan();
+
+ // parse string
+ #ifdef PUGIXML_WCHAR_MODE
+ return wcstod(string, 0);
+ #else
+ return atof(string);
+ #endif
+ }
+
+ bool convert_string_to_number(const char_t* begin, const char_t* end, double* out_result)
+ {
+ char_t buffer[32];
+
+ size_t length = static_cast<size_t>(end - begin);
+ char_t* scratch = buffer;
+
+ if (length >= sizeof(buffer) / sizeof(buffer[0]))
+ {
+ // need to make dummy on-heap copy
+ scratch = static_cast<char_t*>(get_memory_allocation_function()((length + 1) * sizeof(char_t)));
+ if (!scratch) return false;
+ }
+
+ // copy string to zero-terminated buffer and perform conversion
+ memcpy(scratch, begin, length * sizeof(char_t));
+ scratch[length] = 0;
+
+ *out_result = convert_string_to_number(scratch);
+
+ // free dummy buffer
+ if (scratch != buffer) get_memory_deallocation_function()(scratch);
+
+ return true;
+ }
+
+ double round_nearest(double value)
+ {
+ return floor(value + 0.5);
+ }
+
+ double round_nearest_nzero(double value)
+ {
+ // same as round_nearest, but returns -0 for [-0.5, -0]
+ // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
+ return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
+ }
+
+ const char_t* qualified_name(const xpath_node& node)
+ {
+ return node.attribute() ? node.attribute().name() : node.node().name();
+ }
+
+ const char_t* local_name(const xpath_node& node)
+ {
+ const char_t* name = qualified_name(node);
+ const char_t* p = find_char(name, ':');
+
+ return p ? p + 1 : name;
+ }
+
+ struct namespace_uri_predicate
+ {
+ const char_t* prefix;
+ size_t prefix_length;
+
+ namespace_uri_predicate(const char_t* name)
+ {
+ const char_t* pos = find_char(name, ':');
+
+ prefix = pos ? name : 0;
+ prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
+ }
+
+ bool operator()(const xml_attribute& a) const
+ {
+ const char_t* name = a.name();
+
+ if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
+
+ return prefix ? name[5] == ':' && impl::strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
+ }
+ };
+
+ const char_t* namespace_uri(const xml_node& node)
+ {
+ namespace_uri_predicate pred = node.name();
+
+ xml_node p = node;
+
+ while (p)
+ {
+ xml_attribute a = p.find_attribute(pred);
+
+ if (a) return a.value();
+
+ p = p.parent();
+ }
+
+ return PUGIXML_TEXT("");
+ }
+
+ const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent)
+ {
+ namespace_uri_predicate pred = attr.name();
+
+ // Default namespace does not apply to attributes
+ if (!pred.prefix) return PUGIXML_TEXT("");
+
+ xml_node p = parent;
+
+ while (p)
+ {
+ xml_attribute a = p.find_attribute(pred);
+
+ if (a) return a.value();
+
+ p = p.parent();
+ }
+
+ return PUGIXML_TEXT("");
+ }
+
+ const char_t* namespace_uri(const xpath_node& node)
+ {
+ return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
+ }
+
+ void normalize_space(char_t* buffer)
+ {
+ char_t* write = buffer;
+
+ for (char_t* it = buffer; *it; )
+ {
+ char_t ch = *it++;
+
+ if (IS_CHARTYPEX(ch, ctx_space))
+ {
+ // replace whitespace sequence with single space
+ while (IS_CHARTYPEX(*it, ctx_space)) it++;
+
+ // avoid leading spaces
+ if (write != buffer) *write++ = ' ';
+ }
+ else *write++ = ch;
+ }
+
+ // remove trailing space
+ if (write != buffer && IS_CHARTYPEX(write[-1], ctx_space)) write--;
+
+ // zero-terminate
+ *write = 0;
+ }
+
+ void translate(char_t* buffer, const char_t* from, const char_t* to)
+ {
+ size_t to_length = impl::strlen(to);
+
+ char_t* write = buffer;
+
+ while (*buffer)
+ {
+ #ifdef __DMC__
+ volatile // explicitly store to local to work around DMC bug (it loads 4 bytes from buffer otherwise)
+ #endif
+ char_t ch = *buffer++;
+
+ const char_t* pos = find_char(from, ch);
+
+ if (!pos)
+ *write++ = ch; // do not process
+ else if (static_cast<size_t>(pos - from) < to_length)
+ *write++ = to[pos - from]; // replace
+ }
+
+ // zero-terminate
+ *write = 0;
+ }
+}
+
+namespace pugi
+{
+#ifndef PUGIXML_NO_EXCEPTIONS
+ xpath_exception::xpath_exception(const xpath_parse_result& result): _result(result)
+ {
+ assert(result.error);
+ }
+
+ const char* xpath_exception::what() const throw()
+ {
+ return _result.error;
+ }
+
+ const xpath_parse_result& xpath_exception::result() const
+ {
+ return _result;
+ }
+#endif
+
+ const size_t xpath_memory_block_size = 4096;
+
+ class xpath_allocator
+ {
+ struct memory_block
+ {
+ memory_block* next;
+
+ char data[xpath_memory_block_size];
+ };
+
+ memory_block* _root;
+ size_t _root_size;
+
+ memory_block _first;
+
+ public:
+ static xpath_allocator* create()
+ {
+ void* memory = get_memory_allocation_function()(sizeof(xpath_allocator));
+
+ return new (memory) xpath_allocator();
+ }
+
+ static void destroy(xpath_allocator* alloc)
+ {
+ if (!alloc) return;
+
+ // free all allocated pages
+ memory_block* cur = alloc->_root;
+ memory_block* first = &alloc->_first;
+
+ while (cur != first)
+ {
+ memory_block* next = cur->next;
+
+ get_memory_deallocation_function()(cur);
+
+ cur = next;
+ }
+
+ // free allocator memory (with the first page)
+ get_memory_deallocation_function()(alloc);
+ }
+
+ xpath_allocator()
+ {
+ _root = &_first;
+ _root_size = 0;
+ _first.next = 0;
+ }
+
+ void* allocate(size_t size)
+ {
+ // align size so that we're able to store pointers in subsequent blocks
+ size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+
+ if (_root_size + size <= xpath_memory_block_size)
+ {
+ void* buf = _root->data + _root_size;
+ _root_size += size;
+ return buf;
+ }
+ else
+ {
+ size_t block_data_size = (size > xpath_memory_block_size) ? size : xpath_memory_block_size;
+ size_t block_size = block_data_size + offsetof(memory_block, data);
+
+ memory_block* block = static_cast<memory_block*>(get_memory_allocation_function()(block_size));
+ if (!block) return 0;
+
+ block->next = _root;
+
+ _root = block;
+ _root_size = size;
+
+ return block->data;
+ }
+ }
+ };
+
+ xpath_node::xpath_node()
+ {
+ }
+
+ xpath_node::xpath_node(const xml_node& node): _node(node)
+ {
+ }
+
+ xpath_node::xpath_node(const xml_attribute& attribute, const xml_node& parent): _node(parent), _attribute(attribute)
+ {
+ }
+
+ xml_node xpath_node::node() const
+ {
+ return _attribute ? xml_node() : _node;
+ }
+
+ xml_attribute xpath_node::attribute() const
+ {
+ return _attribute;
+ }
+
+ xml_node xpath_node::parent() const
+ {
+ return _attribute ? _node : _node.parent();
+ }
+
+ xpath_node::operator xpath_node::unspecified_bool_type() const
+ {
+ return (_node || _attribute) ? &xpath_node::_node : 0;
+ }
+
+ bool xpath_node::operator!() const
+ {
+ return !(_node || _attribute);
+ }
+
+ bool xpath_node::operator==(const xpath_node& n) const
+ {
+ return _node == n._node && _attribute == n._attribute;
+ }
+
+ bool xpath_node::operator!=(const xpath_node& n) const
+ {
+ return _node != n._node || _attribute != n._attribute;
+ }
+
+#ifdef __BORLANDC__
+ bool operator&&(const xpath_node& lhs, bool rhs)
+ {
+ return (bool)lhs && rhs;
+ }
+
+ bool operator||(const xpath_node& lhs, bool rhs)
+ {
+ return (bool)lhs || rhs;
+ }
+#endif
+
+ xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage), _eos(&_storage + 1)
+ {
+ }
+
+ xpath_node_set::~xpath_node_set()
+ {
+ if (_begin != &_storage) get_memory_deallocation_function()(_begin);
+ }
+
+ xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(ns._type), _begin(&_storage), _end(&_storage), _eos(&_storage + 1)
+ {
+ if (ns.size() == 1)
+ {
+ _storage = *ns._begin;
+ _end = _eos;
+ }
+ else
+ {
+ append(ns.begin(), ns.end());
+ }
+ }
+
+ xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
+ {
+ if (this == &ns) return *this;
+
+ // clear & append
+ _type = ns._type;
+ _end = _begin;
+
+ append(ns.begin(), ns.end());
+
+ return *this;
+ }
+
+ void xpath_node_set::swap(xpath_node_set& ns)
+ {
+ pstd::swap(_type, ns._type);
+ pstd::swap(_storage, ns._storage);
+ pstd::swap(_begin, ns._begin);
+ pstd::swap(_end, ns._end);
+ pstd::swap(_eos, ns._eos);
+ }
+
+ xpath_node_set::type_t xpath_node_set::type() const
+ {
+ return _type;
+ }
+
+ size_t xpath_node_set::size() const
+ {
+ return _end - _begin;
+ }
+
+ bool xpath_node_set::empty() const
+ {
+ return size() == 0;
+ }
+
+ const xpath_node& xpath_node_set::operator[](size_t index) const
+ {
+ assert(index < size());
+ return _begin[index];
+ }
+
+ xpath_node_set::iterator xpath_node_set::mut_begin()
+ {
+ return _begin;
+ }
+
+ xpath_node_set::const_iterator xpath_node_set::begin() const
+ {
+ return _begin;
+ }
+
+ xpath_node_set::const_iterator xpath_node_set::end() const
+ {
+ return _end;
+ }
+
+ void xpath_node_set::sort(bool reverse)
+ {
+ pstd::sort(_begin, _end, document_order_comparator());
+
+ if (reverse) pstd::reverse(_begin, _end);
+
+ _type = reverse ? type_sorted_reverse : type_sorted;
+ }
+
+ void xpath_node_set::push_back(const xpath_node& n)
+ {
+ if (_end == _eos)
+ append(&n, &n + 1);
+ else
+ {
+ *_end = n;
+ ++_end;
+ }
+ }
+
+ void xpath_node_set::append(const_iterator begin, const_iterator end)
+ {
+ if (begin == end) return;
+
+ size_t count = end - begin;
+ size_t size = _end - _begin;
+ size_t capacity = _eos - _begin;
+
+ if (capacity < size + count)
+ {
+ if (capacity < 2) capacity = 2;
+
+ while (capacity < size + count) capacity += capacity / 2;
+
+ xpath_node* storage = static_cast<xpath_node*>(get_memory_allocation_function()(capacity * sizeof(xpath_node)));
+ if (!storage) return; // $$ out of memory
+
+ pstd::copy(_begin, _end, storage);
+ // memcpy(storage, _begin, size * sizeof(xpath_node));
+
+ if (_begin != &_storage) get_memory_deallocation_function()(_begin);
+
+ _begin = storage;
+ _end = storage + size;
+ _eos = storage + capacity;
+ }
+
+ pstd::copy(begin, end, _end);
+ // memcpy(_end, begin, count * sizeof(xpath_node));
+ _end += count;
+ }
+
+ void xpath_node_set::truncate(iterator it)
+ {
+ _end = it;
+ }
+
+ xpath_node xpath_node_set::first() const
+ {
+ if (empty()) return xpath_node();
+
+ switch (_type)
+ {
+ case type_sorted: return *_begin;
+ case type_sorted_reverse: return *(_end - 1);
+ case type_unsorted: return *pstd::min_element(_begin, _end, document_order_comparator());
+ default: return xpath_node();
+ }
+ }
+
+ void xpath_node_set::remove_duplicates()
+ {
+ if (_type == type_unsorted)
+ {
+ pstd::sort(_begin, _end, duplicate_comparator());
+ }
+
+ truncate(pstd::unique(_begin, _end));
+ }
+
+ struct xpath_context
+ {
+ xpath_node n;
+ size_t position, size;
+
+ xpath_context(const xpath_node& n, size_t position, size_t size): n(n), position(position), size(size)
+ {
+ }
+ };
+
+ enum lexeme_t
+ {
+ lex_none = 0,
+ lex_equal,
+ lex_not_equal,
+ lex_less,
+ lex_greater,
+ lex_less_or_equal,
+ lex_greater_or_equal,
+ lex_plus,
+ lex_minus,
+ lex_multiply,
+ lex_union,
+ lex_var_ref,
+ lex_open_brace,
+ lex_close_brace,
+ lex_quoted_string,
+ lex_number,
+ lex_slash,
+ lex_double_slash,
+ lex_open_square_brace,
+ lex_close_square_brace,
+ lex_string,
+ lex_comma,
+ lex_axis_attribute,
+ lex_dot,
+ lex_double_dot,
+ lex_double_colon,
+ lex_eof
+ };
+
+ struct xpath_lexer_string
+ {
+ const char_t* begin;
+ const char_t* end;
+
+ xpath_lexer_string(): begin(0), end(0)
+ {
+ }
+
+ bool operator==(const char_t* other) const
+ {
+ size_t length = static_cast<size_t>(end - begin);
+
+ return impl::strequalrange(other, begin, length);
+ }
+ };
+
+ class xpath_lexer
+ {
+ const char_t* _cur;
+ const char_t* _cur_lexeme_pos;
+ xpath_lexer_string _cur_lexeme_contents;
+
+ lexeme_t _cur_lexeme;
+
+ public:
+ explicit xpath_lexer(const char_t* query): _cur(query)
+ {
+ next();
+ }
+
+ const char_t* state() const
+ {
+ return _cur;
+ }
+
+ void next()
+ {
+ const char_t* cur = _cur;
+
+ while (IS_CHARTYPEX(*cur, ctx_space)) ++cur;
+
+ // save lexeme position for error reporting
+ _cur_lexeme_pos = cur;
+
+ switch (*cur)
+ {
+ case 0:
+ _cur_lexeme = lex_eof;
+ break;
+
+ case '>':
+ if (*(cur+1) == '=')
+ {
+ cur += 2;
+ _cur_lexeme = lex_greater_or_equal;
+ }
+ else
+ {
+ cur += 1;
+ _cur_lexeme = lex_greater;
+ }
+ break;
+
+ case '<':
+ if (*(cur+1) == '=')
+ {
+ cur += 2;
+ _cur_lexeme = lex_less_or_equal;
+ }
+ else
+ {
+ cur += 1;
+ _cur_lexeme = lex_less;
+ }
+ break;
+
+ case '!':
+ if (*(cur+1) == '=')
+ {
+ cur += 2;
+ _cur_lexeme = lex_not_equal;
+ }
+ else
+ {
+ _cur_lexeme = lex_none;
+ }
+ break;
+
+ case '=':
+ cur += 1;
+ _cur_lexeme = lex_equal;
+
+ break;
+
+ case '+':
+ cur += 1;
+ _cur_lexeme = lex_plus;
+
+ break;
+
+ case '-':
+ cur += 1;
+ _cur_lexeme = lex_minus;
+
+ break;
+
+ case '*':
+ cur += 1;
+ _cur_lexeme = lex_multiply;
+
+ break;
+
+ case '|':
+ cur += 1;
+ _cur_lexeme = lex_union;
+
+ break;
+
+ case '$':
+ cur += 1;
+ _cur_lexeme = lex_var_ref;
+
+ break;
+
+ case '(':
+ cur += 1;
+ _cur_lexeme = lex_open_brace;
+
+ break;
+
+ case ')':
+ cur += 1;
+ _cur_lexeme = lex_close_brace;
+
+ break;
+
+ case '[':
+ cur += 1;
+ _cur_lexeme = lex_open_square_brace;
+
+ break;
+
+ case ']':
+ cur += 1;
+ _cur_lexeme = lex_close_square_brace;
+
+ break;
+
+ case ',':
+ cur += 1;
+ _cur_lexeme = lex_comma;
+
+ break;
+
+ case '/':
+ if (*(cur+1) == '/')
+ {
+ cur += 2;
+ _cur_lexeme = lex_double_slash;
+ }
+ else
+ {
+ cur += 1;
+ _cur_lexeme = lex_slash;
+ }
+ break;
+
+ case '.':
+ if (*(cur+1) == '.')
+ {
+ cur += 2;
+ _cur_lexeme = lex_double_dot;
+ }
+ else if (IS_CHARTYPEX(*(cur+1), ctx_digit))
+ {
+ _cur_lexeme_contents.begin = cur; // .
+
+ ++cur;
+
+ while (IS_CHARTYPEX(*cur, ctx_digit)) cur++;
+
+ _cur_lexeme_contents.end = cur;
+
+ _cur_lexeme = lex_number;
+ }
+ else
+ {
+ cur += 1;
+ _cur_lexeme = lex_dot;
+ }
+ break;
+
+ case '@':
+ cur += 1;
+ _cur_lexeme = lex_axis_attribute;
+
+ break;
+
+ case '"':
+ case '\'':
+ {
+ char_t terminator = *cur;
+
+ ++cur;
+
+ _cur_lexeme_contents.begin = cur;
+ while (*cur && *cur != terminator) cur++;
+ _cur_lexeme_contents.end = cur;
+
+ if (!*cur)
+ _cur_lexeme = lex_none;
+ else
+ {
+ cur += 1;
+ _cur_lexeme = lex_quoted_string;
+ }
+
+ break;
+ }
+
+ case ':':
+ if (*(cur+1) == ':')
+ {
+ cur += 2;
+ _cur_lexeme = lex_double_colon;
+ }
+ else
+ {
+ _cur_lexeme = lex_none;
+ }
+ break;
+
+ default:
+ if (IS_CHARTYPEX(*cur, ctx_digit))
+ {
+ _cur_lexeme_contents.begin = cur;
+
+ while (IS_CHARTYPEX(*cur, ctx_digit)) cur++;
+
+ if (*cur == '.')
+ {
+ cur++;
+
+ while (IS_CHARTYPEX(*cur, ctx_digit)) cur++;
+ }
+
+ _cur_lexeme_contents.end = cur;
+
+ _cur_lexeme = lex_number;
+ }
+ else if (IS_CHARTYPEX(*cur, ctx_start_symbol))
+ {
+ _cur_lexeme_contents.begin = cur;
+
+ while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+
+ if (cur[0] == ':')
+ {
+ if (cur[1] == '*') // namespace test ncname:*
+ {
+ cur += 2; // :*
+ }
+ else if (IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
+ {
+ cur++; // :
+
+ while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+ }
+ }
+
+ _cur_lexeme_contents.end = cur;
+
+ _cur_lexeme = lex_string;
+ }
+ else
+ {
+ _cur_lexeme = lex_none;
+ }
+ }
+
+ _cur = cur;
+ }
+
+ lexeme_t current() const
+ {
+ return _cur_lexeme;
+ }
+
+ const char_t* current_pos() const
+ {
+ return _cur_lexeme_pos;
+ }
+
+ const xpath_lexer_string& contents() const
+ {
+ assert(_cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
+
+ return _cur_lexeme_contents;
+ }
+ };
+
+ enum ast_type_t
+ {
+ ast_op_or, // left or right
+ ast_op_and, // left and right
+ ast_op_equal, // left = right
+ ast_op_not_equal, // left != right
+ ast_op_less, // left < right
+ ast_op_greater, // left > right
+ ast_op_less_or_equal, // left <= right
+ ast_op_greater_or_equal, // left >= right
+ ast_op_add, // left + right
+ ast_op_subtract, // left - right
+ ast_op_multiply, // left * right
+ ast_op_divide, // left / right
+ ast_op_mod, // left % right
+ ast_op_negate, // left - right
+ ast_op_union, // left | right
+ ast_predicate, // apply predicate to set; next points to next predicate
+ ast_filter, // select * from left where right
+ ast_filter_posinv, // select * from left where right; proximity position invariant
+ ast_string_constant, // string constant
+ ast_number_constant, // number constant
+ ast_func_last, // last()
+ ast_func_position, // position()
+ ast_func_count, // count(left)
+ ast_func_id, // id(left)
+ ast_func_local_name_0, // local-name()
+ ast_func_local_name_1, // local-name(left)
+ ast_func_namespace_uri_0, // namespace-uri()
+ ast_func_namespace_uri_1, // namespace-uri(left)
+ ast_func_name_0, // name()
+ ast_func_name_1, // name(left)
+ ast_func_string_0, // string()
+ ast_func_string_1, // string(left)
+ ast_func_concat, // concat(left, right, siblings)
+ ast_func_starts_with, // starts_with(left, right)
+ ast_func_contains, // contains(left, right)
+ ast_func_substring_before, // substring-before(left, right)
+ ast_func_substring_after, // substring-after(left, right)
+ ast_func_substring_2, // substring(left, right)
+ ast_func_substring_3, // substring(left, right, third)
+ ast_func_string_length_0, // string-length()
+ ast_func_string_length_1, // string-length(left)
+ ast_func_normalize_space_0, // normalize-space()
+ ast_func_normalize_space_1, // normalize-space(left)
+ ast_func_translate, // translate(left, right, third)
+ ast_func_boolean, // boolean(left)
+ ast_func_not, // not(left)
+ ast_func_true, // true()
+ ast_func_false, // false()
+ ast_func_lang, // lang(left)
+ ast_func_number_0, // number()
+ ast_func_number_1, // number(left)
+ ast_func_sum, // sum(left)
+ ast_func_floor, // floor(left)
+ ast_func_ceiling, // ceiling(left)
+ ast_func_round, // round(left)
+ ast_step, // process set left with step
+ ast_step_root // select root node
+ };
+
+ enum axis_t
+ {
+ axis_ancestor,
+ axis_ancestor_or_self,
+ axis_attribute,
+ axis_child,
+ axis_descendant,
+ axis_descendant_or_self,
+ axis_following,
+ axis_following_sibling,
+ axis_namespace,
+ axis_parent,
+ axis_preceding,
+ axis_preceding_sibling,
+ axis_self
+ };
+
+ enum nodetest_t
+ {
+ nodetest_none,
+ nodetest_name,
+ nodetest_type_node,
+ nodetest_type_comment,
+ nodetest_type_pi,
+ nodetest_type_text,
+ nodetest_pi,
+ nodetest_all,
+ nodetest_all_in_namespace
+ };
+
+ template <axis_t N> struct axis_to_type
+ {
+ static const axis_t axis;
+ };
+
+ template <axis_t N> const axis_t axis_to_type<N>::axis = N;
+
+ class xpath_ast_node
+ {
+ private:
+ // node type
+ char _type;
+ char _rettype;
+
+ // for ast_step / ast_predicate
+ char _axis;
+ char _test;
+
+ // tree node structure
+ xpath_ast_node* _left;
+ xpath_ast_node* _right;
+ xpath_ast_node* _next;
+
+ union
+ {
+ // value for ast_string_constant
+ const char_t* string;
+ // value for ast_number_constant
+ double number;
+ // node test for ast_step (node name/namespace/node type/pi target)
+ const char_t* nodetest;
+ } _data;
+
+ xpath_ast_node(const xpath_ast_node&);
+ xpath_ast_node& operator=(const xpath_ast_node&);
+
+ template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const Comp& comp)
+ {
+ xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
+
+ if (lt != xpath_type_node_set && rt != xpath_type_node_set)
+ {
+ if (lt == xpath_type_boolean || rt == xpath_type_boolean)
+ return comp(lhs->eval_boolean(c), rhs->eval_boolean(c));
+ else if (lt == xpath_type_number || rt == xpath_type_number)
+ return comp(lhs->eval_number(c), rhs->eval_number(c));
+ else if (lt == xpath_type_string || rt == xpath_type_string)
+ return comp(lhs->eval_string(c), rhs->eval_string(c));
+ }
+ else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
+ {
+ xpath_node_set ls = lhs->eval_node_set(c);
+ xpath_node_set rs = rhs->eval_node_set(c);
+
+ for (xpath_node_set::const_iterator li = ls.begin(); li != ls.end(); ++li)
+ for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri)
+ {
+ if (comp(string_value(*li), string_value(*ri)))
+ return true;
+ }
+
+ return false;
+ }
+ else
+ {
+ if (lt == xpath_type_node_set)
+ {
+ pstd::swap(lhs, rhs);
+ pstd::swap(lt, rt);
+ }
+
+ if (lt == xpath_type_boolean)
+ return comp(lhs->eval_boolean(c), rhs->eval_boolean(c));
+ else if (lt == xpath_type_number)
+ {
+ double l = lhs->eval_number(c);
+ xpath_node_set rs = rhs->eval_node_set(c);
+
+ for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri)
+ {
+ if (comp(l, convert_string_to_number(string_value(*ri).c_str())))
+ return true;
+ }
+
+ return false;
+ }
+ else if (lt == xpath_type_string)
+ {
+ xpath_string l = lhs->eval_string(c);
+ xpath_node_set rs = rhs->eval_node_set(c);
+
+ for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri)
+ {
+ if (comp(l, string_value(*ri)))
+ return true;
+ }
+
+ return false;
+ }
+ }
+
+ assert(!"Wrong types");
+ return false;
+ }
+
+ template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const Comp& comp)
+ {
+ xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
+
+ if (lt != xpath_type_node_set && rt != xpath_type_node_set)
+ return comp(lhs->eval_number(c), rhs->eval_number(c));
+ else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
+ {
+ xpath_node_set ls = lhs->eval_node_set(c);
+ xpath_node_set rs = rhs->eval_node_set(c);
+
+ for (xpath_node_set::const_iterator li = ls.begin(); li != ls.end(); ++li)
+ {
+ double l = convert_string_to_number(string_value(*li).c_str());
+
+ for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri)
+ {
+ if (comp(l, convert_string_to_number(string_value(*ri).c_str())))
+ return true;
+ }
+ }
+
+ return false;
+ }
+ else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
+ {
+ double l = lhs->eval_number(c);
+ xpath_node_set rs = rhs->eval_node_set(c);
+
+ for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri)
+ {
+ if (comp(l, convert_string_to_number(string_value(*ri).c_str())))
+ return true;
+ }
+
+ return false;
+ }
+ else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
+ {
+ xpath_node_set ls = lhs->eval_node_set(c);
+ double r = rhs->eval_number(c);
+
+ for (xpath_node_set::const_iterator li = ls.begin(); li != ls.end(); ++li)
+ {
+ if (comp(convert_string_to_number(string_value(*li).c_str()), r))
+ return true;
+ }
+
+ return false;
+ }
+ else
+ {
+ assert(!"Wrong types");
+ return false;
+ }
+ }
+
+ void apply_predicate(xpath_node_set& ns, size_t first, xpath_ast_node* expr)
+ {
+ assert(ns.size() >= first);
+
+ size_t i = 1;
+ size_t size = ns.size() - first;
+
+ xpath_node_set::iterator last = ns.mut_begin() + first;
+
+ // remove_if... or well, sort of
+ for (xpath_node_set::iterator it = last; it != ns.end(); ++it, ++i)
+ {
+ xpath_context c(*it, i, size);
+
+ if (expr->rettype() == xpath_type_number)
+ {
+ if (expr->eval_number(c) == i)
+ *last++ = *it;
+ }
+ else if (expr->eval_boolean(c))
+ *last++ = *it;
+ }
+
+ ns.truncate(last);
+ }
+
+ void apply_predicates(xpath_node_set& ns, size_t first)
+ {
+ if (ns.size() == first) return;
+
+ for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
+ {
+ apply_predicate(ns, first, pred->_left);
+ }
+ }
+
+ void step_push(xpath_node_set& ns, const xml_attribute& a, const xml_node& parent)
+ {
+ if (!a) return;
+
+ const char_t* name = a.name();
+
+ // There are no attribute nodes corresponding to attributes that declare namespaces
+ // That is, "xmlns:..." or "xmlns"
+ if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return;
+
+ switch (_test)
+ {
+ case nodetest_name:
+ if (impl::strequal(name, _data.nodetest)) ns.push_back(xpath_node(a, parent));
+ break;
+
+ case nodetest_type_node:
+ case nodetest_all:
+ ns.push_back(xpath_node(a, parent));
+ break;
+
+ case nodetest_all_in_namespace:
+ if (starts_with(name, _data.nodetest))
+ ns.push_back(xpath_node(a, parent));
+ break;
+
+ default:
+ ;
+ }
+ }
+
+ void step_push(xpath_node_set& ns, const xml_node& n)
+ {
+ if (!n) return;
+
+ switch (_test)
+ {
+ case nodetest_name:
+ if (n.type() == node_element && impl::strequal(n.name(), _data.nodetest)) ns.push_back(n);
+ break;
+
+ case nodetest_type_node:
+ ns.push_back(n);
+ break;
+
+ case nodetest_type_comment:
+ if (n.type() == node_comment)
+ ns.push_back(n);
+ break;
+
+ case nodetest_type_text:
+ if (n.type() == node_pcdata || n.type() == node_cdata)
+ ns.push_back(n);
+ break;
+
+ case nodetest_type_pi:
+ if (n.type() == node_pi)
+ ns.push_back(n);
+ break;
+
+ case nodetest_pi:
+ if (n.type() == node_pi && impl::strequal(n.name(), _data.nodetest))
+ ns.push_back(n);
+ break;
+
+ case nodetest_all:
+ if (n.type() == node_element)
+ ns.push_back(n);
+ break;
+
+ case nodetest_all_in_namespace:
+ if (n.type() == node_element && starts_with(n.name(), _data.nodetest))
+ ns.push_back(n);
+ break;
+
+ default:
+ assert(!"Unknown axis");
+ }
+ }
+
+ template <class T> void step_fill(xpath_node_set& ns, const xml_node& n, T)
+ {
+ const axis_t axis = T::axis;
+
+ switch (axis)
+ {
+ case axis_attribute:
+ {
+ ns._type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted;
+
+ for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute())
+ step_push(ns, a, n);
+
+ break;
+ }
+
+ case axis_child:
+ {
+ ns._type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted;
+
+ for (xml_node c = n.first_child(); c; c = c.next_sibling())
+ step_push(ns, c);
+
+ break;
+ }
+
+ case axis_descendant:
+ case axis_descendant_or_self:
+ {
+ ns._type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted;
+
+ if (axis == axis_descendant_or_self)
+ step_push(ns, n);
+
+ xml_node cur = n.first_child();
+
+ while (cur && cur != n)
+ {
+ step_push(ns, cur);
+
+ if (cur.first_child())
+ cur = cur.first_child();
+ else if (cur.next_sibling())
+ cur = cur.next_sibling();
+ else
+ {
+ while (!cur.next_sibling() && cur != n)
+ cur = cur.parent();
+
+ if (cur != n) cur = cur.next_sibling();
+ }
+ }
+
+ break;
+ }
+
+ case axis_following_sibling:
+ {
+ ns._type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted;
+
+ for (xml_node c = n.next_sibling(); c; c = c.next_sibling())
+ step_push(ns, c);
+
+ break;
+ }
+
+ case axis_preceding_sibling:
+ {
+ ns._type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted;
+
+ for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling())
+ step_push(ns, c);
+
+ break;
+ }
+
+ case axis_following:
+ {
+ ns._type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted;
+
+ xml_node cur = n;
+
+ // exit from this node so that we don't include descendants
+ while (cur && !cur.next_sibling()) cur = cur.parent();
+ cur = cur.next_sibling();
+
+ for (;;)
+ {
+ step_push(ns, cur);
+
+ if (cur.first_child())
+ cur = cur.first_child();
+ else if (cur.next_sibling())
+ cur = cur.next_sibling();
+ else
+ {
+ while (cur && !cur.next_sibling()) cur = cur.parent();
+ cur = cur.next_sibling();
+
+ if (!cur) break;
+ }
+ }
+
+ break;
+ }
+
+ case axis_preceding:
+ {
+ ns._type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted;
+
+ xml_node cur = n;
+
+ while (cur && !cur.previous_sibling()) cur = cur.parent();
+ cur = cur.previous_sibling();
+
+ for (;;)
+ {
+ if (cur.last_child())
+ cur = cur.last_child();
+ else
+ {
+ // leaf node, can't be ancestor
+ step_push(ns, cur);
+
+ if (cur.previous_sibling())
+ cur = cur.previous_sibling();
+ else
+ {
+ do
+ {
+ cur = cur.parent();
+ if (!cur) break;
+
+ if (!node_is_ancestor(cur, n)) step_push(ns, cur);
+ }
+ while (!cur.previous_sibling());
+
+ cur = cur.previous_sibling();
+
+ if (!cur) break;
+ }
+ }
+ }
+
+ break;
+ }
+
+ case axis_ancestor:
+ case axis_ancestor_or_self:
+ {
+ ns._type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted;
+
+ if (axis == axis_ancestor_or_self)
+ step_push(ns, n);
+
+ xml_node cur = n.parent();
+
+ while (cur)
+ {
+ step_push(ns, cur);
+
+ cur = cur.parent();
+ }
+
+ break;
+ }
+
+ case axis_self:
+ {
+ ns._type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted;
+
+ step_push(ns, n);
+
+ break;
+ }
+
+ case axis_parent:
+ {
+ ns._type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted;
+
+ if (n.parent()) step_push(ns, n.parent());
+
+ break;
+ }
+
+ default:
+ assert(!"Unimplemented axis");
+ }
+ }
+
+ template <class T> void step_fill(xpath_node_set& ns, const xml_attribute& a, const xml_node& p, T v)
+ {
+ const axis_t axis = T::axis;
+
+ switch (axis)
+ {
+ case axis_ancestor:
+ case axis_ancestor_or_self:
+ {
+ ns._type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted;
+
+ if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
+ step_push(ns, a, p);
+
+ xml_node cur = p;
+
+ while (cur)
+ {
+ step_push(ns, cur);
+
+ cur = cur.parent();
+ }
+
+ break;
+ }
+
+ case axis_descendant_or_self:
+ case axis_self:
+ {
+ ns._type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted;
+
+ if (_test == nodetest_type_node) // reject attributes based on principal node type test
+ step_push(ns, a, p);
+
+ break;
+ }
+
+ case axis_following:
+ {
+ ns._type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted;
+
+ xml_node cur = p;
+
+ for (;;)
+ {
+ if (cur.first_child())
+ cur = cur.first_child();
+ else if (cur.next_sibling())
+ cur = cur.next_sibling();
+ else
+ {
+ while (cur && !cur.next_sibling()) cur = cur.parent();
+ cur = cur.next_sibling();
+
+ if (!cur) break;
+ }
+
+ step_push(ns, cur);
+ }
+
+ break;
+ }
+
+ case axis_parent:
+ {
+ ns._type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted;
+
+ step_push(ns, p);
+
+ break;
+ }
+
+ case axis_preceding:
+ {
+ // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
+ step_fill(ns, p, v);
+ break;
+ }
+
+ default:
+ assert(!"Unimplemented axis");
+ }
+ }
+
+ template <class T> void step_do(xpath_node_set& ns, const xpath_context& c, T v)
+ {
+ const axis_t axis = T::axis;
+
+ assert(ns.empty());
+
+ switch (axis)
+ {
+ case axis_ancestor:
+ case axis_ancestor_or_self:
+ case axis_descendant_or_self:
+ case axis_following:
+ case axis_parent:
+ case axis_preceding:
+ case axis_self:
+ if (_left)
+ {
+ xpath_node_set s = _left->eval_node_set(c);
+
+ for (xpath_node_set::const_iterator it = s.begin(); it != s.end(); ++it)
+ {
+ size_t size = ns.size();
+
+ if (it->node())
+ step_fill(ns, it->node(), v);
+ else
+ step_fill(ns, it->attribute(), it->parent(), v);
+
+ apply_predicates(ns, size);
+ }
+ }
+ else
+ {
+ if (c.n.node()) step_fill(ns, c.n.node(), v);
+ else step_fill(ns, c.n.attribute(), c.n.parent(), v);
+
+ apply_predicates(ns, 0);
+ }
+
+ break;
+
+ case axis_following_sibling:
+ case axis_preceding_sibling:
+ case axis_attribute:
+ case axis_child:
+ case axis_descendant:
+ if (_left)
+ {
+ xpath_node_set s = _left->eval_node_set(c);
+
+ for (xpath_node_set::const_iterator it = s.begin(); it != s.end(); ++it)
+ {
+ size_t size = ns.size();
+
+ if (it->node())
+ step_fill(ns, it->node(), v);
+
+ apply_predicates(ns, size);
+ }
+ }
+ else if (c.n.node())
+ {
+ step_fill(ns, c.n.node(), v);
+
+ apply_predicates(ns, 0);
+ }
+
+ break;
+
+ case axis_namespace:
+ break;
+
+ default:
+ assert(!"Unimplemented axis");
+ }
+ }
+
+ public:
+ xpath_ast_node(ast_type_t type, xpath_value_type rettype, const char_t* value):
+ _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(0), _right(0), _next(0)
+ {
+ assert(type == ast_string_constant);
+ _data.string = value;
+ }
+
+ xpath_ast_node(ast_type_t type, xpath_value_type rettype, double value):
+ _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(0), _right(0), _next(0)
+ {
+ assert(type == ast_number_constant);
+ _data.number = value;
+ }
+
+ xpath_ast_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
+ _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(left), _right(right), _next(0)
+ {
+ }
+
+ xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
+ _type((char)type), _rettype(xpath_type_node_set), _axis((char)axis), _test((char)test), _left(left), _right(0), _next(0)
+ {
+ _data.nodetest = contents;
+ }
+
+ void set_next(xpath_ast_node* value)
+ {
+ _next = value;
+ }
+
+ void set_right(xpath_ast_node* value)
+ {
+ _right = value;
+ }
+
+ bool eval_boolean(const xpath_context& c)
+ {
+ switch (_type)
+ {
+ case ast_op_or:
+ if (_left->eval_boolean(c)) return true;
+ else return _right->eval_boolean(c);
+
+ case ast_op_and:
+ if (!_left->eval_boolean(c)) return false;
+ else return _right->eval_boolean(c);
+
+ case ast_op_equal:
+ return compare_eq(_left, _right, c, pstd::equal_to());
+
+ case ast_op_not_equal:
+ return compare_eq(_left, _right, c, pstd::not_equal_to());
+
+ case ast_op_less:
+ return compare_rel(_left, _right, c, pstd::less());
+
+ case ast_op_greater:
+ return compare_rel(_right, _left, c, pstd::less());
+
+ case ast_op_less_or_equal:
+ return compare_rel(_left, _right, c, pstd::less_equal());
+
+ case ast_op_greater_or_equal:
+ return compare_rel(_right, _left, c, pstd::less_equal());
+
+ case ast_func_starts_with:
+ return starts_with(_left->eval_string(c).c_str(), _right->eval_string(c).c_str());
+
+ case ast_func_contains:
+ {
+ xpath_string lr = _left->eval_string(c);
+ xpath_string rr = _right->eval_string(c);
+
+ return find_substring(lr.c_str(), rr.c_str()) != 0;
+ }
+
+ case ast_func_boolean:
+ return _left->eval_boolean(c);
+
+ case ast_func_not:
+ return !_left->eval_boolean(c);
+
+ case ast_func_true:
+ return true;
+
+ case ast_func_false:
+ return false;
+
+ case ast_func_lang:
+ {
+ if (c.n.attribute()) return false;
+
+ xpath_string lang = _left->eval_string(c);
+
+ for (xml_node n = c.n.node(); n; n = n.parent())
+ {
+ xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
+
+ if (a)
+ {
+ const char_t* value = a.value();
+
+ // strnicmp / strncasecmp is not portable
+ for (const char_t* lit = lang.c_str(); *lit; ++lit)
+ {
+ if (tolower(*lit) != tolower(*value)) return false;
+ ++value;
+ }
+
+ return *value == 0 || *value == '-';
+ }
+ }
+
+ return false;
+ }
+
+ default:
+ {
+ switch (_rettype)
+ {
+ case xpath_type_number:
+ return convert_number_to_boolean(eval_number(c));
+
+ case xpath_type_string:
+ return !eval_string(c).empty();
+
+ case xpath_type_node_set:
+ return !eval_node_set(c).empty();
+
+ default:
+ assert(!"Wrong expression for return type boolean");
+ return false;
+ }
+ }
+ }
+ }
+
+ double eval_number(const xpath_context& c)
+ {
+ switch (_type)
+ {
+ case ast_op_add:
+ return _left->eval_number(c) + _right->eval_number(c);
+
+ case ast_op_subtract:
+ return _left->eval_number(c) - _right->eval_number(c);
+
+ case ast_op_multiply:
+ return _left->eval_number(c) * _right->eval_number(c);
+
+ case ast_op_divide:
+ return _left->eval_number(c) / _right->eval_number(c);
+
+ case ast_op_mod:
+ return fmod(_left->eval_number(c), _right->eval_number(c));
+
+ case ast_op_negate:
+ return -_left->eval_number(c);
+
+ case ast_number_constant:
+ return _data.number;
+
+ case ast_func_last:
+ return (double)c.size;
+
+ case ast_func_position:
+ return (double)c.position;
+
+ case ast_func_count:
+ return (double)_left->eval_node_set(c).size();
+
+ case ast_func_string_length_0:
+ return (double)string_value(c.n).length();
+
+ case ast_func_string_length_1:
+ return (double)_left->eval_string(c).length();
+
+ case ast_func_number_0:
+ return convert_string_to_number(string_value(c.n).c_str());
+
+ case ast_func_number_1:
+ return _left->eval_number(c);
+
+ case ast_func_sum:
+ {
+ double r = 0;
+
+ xpath_node_set ns = _left->eval_node_set(c);
+
+ for (xpath_node_set::const_iterator it = ns.begin(); it != ns.end(); ++it)
+ r += convert_string_to_number(string_value(*it).c_str());
+
+ return r;
+ }
+
+ case ast_func_floor:
+ {
+ double r = _left->eval_number(c);
+
+ return r == r ? floor(r) : r;
+ }
+
+ case ast_func_ceiling:
+ {
+ double r = _left->eval_number(c);
+
+ return r == r ? ceil(r) : r;
+ }
+
+ case ast_func_round:
+ return round_nearest_nzero(_left->eval_number(c));
+
+ default:
+ {
+ switch (_rettype)
+ {
+ case xpath_type_boolean:
+ return eval_boolean(c) ? 1 : 0;
+
+ case xpath_type_string:
+ return convert_string_to_number(eval_string(c).c_str());
+
+ case xpath_type_node_set:
+ return convert_string_to_number(eval_string(c).c_str());
+
+ default:
+ assert(!"Wrong expression for return type number");
+ return 0;
+ }
+
+ }
+ }
+ }
+
+ xpath_string eval_string(const xpath_context& c)
+ {
+ switch (_type)
+ {
+ case ast_string_constant:
+ return xpath_string_const(_data.string);
+
+ case ast_func_local_name_0:
+ {
+ xpath_node na = c.n;
+
+ return xpath_string_const(local_name(na));
+ }
+
+ case ast_func_local_name_1:
+ {
+ xpath_node_set ns = _left->eval_node_set(c);
+ xpath_node na = ns.first();
+
+ return xpath_string_const(local_name(na));
+ }
+
+ case ast_func_name_0:
+ {
+ xpath_node na = c.n;
+
+ return xpath_string_const(qualified_name(na));
+ }
+
+ case ast_func_name_1:
+ {
+ xpath_node_set ns = _left->eval_node_set(c);
+ xpath_node na = ns.first();
+
+ return xpath_string_const(qualified_name(na));
+ }
+
+ case ast_func_namespace_uri_0:
+ {
+ xpath_node na = c.n;
+
+ return xpath_string_const(namespace_uri(na));
+ }
+
+ case ast_func_namespace_uri_1:
+ {
+ xpath_node_set ns = _left->eval_node_set(c);
+ xpath_node na = ns.first();
+
+ return xpath_string_const(namespace_uri(na));
+ }
+
+ case ast_func_string_0:
+ return string_value(c.n);
+
+ case ast_func_string_1:
+ return _left->eval_string(c);
+
+ case ast_func_concat:
+ {
+ xpath_string r = _left->eval_string(c);
+
+ for (xpath_ast_node* n = _right; n; n = n->_next)
+ r.append(n->eval_string(c));
+
+ return r;
+ }
+
+ case ast_func_substring_before:
+ {
+ xpath_string s = _left->eval_string(c);
+ xpath_string p = _right->eval_string(c);
+
+ const char_t* pos = find_substring(s.c_str(), p.c_str());
+
+ return pos ? xpath_string(s.c_str(), pos) : xpath_string();
+ }
+
+ case ast_func_substring_after:
+ {
+ xpath_string s = _left->eval_string(c);
+ xpath_string p = _right->eval_string(c);
+
+ const char_t* pos = find_substring(s.c_str(), p.c_str());
+
+ return pos ? xpath_string(pos + p.length()) : xpath_string();
+ }
+
+ case ast_func_substring_2:
+ {
+ xpath_string s = _left->eval_string(c);
+ double first = round_nearest(_right->eval_number(c));
+
+ if (is_nan(first)) return xpath_string(); // NaN
+ else if (first >= s.length() + 1) return xpath_string();
+
+ size_t pos = first < 1 ? 1 : (size_t)first;
+
+ return xpath_string(s.c_str() + (pos - 1));
+ }
+
+ case ast_func_substring_3:
+ {
+ xpath_string s = _left->eval_string(c);
+ size_t s_length = s.length();
+
+ double first = round_nearest(_right->eval_number(c));
+ double last = first + round_nearest(_right->_next->eval_number(c));
+
+ if (is_nan(first) || is_nan(last)) return xpath_string();
+ else if (first >= s_length + 1) return xpath_string();
+ else if (first >= last) return xpath_string();
+
+ size_t pos = first < 1 ? 1 : (size_t)first;
+ size_t end = last >= s_length + 1 ? s_length + 1 : (size_t)last;
+
+ size_t size_requested = end - pos;
+ size_t size_to_end = s_length - pos + 1;
+
+ size_t size = size_requested < size_to_end ? size_requested : size_to_end;
+
+ return xpath_string(s.c_str() + pos - 1, s.c_str() + pos - 1 + size);
+ }
+
+ case ast_func_normalize_space_0:
+ {
+ xpath_string s = string_value(c.n);
+
+ normalize_space(s.data());
+
+ return s;
+ }
+
+ case ast_func_normalize_space_1:
+ {
+ xpath_string s = _left->eval_string(c);
+
+ normalize_space(s.data());
+
+ return s;
+ }
+
+ case ast_func_translate:
+ {
+ xpath_string s = _left->eval_string(c);
+ xpath_string from = _right->eval_string(c);
+ xpath_string to = _right->_next->eval_string(c);
+
+ translate(s.data(), from.c_str(), to.c_str());
+
+ return s;
+ }
+
+ default:
+ {
+ switch (_rettype)
+ {
+ case xpath_type_boolean:
+ return xpath_string_const(eval_boolean(c) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
+
+ case xpath_type_number:
+ return convert_number_to_string(eval_number(c));
+
+ case xpath_type_node_set:
+ {
+ xpath_node_set ns = eval_node_set(c);
+ return ns.empty() ? xpath_string() : string_value(ns.first());
+ }
+
+ default:
+ assert(!"Wrong expression for return type string");
+ return xpath_string();
+ }
+ }
+ }
+ }
+
+ xpath_node_set eval_node_set(const xpath_context& c)
+ {
+ switch (_type)
+ {
+ case ast_op_union:
+ {
+ xpath_node_set ls = _left->eval_node_set(c);
+ xpath_node_set rs = _right->eval_node_set(c);
+
+ // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
+ ls._type = xpath_node_set::type_unsorted;
+
+ ls.append(rs.begin(), rs.end());
+
+ ls.remove_duplicates();
+
+ return ls;
+ }
+
+ case ast_filter:
+ case ast_filter_posinv:
+ {
+ xpath_node_set set = _left->eval_node_set(c);
+
+ // either expression is a number or it contains position() call; sort by document order
+ if (_type == ast_filter) set.sort();
+
+ apply_predicate(set, 0, _right);
+
+ return set;
+ }
+
+ case ast_func_id:
+ return xpath_node_set();
+
+ case ast_step:
+ {
+ xpath_node_set ns;
+
+ switch (_axis)
+ {
+ case axis_ancestor:
+ step_do(ns, c, axis_to_type<axis_ancestor>());
+ break;
+
+ case axis_ancestor_or_self:
+ step_do(ns, c, axis_to_type<axis_ancestor_or_self>());
+ break;
+
+ case axis_attribute:
+ step_do(ns, c, axis_to_type<axis_attribute>());
+ break;
+
+ case axis_child:
+ step_do(ns, c, axis_to_type<axis_child>());
+ break;
+
+ case axis_descendant:
+ step_do(ns, c, axis_to_type<axis_descendant>());
+ break;
+
+ case axis_descendant_or_self:
+ step_do(ns, c, axis_to_type<axis_descendant_or_self>());
+ break;
+
+ case axis_following:
+ step_do(ns, c, axis_to_type<axis_following>());
+ break;
+
+ case axis_following_sibling:
+ step_do(ns, c, axis_to_type<axis_following_sibling>());
+ break;
+
+ case axis_namespace:
+ step_do(ns, c, axis_to_type<axis_namespace>());
+ break;
+
+ case axis_parent:
+ step_do(ns, c, axis_to_type<axis_parent>());
+ break;
+
+ case axis_preceding:
+ step_do(ns, c, axis_to_type<axis_preceding>());
+ break;
+
+ case axis_preceding_sibling:
+ step_do(ns, c, axis_to_type<axis_preceding_sibling>());
+ break;
+
+ case axis_self:
+ step_do(ns, c, axis_to_type<axis_self>());
+ break;
+ }
+
+ ns.remove_duplicates();
+
+ return ns;
+ }
+
+ case ast_step_root:
+ {
+ assert(!_right); // root step can't have any predicates
+
+ xpath_node_set ns;
+ ns._type = xpath_node_set::type_sorted;
+
+ if (c.n.node()) ns.push_back(c.n.node().root());
+ else if (c.n.attribute()) ns.push_back(c.n.parent().root());
+
+ return ns;
+ }
+
+ default:
+ assert(!"Wrong expression for return type node set");
+ return xpath_node_set();
+ }
+ }
+
+ bool is_posinv()
+ {
+ switch (_type)
+ {
+ case ast_func_position:
+ return false;
+
+ case ast_string_constant:
+ case ast_number_constant:
+ // $$ case ast_variable:
+ return true;
+
+ case ast_step:
+ case ast_step_root:
+ return true;
+
+ case ast_predicate:
+ case ast_filter:
+ case ast_filter_posinv:
+ return true;
+
+ default:
+ if (_left && !_left->is_posinv()) return false;
+
+ for (xpath_ast_node* n = _right; n; n = n->_next)
+ if (!n->is_posinv()) return false;
+
+ return true;
+ }
+ }
+
+ xpath_value_type rettype() const
+ {
+ return static_cast<xpath_value_type>(_rettype);
+ }
+ };
+
+ struct xpath_parser
+ {
+ xpath_allocator& _alloc;
+ xpath_lexer _lexer;
+ const char_t* _query;
+ xpath_parse_result* _result;
+ jmp_buf _error_handler;
+
+ xpath_parser(const xpath_parser&);
+ xpath_parser& operator=(const xpath_parser&);
+
+ void throw_error(const char* message)
+ {
+ _result->error = message;
+ _result->offset = _lexer.current_pos() - _query;
+
+ longjmp(_error_handler, 1);
+ }
+
+ void* alloc_node()
+ {
+ void* result = _alloc.allocate(sizeof(xpath_ast_node));
+
+ if (!result) throw_error("Out of memory");
+
+ return result;
+ }
+
+ const char_t* alloc_string(const xpath_lexer_string& value)
+ {
+ if (value.begin)
+ {
+ size_t length = static_cast<size_t>(value.end - value.begin);
+
+ char_t* c = static_cast<char_t*>(_alloc.allocate((length + 1) * sizeof(char_t)));
+ if (!c) throw_error("Out of memory");
+
+ memcpy(c, value.begin, length * sizeof(char_t));
+ c[length] = 0;
+
+ return c;
+ }
+ else return 0;
+ }
+
+ xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2])
+ {
+ assert(argc <= 1);
+
+ if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
+
+ return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]);
+ }
+
+ xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
+ {
+ switch (name.begin[0])
+ {
+ case 'b':
+ if (name == PUGIXML_TEXT("boolean") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
+
+ break;
+
+ case 'c':
+ if (name == PUGIXML_TEXT("count") && argc == 1)
+ {
+ if (args[0]->rettype() != xpath_type_node_set) throw_error("count() has to be applied to node set");
+ return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
+ }
+ else if (name == PUGIXML_TEXT("contains") && argc == 2)
+ return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_string, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("concat") && argc >= 2)
+ return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
+
+ break;
+
+ case 'f':
+ if (name == PUGIXML_TEXT("false") && argc == 0)
+ return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
+ else if (name == PUGIXML_TEXT("floor") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
+
+ break;
+
+ case 'i':
+ if (name == PUGIXML_TEXT("id") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
+
+ break;
+
+ case 'l':
+ if (name == PUGIXML_TEXT("last") && argc == 0)
+ return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
+ else if (name == PUGIXML_TEXT("lang") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
+ else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
+ return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
+
+ break;
+
+ case 'n':
+ if (name == PUGIXML_TEXT("name") && argc <= 1)
+ return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
+ else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
+ return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
+ else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
+ return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("not") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
+ else if (name == PUGIXML_TEXT("number") && argc <= 1)
+ return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
+
+ break;
+
+ case 'p':
+ if (name == PUGIXML_TEXT("position") && argc == 0)
+ return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
+
+ break;
+
+ case 'r':
+ if (name == PUGIXML_TEXT("round") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
+
+ break;
+
+ case 's':
+ if (name == PUGIXML_TEXT("string") && argc <= 1)
+ return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
+ else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
+ return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_string, args[0]);
+ else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
+ return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
+ return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
+ return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
+ return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("sum") && argc == 1)
+ {
+ if (args[0]->rettype() != xpath_type_node_set) throw_error("sum() has to be applied to node set");
+ return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
+ }
+
+ break;
+
+ case 't':
+ if (name == PUGIXML_TEXT("translate") && argc == 3)
+ return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("true") && argc == 0)
+ return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
+
+ break;
+ }
+
+ throw_error("Unrecognized function or wrong parameter count");
+
+ return 0;
+ }
+
+ axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
+ {
+ specified = true;
+
+ switch (name.begin[0])
+ {
+ case 'a':
+ if (name == PUGIXML_TEXT("ancestor"))
+ return axis_ancestor;
+ else if (name == PUGIXML_TEXT("ancestor-or-self"))
+ return axis_ancestor_or_self;
+ else if (name == PUGIXML_TEXT("attribute"))
+ return axis_attribute;
+
+ break;
+
+ case 'c':
+ if (name == PUGIXML_TEXT("child"))
+ return axis_child;
+
+ break;
+
+ case 'd':
+ if (name == PUGIXML_TEXT("descendant"))
+ return axis_descendant;
+ else if (name == PUGIXML_TEXT("descendant-or-self"))
+ return axis_descendant_or_self;
+
+ break;
+
+ case 'f':
+ if (name == PUGIXML_TEXT("following"))
+ return axis_following;
+ else if (name == PUGIXML_TEXT("following-sibling"))
+ return axis_following_sibling;
+
+ break;
+
+ case 'n':
+ if (name == PUGIXML_TEXT("namespace"))
+ return axis_namespace;
+
+ break;
+
+ case 'p':
+ if (name == PUGIXML_TEXT("parent"))
+ return axis_parent;
+ else if (name == PUGIXML_TEXT("preceding"))
+ return axis_preceding;
+ else if (name == PUGIXML_TEXT("preceding-sibling"))
+ return axis_preceding_sibling;
+
+ break;
+
+ case 's':
+ if (name == PUGIXML_TEXT("self"))
+ return axis_self;
+
+ break;
+ }
+
+ specified = false;
+ return axis_child;
+ }
+
+ nodetest_t parse_node_test_type(const xpath_lexer_string& name)
+ {
+ switch (name.begin[0])
+ {
+ case 'c':
+ if (name == PUGIXML_TEXT("comment"))
+ return nodetest_type_comment;
+
+ break;
+
+ case 'n':
+ if (name == PUGIXML_TEXT("node"))
+ return nodetest_type_node;
+
+ break;
+
+ case 'p':
+ if (name == PUGIXML_TEXT("processing-instruction"))
+ return nodetest_type_pi;
+
+ break;
+
+ case 't':
+ if (name == PUGIXML_TEXT("text"))
+ return nodetest_type_text;
+
+ break;
+ }
+
+ return nodetest_none;
+ }
+
+ // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
+ xpath_ast_node* parse_primary_expression()
+ {
+ switch (_lexer.current())
+ {
+ case lex_var_ref:
+ {
+ throw_error("Variables are not supported");
+
+ return 0;
+ }
+
+ case lex_open_brace:
+ {
+ _lexer.next();
+
+ xpath_ast_node* n = parse_expression();
+
+ if (_lexer.current() != lex_close_brace)
+ throw_error("Unmatched braces");
+
+ _lexer.next();
+
+ return n;
+ }
+
+ case lex_quoted_string:
+ {
+ const char_t* value = alloc_string(_lexer.contents());
+
+ xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);
+ _lexer.next();
+
+ return n;
+ }
+
+ case lex_number:
+ {
+ double value = 0;
+
+ if (!convert_string_to_number(_lexer.contents().begin, _lexer.contents().end, &value))
+ throw_error("Out of memory");
+
+ xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
+ _lexer.next();
+
+ return n;
+ }
+
+ case lex_string:
+ {
+ xpath_ast_node* args[2] = {0};
+ size_t argc = 0;
+
+ xpath_lexer_string function = _lexer.contents();
+ _lexer.next();
+
+ xpath_ast_node* last_arg = 0;
+
+ if (_lexer.current() != lex_open_brace)
+ throw_error("Unrecognized function call");
+ _lexer.next();
+
+ if (_lexer.current() != lex_close_brace)
+ args[argc++] = parse_expression();
+
+ while (_lexer.current() != lex_close_brace)
+ {
+ if (_lexer.current() != lex_comma)
+ throw_error("No comma between function arguments");
+ _lexer.next();
+
+ xpath_ast_node* n = parse_expression();
+
+ if (argc < 2) args[argc] = n;
+ else last_arg->set_next(n);
+
+ argc++;
+ last_arg = n;
+ }
+
+ _lexer.next();
+
+ return parse_function(function, argc, args);
+ }
+
+ default:
+ throw_error("Unrecognizable primary expression");
+
+ return 0;
+ }
+ }
+
+ // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
+ // Predicate ::= '[' PredicateExpr ']'
+ // PredicateExpr ::= Expr
+ xpath_ast_node* parse_filter_expression()
+ {
+ xpath_ast_node* n = parse_primary_expression();
+
+ while (_lexer.current() == lex_open_square_brace)
+ {
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_expression();
+
+ if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set");
+
+ bool posinv = expr->rettype() != xpath_type_number && expr->is_posinv();
+
+ n = new (alloc_node()) xpath_ast_node(posinv ? ast_filter_posinv : ast_filter, xpath_type_node_set, n, expr);
+
+ if (_lexer.current() != lex_close_square_brace)
+ throw_error("Unmatched square brace");
+
+ _lexer.next();
+ }
+
+ return n;
+ }
+
+ // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
+ // AxisSpecifier ::= AxisName '::' | '@'?
+ // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
+ // NameTest ::= '*' | NCName ':' '*' | QName
+ // AbbreviatedStep ::= '.' | '..'
+ xpath_ast_node* parse_step(xpath_ast_node* set)
+ {
+ if (set && set->rettype() != xpath_type_node_set)
+ throw_error("Step has to be applied to node set");
+
+ bool axis_specified = false;
+ axis_t axis = axis_child; // implied child axis
+
+ if (_lexer.current() == lex_axis_attribute)
+ {
+ axis = axis_attribute;
+ axis_specified = true;
+
+ _lexer.next();
+ }
+ else if (_lexer.current() == lex_dot)
+ {
+ _lexer.next();
+
+ return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
+ }
+ else if (_lexer.current() == lex_double_dot)
+ {
+ _lexer.next();
+
+ return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
+ }
+
+ nodetest_t nt_type = nodetest_none;
+ xpath_lexer_string nt_name;
+
+ if (_lexer.current() == lex_string)
+ {
+ // node name test
+ nt_name = _lexer.contents();
+ _lexer.next();
+
+ // was it an axis name?
+ if (_lexer.current() == lex_double_colon)
+ {
+ // parse axis name
+ if (axis_specified) throw_error("Two axis specifiers in one step");
+
+ axis = parse_axis_name(nt_name, axis_specified);
+
+ if (!axis_specified) throw_error("Unknown axis");
+
+ // read actual node test
+ _lexer.next();
+
+ if (_lexer.current() == lex_multiply)
+ {
+ nt_type = nodetest_all;
+ nt_name = xpath_lexer_string();
+ _lexer.next();
+ }
+ else if (_lexer.current() == lex_string)
+ {
+ nt_name = _lexer.contents();
+ _lexer.next();
+ }
+ else throw_error("Unrecognized node test");
+ }
+
+ if (nt_type == nodetest_none)
+ {
+ // node type test or processing-instruction
+ if (_lexer.current() == lex_open_brace)
+ {
+ _lexer.next();
+
+ if (_lexer.current() == lex_close_brace)
+ {
+ _lexer.next();
+
+ nt_type = parse_node_test_type(nt_name);
+
+ if (nt_type == nodetest_none) throw_error("Unrecognized node type");
+
+ nt_name = xpath_lexer_string();
+ }
+ else if (nt_name == PUGIXML_TEXT("processing-instruction"))
+ {
+ if (_lexer.current() != lex_quoted_string)
+ throw_error("Only literals are allowed as arguments to processing-instruction()");
+
+ nt_type = nodetest_pi;
+ nt_name = _lexer.contents();
+ _lexer.next();
+
+ if (_lexer.current() != lex_close_brace)
+ throw_error("Unmatched brace near processing-instruction()");
+ _lexer.next();
+ }
+ else
+ throw_error("Unmatched brace near node type test");
+
+ }
+ // QName or NCName:*
+ else
+ {
+ const char_t* colon_pos = pstd::find(nt_name.begin, nt_name.end, ':');
+
+ if (colon_pos + 2 == nt_name.end && colon_pos[1] == '*') // NCName:*
+ {
+ nt_name.end--; // erase *
+
+ nt_type = nodetest_all_in_namespace;
+ }
+ else nt_type = nodetest_name;
+ }
+ }
+ }
+ else if (_lexer.current() == lex_multiply)
+ {
+ nt_type = nodetest_all;
+ _lexer.next();
+ }
+ else throw_error("Unrecognized node test");
+
+ xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));
+
+ xpath_ast_node* last = 0;
+
+ while (_lexer.current() == lex_open_square_brace)
+ {
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_expression();
+
+ xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr);
+
+ if (_lexer.current() != lex_close_square_brace)
+ throw_error("Unmatched square brace");
+ _lexer.next();
+
+ if (last) last->set_next(pred);
+ else n->set_right(pred);
+
+ last = pred;
+ }
+
+ return n;
+ }
+
+ // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
+ xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
+ {
+ xpath_ast_node* n = parse_step(set);
+
+ while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
+ {
+ lexeme_t l = _lexer.current();
+ _lexer.next();
+
+ if (l == lex_double_slash)
+ n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
+
+ n = parse_step(n);
+ }
+
+ return n;
+ }
+
+ // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
+ // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
+ xpath_ast_node* parse_location_path()
+ {
+ if (_lexer.current() == lex_slash)
+ {
+ _lexer.next();
+
+ xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
+
+ // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
+ lexeme_t l = _lexer.current();
+
+ if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
+ return parse_relative_location_path(n);
+ else
+ return n;
+ }
+ else if (_lexer.current() == lex_double_slash)
+ {
+ _lexer.next();
+
+ xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
+ n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
+
+ return parse_relative_location_path(n);
+ }
+ else
+ {
+ return parse_relative_location_path(0);
+ }
+ }
+
+ // PathExpr ::= LocationPath
+ // | FilterExpr
+ // | FilterExpr '/' RelativeLocationPath
+ // | FilterExpr '//' RelativeLocationPath
+ xpath_ast_node* parse_path_expression()
+ {
+ // Clarification.
+ // PathExpr begins with either LocationPath or FilterExpr.
+ // FilterExpr begins with PrimaryExpr
+ // PrimaryExpr begins with '$' in case of it being a variable reference,
+ // '(' in case of it being an expression, string literal, number constant or
+ // function call.
+
+ if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
+ _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
+ _lexer.current() == lex_string)
+ {
+ if (_lexer.current() == lex_string)
+ {
+ // This is either a function call, or not - if not, we shall proceed with location path
+ const char_t* state = _lexer.state();
+
+ while (IS_CHARTYPEX(*state, ctx_space)) ++state;
+
+ if (*state != '(') return parse_location_path();
+
+ // This looks like a function call; however this still can be a node-test. Check it.
+ if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();
+ }
+
+ xpath_ast_node* n = parse_filter_expression();
+
+ if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
+ {
+ lexeme_t l = _lexer.current();
+ _lexer.next();
+
+ if (l == lex_double_slash)
+ {
+ if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
+
+ n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
+ }
+
+ // select from location path
+ return parse_relative_location_path(n);
+ }
+
+ return n;
+ }
+ else return parse_location_path();
+ }
+
+ // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
+ xpath_ast_node* parse_union_expression()
+ {
+ xpath_ast_node* n = parse_path_expression();
+
+ while (_lexer.current() == lex_union)
+ {
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_union_expression();
+
+ if (n->rettype() != xpath_type_node_set || expr->rettype() != xpath_type_node_set)
+ throw_error("Union operator has to be applied to node sets");
+
+ n = new (alloc_node()) xpath_ast_node(ast_op_union, xpath_type_node_set, n, expr);
+ }
+
+ return n;
+ }
+
+ // UnaryExpr ::= UnionExpr | '-' UnaryExpr
+ xpath_ast_node* parse_unary_expression()
+ {
+ if (_lexer.current() == lex_minus)
+ {
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_unary_expression();
+
+ return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
+ }
+ else return parse_union_expression();
+ }
+
+ // MultiplicativeExpr ::= UnaryExpr
+ // | MultiplicativeExpr '*' UnaryExpr
+ // | MultiplicativeExpr 'div' UnaryExpr
+ // | MultiplicativeExpr 'mod' UnaryExpr
+ xpath_ast_node* parse_multiplicative_expression()
+ {
+ xpath_ast_node* n = parse_unary_expression();
+
+ while (_lexer.current() == lex_multiply || (_lexer.current() == lex_string &&
+ (_lexer.contents() == PUGIXML_TEXT("mod") || _lexer.contents() == PUGIXML_TEXT("div"))))
+ {
+ ast_type_t op = _lexer.current() == lex_multiply ? ast_op_multiply :
+ _lexer.contents().begin[0] == 'd' ? ast_op_divide : ast_op_mod;
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_unary_expression();
+
+ n = new (alloc_node()) xpath_ast_node(op, xpath_type_number, n, expr);
+ }
+
+ return n;
+ }
+
+ // AdditiveExpr ::= MultiplicativeExpr
+ // | AdditiveExpr '+' MultiplicativeExpr
+ // | AdditiveExpr '-' MultiplicativeExpr
+ xpath_ast_node* parse_additive_expression()
+ {
+ xpath_ast_node* n = parse_multiplicative_expression();
+
+ while (_lexer.current() == lex_plus || _lexer.current() == lex_minus)
+ {
+ lexeme_t l = _lexer.current();
+
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_multiplicative_expression();
+
+ n = new (alloc_node()) xpath_ast_node(l == lex_plus ? ast_op_add : ast_op_subtract, xpath_type_number, n, expr);
+ }
+
+ return n;
+ }
+
+ // RelationalExpr ::= AdditiveExpr
+ // | RelationalExpr '<' AdditiveExpr
+ // | RelationalExpr '>' AdditiveExpr
+ // | RelationalExpr '<=' AdditiveExpr
+ // | RelationalExpr '>=' AdditiveExpr
+ xpath_ast_node* parse_relational_expression()
+ {
+ xpath_ast_node* n = parse_additive_expression();
+
+ while (_lexer.current() == lex_less || _lexer.current() == lex_less_or_equal ||
+ _lexer.current() == lex_greater || _lexer.current() == lex_greater_or_equal)
+ {
+ lexeme_t l = _lexer.current();
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_additive_expression();
+
+ n = new (alloc_node()) xpath_ast_node(l == lex_less ? ast_op_less : l == lex_greater ? ast_op_greater :
+ l == lex_less_or_equal ? ast_op_less_or_equal : ast_op_greater_or_equal, xpath_type_boolean, n, expr);
+ }
+
+ return n;
+ }
+
+ // EqualityExpr ::= RelationalExpr
+ // | EqualityExpr '=' RelationalExpr
+ // | EqualityExpr '!=' RelationalExpr
+ xpath_ast_node* parse_equality_expression()
+ {
+ xpath_ast_node* n = parse_relational_expression();
+
+ while (_lexer.current() == lex_equal || _lexer.current() == lex_not_equal)
+ {
+ lexeme_t l = _lexer.current();
+
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_relational_expression();
+
+ n = new (alloc_node()) xpath_ast_node(l == lex_equal ? ast_op_equal : ast_op_not_equal, xpath_type_boolean, n, expr);
+ }
+
+ return n;
+ }
+
+ // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
+ xpath_ast_node* parse_and_expression()
+ {
+ xpath_ast_node* n = parse_equality_expression();
+
+ while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("and"))
+ {
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_equality_expression();
+
+ n = new (alloc_node()) xpath_ast_node(ast_op_and, xpath_type_boolean, n, expr);
+ }
+
+ return n;
+ }
+
+ // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
+ xpath_ast_node* parse_or_expression()
+ {
+ xpath_ast_node* n = parse_and_expression();
+
+ while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("or"))
+ {
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_and_expression();
+
+ n = new (alloc_node()) xpath_ast_node(ast_op_or, xpath_type_boolean, n, expr);
+ }
+
+ return n;
+ }
+
+ // Expr ::= OrExpr
+ xpath_ast_node* parse_expression()
+ {
+ return parse_or_expression();
+ }
+
+ xpath_parser(const char_t* query, xpath_allocator& alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _result(result)
+ {
+ }
+
+ xpath_ast_node* parse()
+ {
+ xpath_ast_node* result = parse_expression();
+
+ if (_lexer.current() != lex_eof)
+ {
+ // there are still unparsed tokens left, error
+ throw_error("Incorrect query");
+ }
+
+ return result;
+ }
+
+ static xpath_ast_node* parse(const char_t* query, xpath_allocator& alloc, xpath_parse_result* result)
+ {
+ xpath_parser parser(query, alloc, result);
+
+ int error = setjmp(parser._error_handler);
+
+ return (error == 0) ? parser.parse() : 0;
+ }
+ };
+
+ const char* xpath_parse_result::description() const
+ {
+ return error ? error : "No error";
+ }
+
+ xpath_query::xpath_query(const char_t* query): _alloc(0), _root(0)
+ {
+ _result.error = 0;
+ _result.offset = 0;
+
+ _alloc = xpath_allocator::create();
+
+ if (!_alloc)
+ {
+ _result.error = "Out of memory";
+ }
+ else
+ {
+ _root = xpath_parser::parse(query, *_alloc, &_result);
+ }
+
+ if (!_root)
+ {
+ xpath_allocator::destroy(_alloc);
+ _alloc = 0;
+
+ #ifndef PUGIXML_NO_EXCEPTIONS
+ throw xpath_exception(_result);
+ #endif
+ }
+ }
+
+ xpath_query::~xpath_query()
+ {
+ xpath_allocator::destroy(_alloc);
+ }
+
+ xpath_value_type xpath_query::return_type() const
+ {
+ if (!_root) return xpath_type_none;
+
+ return _root->rettype();
+ }
+
+ bool xpath_query::evaluate_boolean(const xpath_node& n) const
+ {
+ if (!_root) return false;
+
+ xpath_context c(n, 1, 1);
+
+ return _root->eval_boolean(c);
+ }
+
+ double xpath_query::evaluate_number(const xpath_node& n) const
+ {
+ if (!_root) return gen_nan();
+
+ xpath_context c(n, 1, 1);
+
+ return _root->eval_number(c);
+ }
+
+#ifndef PUGIXML_NO_STL
+ string_t xpath_query::evaluate_string(const xpath_node& n) const
+ {
+ if (!_root) return string_t();
+
+ xpath_context c(n, 1, 1);
+
+ return _root->eval_string(c).c_str();
+ }
+#endif
+
+ size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
+ {
+ xpath_context c(n, 1, 1);
+ xpath_string r = _root ? _root->eval_string(c) : xpath_string();
+
+ size_t size = r.length() + 1;
+
+ // $$ zero-terminate?
+ if (capacity > 0) memcpy(buffer, r.c_str(), (size < capacity ? size : capacity) * sizeof(char_t));
+
+ return size;
+ }
+
+ xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
+ {
+ if (!_root) return xpath_node_set();
+
+ if (_root->rettype() != xpath_type_node_set)
+ {
+ #ifdef PUGIXML_NO_EXCEPTIONS
+ return xpath_node_set();
+ #else
+ xpath_parse_result result = {"Expression does not evaluate to node set", 0};
+ throw xpath_exception(result);
+ #endif
+ }
+
+ xpath_context c(n, 1, 1);
+
+ return _root->eval_node_set(c);
+ }
+
+ const xpath_parse_result& xpath_query::result() const
+ {
+ return _result;
+ }
+
+ xpath_query::operator unspecified_bool_type() const
+ {
+ return _root ? &xpath_query::_root : 0;
+ }
+
+ bool xpath_query::operator!() const
+ {
+ return !_root;
+ }
+
+ xpath_node xml_node::select_single_node(const char_t* query) const
+ {
+ xpath_query q(query);
+ return select_single_node(q);
+ }
+
+ xpath_node xml_node::select_single_node(const xpath_query& query) const
+ {
+ xpath_node_set s = query.evaluate_node_set(*this);
+ return s.empty() ? xpath_node() : s.first();
+ }
+
+ xpath_node_set xml_node::select_nodes(const char_t* query) const
+ {
+ xpath_query q(query);
+ return select_nodes(q);
+ }
+
+ xpath_node_set xml_node::select_nodes(const xpath_query& query) const
+ {
+ return query.evaluate_node_set(*this);
+ }
+}
+
+#endif
+
/**
* Copyright (c) 2006-2010 Arseny Kapoulkine
*
diff --git a/src/pugixpath.cpp b/src/pugixpath.cpp
deleted file mode 100644
index 7ab466c..0000000
--- a/src/pugixpath.cpp
+++ /dev/null
@@ -1,3890 +0,0 @@
-/**
- * pugixml parser - version 0.9
- * --------------------------------------------------------
- * Copyright (C) 2006-2010, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
- * Report bugs and download new versions at http://code.google.com/p/pugixml/
- *
- * This library is distributed under the MIT License. See notice at the end
- * of this file.
- *
- * This work is based on the pugxml parser, which is:
- * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
- */
-
-#include "pugixml.hpp"
-
-#ifndef PUGIXML_NO_XPATH
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <setjmp.h>
-#include <ctype.h>
-#include <math.h>
-#include <float.h>
-
-#ifdef PUGIXML_WCHAR_MODE
-# include <wchar.h>
-#endif
-
-#include <new>
-
-#ifndef PUGIXML_NO_STL
-# include <string>
-#endif
-
-// int32_t
-#if !defined(_MSC_VER) || _MSC_VER >= 1600
-# include <stdint.h>
-#else
-typedef __int32 int32_t;
-#endif
-
-#if defined(_MSC_VER)
-# pragma warning(disable: 4127) // conditional expression is constant
-# pragma warning(disable: 4324) // structure was padded due to __declspec(align())
-# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
-# pragma warning(disable: 4702) // unreachable code
-# pragma warning(disable: 4996) // this function or variable may be unsafe
-#endif
-
-#ifdef __INTEL_COMPILER
-# pragma warning(disable: 1478 1786) // function was declared "deprecated"
-#endif
-
-#ifdef __SNC__
-# pragma diag_suppress=237 // controlling expression is constant
-#endif
-
-// String utilities prototypes
-namespace pugi
-{
- namespace impl
- {
- size_t strlen(const char_t* s);
- bool strequal(const char_t* src, const char_t* dst);
- bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count);
- void widen_ascii(wchar_t* dest, const char* source);
- }
-}
-
-// STL replacements
-namespace pstd
-{
- struct equal_to
- {
- template <typename T> bool operator()(const T& lhs, const T& rhs) const
- {
- return lhs == rhs;
- }
- };
-
- struct not_equal_to
- {
- template <typename T> bool operator()(const T& lhs, const T& rhs) const
- {
- return lhs != rhs;
- }
- };
-
- struct less
- {
- template <typename T> bool operator()(const T& lhs, const T& rhs) const
- {
- return lhs < rhs;
- }
- };
-
- struct less_equal
- {
- template <typename T> bool operator()(const T& lhs, const T& rhs) const
- {
- return lhs <= rhs;
- }
- };
-
- template <typename T> void swap(T& lhs, T& rhs)
- {
- T temp = lhs;
- lhs = rhs;
- rhs = temp;
- }
-
- template <typename I, typename J> void copy(I begin, I end, J target)
- {
- while (begin != end) *target++ = *begin++;
- }
-
- template <typename I, typename T> I find(I begin, I end, T elem)
- {
- for (I it = begin; it != end; ++it)
- if (*it == elem)
- return it;
-
- return end;
- }
-
- template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
- {
- I result = begin;
-
- for (I it = begin + 1; it != end; ++it)
- if (pred(*it, *result))
- result = it;
-
- return result;
- }
-
- template <typename I> void reverse(I begin, I end)
- {
- while (begin + 1 < end) swap(*begin++, *--end);
- }
-
- template <typename I> I unique(I begin, I end)
- {
- // fast skip head
- while (begin + 1 < end && *begin != *(begin + 1)) begin++;
-
- if (begin == end) return begin;
-
- // last written element
- I write = begin++;
-
- // merge unique elements
- while (begin != end)
- {
- if (*begin != *write)
- *++write = *begin++;
- else
- begin++;
- }
-
- // past-the-end (write points to live element)
- return write + 1;
- }
-
- template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
- {
- while (begin != end) swap(*begin, *min_element(begin, end, pred)), begin++;
- }
-}
-
-namespace
-{
- using namespace pugi;
-
- class xpath_string
- {
- const char_t* _buffer;
- bool _uses_heap;
-
- static char_t* duplicate_string(const char_t* string, size_t length)
- {
- char_t* result = static_cast<char_t*>(get_memory_allocation_function()((length + 1) * sizeof(char_t)));
- if (!result) return 0; // $$ out of memory
-
- memcpy(result, string, length * sizeof(char_t));
- result[length] = 0;
-
- return result;
- }
-
- static char_t* duplicate_string(const char_t* string)
- {
- return duplicate_string(string, impl::strlen(string));
- }
-
- public:
- xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false)
- {
- }
-
- ~xpath_string()
- {
- if (_uses_heap) get_memory_deallocation_function()(const_cast<char_t*>(_buffer));
- }
-
- explicit xpath_string(const char_t* str)
- {
- bool empty = (*str == 0);
-
- _buffer = empty ? PUGIXML_TEXT("") : duplicate_string(str);
- _uses_heap = !empty;
- }
-
- explicit xpath_string(const char_t* str, bool use_heap): _buffer(str), _uses_heap(use_heap)
- {
- }
-
- xpath_string(const char_t* begin, const char_t* end)
- {
- assert(begin <= end);
-
- if (begin != end)
- {
- _buffer = duplicate_string(begin, static_cast<size_t>(end - begin));
- _uses_heap = true;
- }
- else
- {
- _buffer = PUGIXML_TEXT("");
- _uses_heap = false;
- }
- }
-
- xpath_string(const xpath_string& o)
- {
- _buffer = o._uses_heap ? duplicate_string(o._buffer) : o._buffer;
- _uses_heap = o._uses_heap;
- }
-
- xpath_string& operator=(const xpath_string& o)
- {
- xpath_string temp(o);
- swap(temp);
-
- return *this;
- }
-
- void swap(xpath_string& o)
- {
- pstd::swap(_buffer, o._buffer);
- pstd::swap(_uses_heap, o._uses_heap);
- }
-
- void append(const xpath_string& o)
- {
- // skip empty sources
- if (!*o._buffer) return;
-
- // fast append for empty target and constant source
- if (!*_buffer && !o._uses_heap)
- {
- _buffer = o._buffer;
- _uses_heap = false;
- }
- else
- {
- // need to make heap copy
- size_t target_length = impl::strlen(_buffer);
- size_t source_length = impl::strlen(o._buffer);
- size_t length = target_length + source_length;
-
- // allocate new buffer
- char_t* result = static_cast<char_t*>(get_memory_allocation_function()((length + 1) * sizeof(char_t)));
- if (!result) return; // $$ out of memory
-
- // append both strings in the new buffer
- memcpy(result, _buffer, target_length * sizeof(char_t));
- memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
- result[length] = 0;
-
- // finalize
- xpath_string(result, true).swap(*this);
- }
- }
-
- const char_t* c_str() const
- {
- return _buffer;
- }
-
- size_t length() const
- {
- return impl::strlen(_buffer);
- }
-
- char_t* data()
- {
- // make private heap copy
- if (!_uses_heap)
- {
- _buffer = duplicate_string(_buffer);
- _uses_heap = true;
- }
-
- return const_cast<char_t*>(_buffer);
- }
-
- bool empty() const
- {
- return *_buffer == 0;
- }
-
- bool operator==(const xpath_string& o) const
- {
- return impl::strequal(_buffer, o._buffer);
- }
-
- bool operator!=(const xpath_string& o) const
- {
- return !impl::strequal(_buffer, o._buffer);
- }
- };
-
- xpath_string xpath_string_const(const char_t* str)
- {
- return xpath_string(str, false);
- }
-}
-
-namespace
-{
- using namespace pugi;
-
- enum chartypex
- {
- ctx_space = 1, // \r, \n, space, tab
- ctx_start_symbol = 2, // Any symbol > 127, a-z, A-Z, _
- ctx_digit = 4, // 0-9
- ctx_symbol = 8 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
- };
-
- const unsigned char chartypex_table[256] =
- {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 0-15
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 0, // 32-47
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 0, 0, 0, 0, 0, // 48-63
- 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 64-79
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 10, // 80-95
- 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 96-111
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, // 112-127
-
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 128+
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
- };
-
-#ifdef PUGIXML_WCHAR_MODE
- #define IS_CHARTYPEX(c, ct) ((static_cast<unsigned int>(c) < 128 ? chartypex_table[static_cast<unsigned int>(c)] : chartypex_table[128]) & (ct))
-#else
- #define IS_CHARTYPEX(c, ct) (chartypex_table[static_cast<unsigned char>(c)] & (ct))
-#endif
-
- bool starts_with(const char_t* string, const char_t* pattern)
- {
- while (*pattern && *string == *pattern)
- {
- string++;
- pattern++;
- }
-
- return *pattern == 0;
- }
-
- const char_t* find_char(const char_t* s, char_t c)
- {
- #ifdef PUGIXML_WCHAR_MODE
- return wcschr(s, c);
- #else
- return strchr(s, c);
- #endif
- }
-
- const char_t* find_substring(const char_t* s, const char_t* p)
- {
- #ifdef PUGIXML_WCHAR_MODE
- return wcsstr(s, p);
- #else
- return strstr(s, p);
- #endif
- }
-
- xpath_string string_value(const xpath_node& na)
- {
- if (na.attribute())
- return xpath_string_const(na.attribute().value());
- else
- {
- const xml_node& n = na.node();
-
- switch (n.type())
- {
- case node_pcdata:
- case node_cdata:
- case node_comment:
- case node_pi:
- return xpath_string_const(n.value());
-
- case node_document:
- case node_element:
- {
- xpath_string result;
-
- xml_node cur = n.first_child();
-
- while (cur && cur != n)
- {
- if (cur.type() == node_pcdata || cur.type() == node_cdata)
- result.append(xpath_string_const(cur.value()));
-
- if (cur.first_child())
- cur = cur.first_child();
- else if (cur.next_sibling())
- cur = cur.next_sibling();
- else
- {
- while (!cur.next_sibling() && cur != n)
- cur = cur.parent();
-
- if (cur != n) cur = cur.next_sibling();
- }
- }
-
- return result;
- }
-
- default:
- return xpath_string();
- }
- }
- }
-
- unsigned int node_height(xml_node n)
- {
- unsigned int result = 0;
-
- while (n)
- {
- ++result;
- n = n.parent();
- }
-
- return result;
- }
-
- // precondition: node_height of ln is <= node_height of rn, ln != rn
- bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh)
- {
- assert(lh <= rh);
-
- while (lh < rh)
- {
- --rh;
- rn = rn.parent();
- }
-
- if (ln == rn) return true;
-
- while (ln.parent() != rn.parent())
- {
- ln = ln.parent();
- rn = rn.parent();
- }
-
- for (; ln; ln = ln.next_sibling())
- if (ln == rn)
- return true;
-
- return false;
- }
-
- bool node_is_ancestor(xml_node parent, xml_node node)
- {
- while (node && node != parent) node = node.parent();
-
- return parent && node == parent;
- }
-
- struct document_order_comparator
- {
- bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
- {
- xml_node ln = lhs.node(), rn = rhs.node();
-
- const void* lo = lhs.attribute() ? lhs.attribute().document_order() : ln.document_order();
- const void* ro = rhs.attribute() ? rhs.attribute().document_order() : rn.document_order();
-
- if (lo && ro) return lo < ro;
-
- if (lhs.attribute() && rhs.attribute())
- {
- if (lhs.parent() == rhs.parent())
- {
- for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
- if (a == rhs.attribute())
- return true;
-
- return false;
- }
-
- ln = lhs.parent();
- rn = rhs.parent();
- }
- else if (lhs.attribute())
- {
- if (lhs.parent() == rhs.node()) return false;
-
- ln = lhs.parent();
- }
- else if (rhs.attribute())
- {
- if (rhs.parent() == lhs.node()) return true;
-
- rn = rhs.parent();
- }
-
- if (ln == rn) return false;
-
- unsigned int lh = node_height(ln);
- unsigned int rh = node_height(rn);
-
- return (lh <= rh) ? node_is_before(ln, lh, rn, rh) : !node_is_before(rn, rh, ln, lh);
- }
- };
-
- struct duplicate_comparator
- {
- bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
- {
- if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
- else return rhs.attribute() ? false : lhs.node() < rhs.node();
- }
- };
-
- double gen_nan()
- {
- #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
- union { float f; int32_t i; } u[sizeof(float) == sizeof(int32_t) ? 1 : -1];
- u[0].i = 0x7fc00000;
- return u[0].f;
- #else
- // fallback
- const volatile double zero = 0.0;
- return zero / zero;
- #endif
- }
-
- bool is_nan(double value)
- {
- #if defined(_MSC_VER) || defined(__BORLANDC__)
- return !!_isnan(value);
- #elif defined(fpclassify) && defined(FP_NAN)
- return fpclassify(value) == FP_NAN;
- #else
- // fallback
- const volatile double v = value;
- return v != v;
- #endif
- }
-
- const char_t* convert_number_to_string_special(double value)
- {
- #if defined(_MSC_VER) || defined(__BORLANDC__)
- if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
- if (_isnan(value)) return PUGIXML_TEXT("NaN");
- return PUGIXML_TEXT("-Infinity") + (value > 0);
- #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
- switch (fpclassify(value))
- {
- case FP_NAN:
- return PUGIXML_TEXT("NaN");
-
- case FP_INFINITE:
- return PUGIXML_TEXT("-Infinity") + (value > 0);
-
- case FP_ZERO:
- return PUGIXML_TEXT("0");
-
- default:
- return 0;
- }
- #else
- // fallback
- const volatile double v = value;
-
- if (v == 0) return PUGIXML_TEXT("0");
- if (v != v) return PUGIXML_TEXT("NaN");
- if (v * 2 == v) return PUGIXML_TEXT("-Infinity") + (value > 0);
- return 0;
- #endif
- }
-
- bool convert_number_to_boolean(double value)
- {
- return (value != 0 && !is_nan(value));
- }
-
- void truncate_zeros(char* begin, char* end)
- {
- while (begin != end && end[-1] == '0') end--;
-
- *end = 0;
- }
-
- // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
-#if defined(_MSC_VER) && _MSC_VER >= 1400
- void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
- {
- // get base values
- int sign, exponent;
- _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
-
- // truncate redundant zeros
- truncate_zeros(buffer, buffer + strlen(buffer));
-
- // fill results
- *out_mantissa = buffer;
- *out_exponent = exponent;
- }
-#else
- void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
- {
- // get a scientific notation value with IEEE DBL_DIG decimals
- sprintf(buffer, "%.*e", DBL_DIG, value);
- assert(strlen(buffer) < buffer_size);
- (void)!buffer_size;
-
- // get the exponent (possibly negative)
- char* exponent_string = strchr(buffer, 'e');
- assert(exponent_string);
-
- int exponent = atoi(exponent_string + 1);
-
- // extract mantissa string: skip sign
- char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
- assert(mantissa[0] != '0' && mantissa[1] == '.');
-
- // divide mantissa by 10 to eliminate integer part
- mantissa[1] = mantissa[0];
- mantissa++;
- exponent++;
-
- // remove extra mantissa digits and zero-terminate mantissa
- truncate_zeros(mantissa, exponent_string);
-
- // fill results
- *out_mantissa = mantissa;
- *out_exponent = exponent;
- }
-#endif
-
- xpath_string convert_number_to_string(double value)
- {
- // try special number conversion
- const char_t* special = convert_number_to_string_special(value);
- if (special) return xpath_string_const(special);
-
- // get mantissa + exponent form
- char mantissa_buffer[64];
-
- char* mantissa;
- int exponent;
- convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
-
- // make the number!
- char_t result[512];
- char_t* s = result;
-
- // sign
- if (value < 0) *s++ = '-';
-
- // integer part
- if (exponent <= 0)
- {
- *s++ = '0';
- }
- else
- {
- while (exponent > 0)
- {
- assert(*mantissa == 0 || (unsigned)(*mantissa - '0') <= 9);
- *s++ = *mantissa ? *mantissa++ : '0';
- exponent--;
- }
- }
-
- // fractional part
- if (*mantissa)
- {
- // decimal point
- *s++ = '.';
-
- // extra zeroes from negative exponent
- while (exponent < 0)
- {
- *s++ = '0';
- exponent++;
- }
-
- // extra mantissa digits
- while (*mantissa)
- {
- assert((unsigned)(*mantissa - '0') <= 9);
- *s++ = *mantissa++;
- }
- }
-
- // zero-terminate
- assert(s < result + sizeof(result) / sizeof(result[0]));
- *s = 0;
-
- return xpath_string(result);
- }
-
- bool check_string_to_number_format(const char_t* string)
- {
- // parse leading whitespace
- while (IS_CHARTYPEX(*string, ctx_space)) ++string;
-
- // parse sign
- if (*string == '-') ++string;
-
- if (!*string) return false;
-
- // if there is no integer part, there should be a decimal part with at least one digit
- if (!IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !IS_CHARTYPEX(string[1], ctx_digit))) return false;
-
- // parse integer part
- while (IS_CHARTYPEX(*string, ctx_digit)) ++string;
-
- // parse decimal part
- if (*string == '.')
- {
- ++string;
-
- while (IS_CHARTYPEX(*string, ctx_digit)) ++string;
- }
-
- // parse trailing whitespace
- while (IS_CHARTYPEX(*string, ctx_space)) ++string;
-
- return *string == 0;
- }
-
- double convert_string_to_number(const char_t* string)
- {
- // check string format
- if (!check_string_to_number_format(string)) return gen_nan();
-
- // parse string
- #ifdef PUGIXML_WCHAR_MODE
- return wcstod(string, 0);
- #else
- return atof(string);
- #endif
- }
-
- bool convert_string_to_number(const char_t* begin, const char_t* end, double* out_result)
- {
- char_t buffer[32];
-
- size_t length = static_cast<size_t>(end - begin);
- char_t* scratch = buffer;
-
- if (length >= sizeof(buffer) / sizeof(buffer[0]))
- {
- // need to make dummy on-heap copy
- scratch = static_cast<char_t*>(get_memory_allocation_function()((length + 1) * sizeof(char_t)));
- if (!scratch) return false;
- }
-
- // copy string to zero-terminated buffer and perform conversion
- memcpy(scratch, begin, length * sizeof(char_t));
- scratch[length] = 0;
-
- *out_result = convert_string_to_number(scratch);
-
- // free dummy buffer
- if (scratch != buffer) get_memory_deallocation_function()(scratch);
-
- return true;
- }
-
- double round_nearest(double value)
- {
- return floor(value + 0.5);
- }
-
- double round_nearest_nzero(double value)
- {
- // same as round_nearest, but returns -0 for [-0.5, -0]
- // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
- return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
- }
-
- const char_t* qualified_name(const xpath_node& node)
- {
- return node.attribute() ? node.attribute().name() : node.node().name();
- }
-
- const char_t* local_name(const xpath_node& node)
- {
- const char_t* name = qualified_name(node);
- const char_t* p = find_char(name, ':');
-
- return p ? p + 1 : name;
- }
-
- struct namespace_uri_predicate
- {
- const char_t* prefix;
- size_t prefix_length;
-
- namespace_uri_predicate(const char_t* name)
- {
- const char_t* pos = find_char(name, ':');
-
- prefix = pos ? name : 0;
- prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
- }
-
- bool operator()(const xml_attribute& a) const
- {
- const char_t* name = a.name();
-
- if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
-
- return prefix ? name[5] == ':' && impl::strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
- }
- };
-
- const char_t* namespace_uri(const xml_node& node)
- {
- namespace_uri_predicate pred = node.name();
-
- xml_node p = node;
-
- while (p)
- {
- xml_attribute a = p.find_attribute(pred);
-
- if (a) return a.value();
-
- p = p.parent();
- }
-
- return PUGIXML_TEXT("");
- }
-
- const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent)
- {
- namespace_uri_predicate pred = attr.name();
-
- // Default namespace does not apply to attributes
- if (!pred.prefix) return PUGIXML_TEXT("");
-
- xml_node p = parent;
-
- while (p)
- {
- xml_attribute a = p.find_attribute(pred);
-
- if (a) return a.value();
-
- p = p.parent();
- }
-
- return PUGIXML_TEXT("");
- }
-
- const char_t* namespace_uri(const xpath_node& node)
- {
- return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
- }
-
- void normalize_space(char_t* buffer)
- {
- char_t* write = buffer;
-
- for (char_t* it = buffer; *it; )
- {
- char_t ch = *it++;
-
- if (IS_CHARTYPEX(ch, ctx_space))
- {
- // replace whitespace sequence with single space
- while (IS_CHARTYPEX(*it, ctx_space)) it++;
-
- // avoid leading spaces
- if (write != buffer) *write++ = ' ';
- }
- else *write++ = ch;
- }
-
- // remove trailing space
- if (write != buffer && IS_CHARTYPEX(write[-1], ctx_space)) write--;
-
- // zero-terminate
- *write = 0;
- }
-
- void translate(char_t* buffer, const char_t* from, const char_t* to)
- {
- size_t to_length = impl::strlen(to);
-
- char_t* write = buffer;
-
- while (*buffer)
- {
- #ifdef __DMC__
- volatile // explicitly store to local to work around DMC bug (it loads 4 bytes from buffer otherwise)
- #endif
- char_t ch = *buffer++;
-
- const char_t* pos = find_char(from, ch);
-
- if (!pos)
- *write++ = ch; // do not process
- else if (static_cast<size_t>(pos - from) < to_length)
- *write++ = to[pos - from]; // replace
- }
-
- // zero-terminate
- *write = 0;
- }
-}
-
-namespace pugi
-{
-#ifndef PUGIXML_NO_EXCEPTIONS
- xpath_exception::xpath_exception(const xpath_parse_result& result): _result(result)
- {
- assert(result.error);
- }
-
- const char* xpath_exception::what() const throw()
- {
- return _result.error;
- }
-
- const xpath_parse_result& xpath_exception::result() const
- {
- return _result;
- }
-#endif
-
- const size_t xpath_memory_block_size = 4096;
-
- class xpath_allocator
- {
- struct memory_block
- {
- memory_block* next;
-
- char data[xpath_memory_block_size];
- };
-
- memory_block* _root;
- size_t _root_size;
-
- memory_block _first;
-
- public:
- static xpath_allocator* create()
- {
- void* memory = get_memory_allocation_function()(sizeof(xpath_allocator));
-
- return new (memory) xpath_allocator();
- }
-
- static void destroy(xpath_allocator* alloc)
- {
- if (!alloc) return;
-
- // free all allocated pages
- memory_block* cur = alloc->_root;
- memory_block* first = &alloc->_first;
-
- while (cur != first)
- {
- memory_block* next = cur->next;
-
- get_memory_deallocation_function()(cur);
-
- cur = next;
- }
-
- // free allocator memory (with the first page)
- get_memory_deallocation_function()(alloc);
- }
-
- xpath_allocator()
- {
- _root = &_first;
- _root_size = 0;
- _first.next = 0;
- }
-
- void* allocate(size_t size)
- {
- // align size so that we're able to store pointers in subsequent blocks
- size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
-
- if (_root_size + size <= xpath_memory_block_size)
- {
- void* buf = _root->data + _root_size;
- _root_size += size;
- return buf;
- }
- else
- {
- size_t block_data_size = (size > xpath_memory_block_size) ? size : xpath_memory_block_size;
- size_t block_size = block_data_size + offsetof(memory_block, data);
-
- memory_block* block = static_cast<memory_block*>(get_memory_allocation_function()(block_size));
- if (!block) return 0;
-
- block->next = _root;
-
- _root = block;
- _root_size = size;
-
- return block->data;
- }
- }
- };
-
- xpath_node::xpath_node()
- {
- }
-
- xpath_node::xpath_node(const xml_node& node): _node(node)
- {
- }
-
- xpath_node::xpath_node(const xml_attribute& attribute, const xml_node& parent): _node(parent), _attribute(attribute)
- {
- }
-
- xml_node xpath_node::node() const
- {
- return _attribute ? xml_node() : _node;
- }
-
- xml_attribute xpath_node::attribute() const
- {
- return _attribute;
- }
-
- xml_node xpath_node::parent() const
- {
- return _attribute ? _node : _node.parent();
- }
-
- xpath_node::operator xpath_node::unspecified_bool_type() const
- {
- return (_node || _attribute) ? &xpath_node::_node : 0;
- }
-
- bool xpath_node::operator!() const
- {
- return !(_node || _attribute);
- }
-
- bool xpath_node::operator==(const xpath_node& n) const
- {
- return _node == n._node && _attribute == n._attribute;
- }
-
- bool xpath_node::operator!=(const xpath_node& n) const
- {
- return _node != n._node || _attribute != n._attribute;
- }
-
-#ifdef __BORLANDC__
- bool operator&&(const xpath_node& lhs, bool rhs)
- {
- return (bool)lhs && rhs;
- }
-
- bool operator||(const xpath_node& lhs, bool rhs)
- {
- return (bool)lhs || rhs;
- }
-#endif
-
- xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage), _eos(&_storage + 1)
- {
- }
-
- xpath_node_set::~xpath_node_set()
- {
- if (_begin != &_storage) get_memory_deallocation_function()(_begin);
- }
-
- xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(ns._type), _begin(&_storage), _end(&_storage), _eos(&_storage + 1)
- {
- if (ns.size() == 1)
- {
- _storage = *ns._begin;
- _end = _eos;
- }
- else
- {
- append(ns.begin(), ns.end());
- }
- }
-
- xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
- {
- if (this == &ns) return *this;
-
- // clear & append
- _type = ns._type;
- _end = _begin;
-
- append(ns.begin(), ns.end());
-
- return *this;
- }
-
- void xpath_node_set::swap(xpath_node_set& ns)
- {
- pstd::swap(_type, ns._type);
- pstd::swap(_storage, ns._storage);
- pstd::swap(_begin, ns._begin);
- pstd::swap(_end, ns._end);
- pstd::swap(_eos, ns._eos);
- }
-
- xpath_node_set::type_t xpath_node_set::type() const
- {
- return _type;
- }
-
- size_t xpath_node_set::size() const
- {
- return _end - _begin;
- }
-
- bool xpath_node_set::empty() const
- {
- return size() == 0;
- }
-
- const xpath_node& xpath_node_set::operator[](size_t index) const
- {
- assert(index < size());
- return _begin[index];
- }
-
- xpath_node_set::iterator xpath_node_set::mut_begin()
- {
- return _begin;
- }
-
- xpath_node_set::const_iterator xpath_node_set::begin() const
- {
- return _begin;
- }
-
- xpath_node_set::const_iterator xpath_node_set::end() const
- {
- return _end;
- }
-
- void xpath_node_set::sort(bool reverse)
- {
- pstd::sort(_begin, _end, document_order_comparator());
-
- if (reverse) pstd::reverse(_begin, _end);
-
- _type = reverse ? type_sorted_reverse : type_sorted;
- }
-
- void xpath_node_set::push_back(const xpath_node& n)
- {
- if (_end == _eos)
- append(&n, &n + 1);
- else
- {
- *_end = n;
- ++_end;
- }
- }
-
- void xpath_node_set::append(const_iterator begin, const_iterator end)
- {
- if (begin == end) return;
-
- size_t count = end - begin;
- size_t size = _end - _begin;
- size_t capacity = _eos - _begin;
-
- if (capacity < size + count)
- {
- if (capacity < 2) capacity = 2;
-
- while (capacity < size + count) capacity += capacity / 2;
-
- xpath_node* storage = static_cast<xpath_node*>(get_memory_allocation_function()(capacity * sizeof(xpath_node)));
- if (!storage) return; // $$ out of memory
-
- pstd::copy(_begin, _end, storage);
- // memcpy(storage, _begin, size * sizeof(xpath_node));
-
- if (_begin != &_storage) get_memory_deallocation_function()(_begin);
-
- _begin = storage;
- _end = storage + size;
- _eos = storage + capacity;
- }
-
- pstd::copy(begin, end, _end);
- // memcpy(_end, begin, count * sizeof(xpath_node));
- _end += count;
- }
-
- void xpath_node_set::truncate(iterator it)
- {
- _end = it;
- }
-
- xpath_node xpath_node_set::first() const
- {
- if (empty()) return xpath_node();
-
- switch (_type)
- {
- case type_sorted: return *_begin;
- case type_sorted_reverse: return *(_end - 1);
- case type_unsorted: return *pstd::min_element(_begin, _end, document_order_comparator());
- default: return xpath_node();
- }
- }
-
- void xpath_node_set::remove_duplicates()
- {
- if (_type == type_unsorted)
- {
- pstd::sort(_begin, _end, duplicate_comparator());
- }
-
- truncate(pstd::unique(_begin, _end));
- }
-
- struct xpath_context
- {
- xpath_node n;
- size_t position, size;
-
- xpath_context(const xpath_node& n, size_t position, size_t size): n(n), position(position), size(size)
- {
- }
- };
-
- enum lexeme_t
- {
- lex_none = 0,
- lex_equal,
- lex_not_equal,
- lex_less,
- lex_greater,
- lex_less_or_equal,
- lex_greater_or_equal,
- lex_plus,
- lex_minus,
- lex_multiply,
- lex_union,
- lex_var_ref,
- lex_open_brace,
- lex_close_brace,
- lex_quoted_string,
- lex_number,
- lex_slash,
- lex_double_slash,
- lex_open_square_brace,
- lex_close_square_brace,
- lex_string,
- lex_comma,
- lex_axis_attribute,
- lex_dot,
- lex_double_dot,
- lex_double_colon,
- lex_eof
- };
-
- struct xpath_lexer_string
- {
- const char_t* begin;
- const char_t* end;
-
- xpath_lexer_string(): begin(0), end(0)
- {
- }
-
- bool operator==(const char_t* other) const
- {
- size_t length = static_cast<size_t>(end - begin);
-
- return impl::strequalrange(other, begin, length);
- }
- };
-
- class xpath_lexer
- {
- const char_t* _cur;
- const char_t* _cur_lexeme_pos;
- xpath_lexer_string _cur_lexeme_contents;
-
- lexeme_t _cur_lexeme;
-
- public:
- explicit xpath_lexer(const char_t* query): _cur(query)
- {
- next();
- }
-
- const char_t* state() const
- {
- return _cur;
- }
-
- void next()
- {
- const char_t* cur = _cur;
-
- while (IS_CHARTYPEX(*cur, ctx_space)) ++cur;
-
- // save lexeme position for error reporting
- _cur_lexeme_pos = cur;
-
- switch (*cur)
- {
- case 0:
- _cur_lexeme = lex_eof;
- break;
-
- case '>':
- if (*(cur+1) == '=')
- {
- cur += 2;
- _cur_lexeme = lex_greater_or_equal;
- }
- else
- {
- cur += 1;
- _cur_lexeme = lex_greater;
- }
- break;
-
- case '<':
- if (*(cur+1) == '=')
- {
- cur += 2;
- _cur_lexeme = lex_less_or_equal;
- }
- else
- {
- cur += 1;
- _cur_lexeme = lex_less;
- }
- break;
-
- case '!':
- if (*(cur+1) == '=')
- {
- cur += 2;
- _cur_lexeme = lex_not_equal;
- }
- else
- {
- _cur_lexeme = lex_none;
- }
- break;
-
- case '=':
- cur += 1;
- _cur_lexeme = lex_equal;
-
- break;
-
- case '+':
- cur += 1;
- _cur_lexeme = lex_plus;
-
- break;
-
- case '-':
- cur += 1;
- _cur_lexeme = lex_minus;
-
- break;
-
- case '*':
- cur += 1;
- _cur_lexeme = lex_multiply;
-
- break;
-
- case '|':
- cur += 1;
- _cur_lexeme = lex_union;
-
- break;
-
- case '$':
- cur += 1;
- _cur_lexeme = lex_var_ref;
-
- break;
-
- case '(':
- cur += 1;
- _cur_lexeme = lex_open_brace;
-
- break;
-
- case ')':
- cur += 1;
- _cur_lexeme = lex_close_brace;
-
- break;
-
- case '[':
- cur += 1;
- _cur_lexeme = lex_open_square_brace;
-
- break;
-
- case ']':
- cur += 1;
- _cur_lexeme = lex_close_square_brace;
-
- break;
-
- case ',':
- cur += 1;
- _cur_lexeme = lex_comma;
-
- break;
-
- case '/':
- if (*(cur+1) == '/')
- {
- cur += 2;
- _cur_lexeme = lex_double_slash;
- }
- else
- {
- cur += 1;
- _cur_lexeme = lex_slash;
- }
- break;
-
- case '.':
- if (*(cur+1) == '.')
- {
- cur += 2;
- _cur_lexeme = lex_double_dot;
- }
- else if (IS_CHARTYPEX(*(cur+1), ctx_digit))
- {
- _cur_lexeme_contents.begin = cur; // .
-
- ++cur;
-
- while (IS_CHARTYPEX(*cur, ctx_digit)) cur++;
-
- _cur_lexeme_contents.end = cur;
-
- _cur_lexeme = lex_number;
- }
- else
- {
- cur += 1;
- _cur_lexeme = lex_dot;
- }
- break;
-
- case '@':
- cur += 1;
- _cur_lexeme = lex_axis_attribute;
-
- break;
-
- case '"':
- case '\'':
- {
- char_t terminator = *cur;
-
- ++cur;
-
- _cur_lexeme_contents.begin = cur;
- while (*cur && *cur != terminator) cur++;
- _cur_lexeme_contents.end = cur;
-
- if (!*cur)
- _cur_lexeme = lex_none;
- else
- {
- cur += 1;
- _cur_lexeme = lex_quoted_string;
- }
-
- break;
- }
-
- case ':':
- if (*(cur+1) == ':')
- {
- cur += 2;
- _cur_lexeme = lex_double_colon;
- }
- else
- {
- _cur_lexeme = lex_none;
- }
- break;
-
- default:
- if (IS_CHARTYPEX(*cur, ctx_digit))
- {
- _cur_lexeme_contents.begin = cur;
-
- while (IS_CHARTYPEX(*cur, ctx_digit)) cur++;
-
- if (*cur == '.')
- {
- cur++;
-
- while (IS_CHARTYPEX(*cur, ctx_digit)) cur++;
- }
-
- _cur_lexeme_contents.end = cur;
-
- _cur_lexeme = lex_number;
- }
- else if (IS_CHARTYPEX(*cur, ctx_start_symbol))
- {
- _cur_lexeme_contents.begin = cur;
-
- while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
-
- if (cur[0] == ':')
- {
- if (cur[1] == '*') // namespace test ncname:*
- {
- cur += 2; // :*
- }
- else if (IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
- {
- cur++; // :
-
- while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
- }
- }
-
- _cur_lexeme_contents.end = cur;
-
- _cur_lexeme = lex_string;
- }
- else
- {
- _cur_lexeme = lex_none;
- }
- }
-
- _cur = cur;
- }
-
- lexeme_t current() const
- {
- return _cur_lexeme;
- }
-
- const char_t* current_pos() const
- {
- return _cur_lexeme_pos;
- }
-
- const xpath_lexer_string& contents() const
- {
- assert(_cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
-
- return _cur_lexeme_contents;
- }
- };
-
- enum ast_type_t
- {
- ast_op_or, // left or right
- ast_op_and, // left and right
- ast_op_equal, // left = right
- ast_op_not_equal, // left != right
- ast_op_less, // left < right
- ast_op_greater, // left > right
- ast_op_less_or_equal, // left <= right
- ast_op_greater_or_equal, // left >= right
- ast_op_add, // left + right
- ast_op_subtract, // left - right
- ast_op_multiply, // left * right
- ast_op_divide, // left / right
- ast_op_mod, // left % right
- ast_op_negate, // left - right
- ast_op_union, // left | right
- ast_predicate, // apply predicate to set; next points to next predicate
- ast_filter, // select * from left where right
- ast_filter_posinv, // select * from left where right; proximity position invariant
- ast_string_constant, // string constant
- ast_number_constant, // number constant
- ast_func_last, // last()
- ast_func_position, // position()
- ast_func_count, // count(left)
- ast_func_id, // id(left)
- ast_func_local_name_0, // local-name()
- ast_func_local_name_1, // local-name(left)
- ast_func_namespace_uri_0, // namespace-uri()
- ast_func_namespace_uri_1, // namespace-uri(left)
- ast_func_name_0, // name()
- ast_func_name_1, // name(left)
- ast_func_string_0, // string()
- ast_func_string_1, // string(left)
- ast_func_concat, // concat(left, right, siblings)
- ast_func_starts_with, // starts_with(left, right)
- ast_func_contains, // contains(left, right)
- ast_func_substring_before, // substring-before(left, right)
- ast_func_substring_after, // substring-after(left, right)
- ast_func_substring_2, // substring(left, right)
- ast_func_substring_3, // substring(left, right, third)
- ast_func_string_length_0, // string-length()
- ast_func_string_length_1, // string-length(left)
- ast_func_normalize_space_0, // normalize-space()
- ast_func_normalize_space_1, // normalize-space(left)
- ast_func_translate, // translate(left, right, third)
- ast_func_boolean, // boolean(left)
- ast_func_not, // not(left)
- ast_func_true, // true()
- ast_func_false, // false()
- ast_func_lang, // lang(left)
- ast_func_number_0, // number()
- ast_func_number_1, // number(left)
- ast_func_sum, // sum(left)
- ast_func_floor, // floor(left)
- ast_func_ceiling, // ceiling(left)
- ast_func_round, // round(left)
- ast_step, // process set left with step
- ast_step_root // select root node
- };
-
- enum axis_t
- {
- axis_ancestor,
- axis_ancestor_or_self,
- axis_attribute,
- axis_child,
- axis_descendant,
- axis_descendant_or_self,
- axis_following,
- axis_following_sibling,
- axis_namespace,
- axis_parent,
- axis_preceding,
- axis_preceding_sibling,
- axis_self
- };
-
- enum nodetest_t
- {
- nodetest_none,
- nodetest_name,
- nodetest_type_node,
- nodetest_type_comment,
- nodetest_type_pi,
- nodetest_type_text,
- nodetest_pi,
- nodetest_all,
- nodetest_all_in_namespace
- };
-
- template <axis_t N> struct axis_to_type
- {
- static const axis_t axis;
- };
-
- template <axis_t N> const axis_t axis_to_type<N>::axis = N;
-
- class xpath_ast_node
- {
- private:
- // node type
- char _type;
- char _rettype;
-
- // for ast_step / ast_predicate
- char _axis;
- char _test;
-
- // tree node structure
- xpath_ast_node* _left;
- xpath_ast_node* _right;
- xpath_ast_node* _next;
-
- union
- {
- // value for ast_string_constant
- const char_t* string;
- // value for ast_number_constant
- double number;
- // node test for ast_step (node name/namespace/node type/pi target)
- const char_t* nodetest;
- } _data;
-
- xpath_ast_node(const xpath_ast_node&);
- xpath_ast_node& operator=(const xpath_ast_node&);
-
- template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const Comp& comp)
- {
- xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
-
- if (lt != xpath_type_node_set && rt != xpath_type_node_set)
- {
- if (lt == xpath_type_boolean || rt == xpath_type_boolean)
- return comp(lhs->eval_boolean(c), rhs->eval_boolean(c));
- else if (lt == xpath_type_number || rt == xpath_type_number)
- return comp(lhs->eval_number(c), rhs->eval_number(c));
- else if (lt == xpath_type_string || rt == xpath_type_string)
- return comp(lhs->eval_string(c), rhs->eval_string(c));
- }
- else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
- {
- xpath_node_set ls = lhs->eval_node_set(c);
- xpath_node_set rs = rhs->eval_node_set(c);
-
- for (xpath_node_set::const_iterator li = ls.begin(); li != ls.end(); ++li)
- for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri)
- {
- if (comp(string_value(*li), string_value(*ri)))
- return true;
- }
-
- return false;
- }
- else
- {
- if (lt == xpath_type_node_set)
- {
- pstd::swap(lhs, rhs);
- pstd::swap(lt, rt);
- }
-
- if (lt == xpath_type_boolean)
- return comp(lhs->eval_boolean(c), rhs->eval_boolean(c));
- else if (lt == xpath_type_number)
- {
- double l = lhs->eval_number(c);
- xpath_node_set rs = rhs->eval_node_set(c);
-
- for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri)
- {
- if (comp(l, convert_string_to_number(string_value(*ri).c_str())))
- return true;
- }
-
- return false;
- }
- else if (lt == xpath_type_string)
- {
- xpath_string l = lhs->eval_string(c);
- xpath_node_set rs = rhs->eval_node_set(c);
-
- for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri)
- {
- if (comp(l, string_value(*ri)))
- return true;
- }
-
- return false;
- }
- }
-
- assert(!"Wrong types");
- return false;
- }
-
- template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const Comp& comp)
- {
- xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
-
- if (lt != xpath_type_node_set && rt != xpath_type_node_set)
- return comp(lhs->eval_number(c), rhs->eval_number(c));
- else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
- {
- xpath_node_set ls = lhs->eval_node_set(c);
- xpath_node_set rs = rhs->eval_node_set(c);
-
- for (xpath_node_set::const_iterator li = ls.begin(); li != ls.end(); ++li)
- {
- double l = convert_string_to_number(string_value(*li).c_str());
-
- for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri)
- {
- if (comp(l, convert_string_to_number(string_value(*ri).c_str())))
- return true;
- }
- }
-
- return false;
- }
- else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
- {
- double l = lhs->eval_number(c);
- xpath_node_set rs = rhs->eval_node_set(c);
-
- for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri)
- {
- if (comp(l, convert_string_to_number(string_value(*ri).c_str())))
- return true;
- }
-
- return false;
- }
- else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
- {
- xpath_node_set ls = lhs->eval_node_set(c);
- double r = rhs->eval_number(c);
-
- for (xpath_node_set::const_iterator li = ls.begin(); li != ls.end(); ++li)
- {
- if (comp(convert_string_to_number(string_value(*li).c_str()), r))
- return true;
- }
-
- return false;
- }
- else
- {
- assert(!"Wrong types");
- return false;
- }
- }
-
- void apply_predicate(xpath_node_set& ns, size_t first, xpath_ast_node* expr)
- {
- assert(ns.size() >= first);
-
- size_t i = 1;
- size_t size = ns.size() - first;
-
- xpath_node_set::iterator last = ns.mut_begin() + first;
-
- // remove_if... or well, sort of
- for (xpath_node_set::iterator it = last; it != ns.end(); ++it, ++i)
- {
- xpath_context c(*it, i, size);
-
- if (expr->rettype() == xpath_type_number)
- {
- if (expr->eval_number(c) == i)
- *last++ = *it;
- }
- else if (expr->eval_boolean(c))
- *last++ = *it;
- }
-
- ns.truncate(last);
- }
-
- void apply_predicates(xpath_node_set& ns, size_t first)
- {
- if (ns.size() == first) return;
-
- for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
- {
- apply_predicate(ns, first, pred->_left);
- }
- }
-
- void step_push(xpath_node_set& ns, const xml_attribute& a, const xml_node& parent)
- {
- if (!a) return;
-
- const char_t* name = a.name();
-
- // There are no attribute nodes corresponding to attributes that declare namespaces
- // That is, "xmlns:..." or "xmlns"
- if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return;
-
- switch (_test)
- {
- case nodetest_name:
- if (impl::strequal(name, _data.nodetest)) ns.push_back(xpath_node(a, parent));
- break;
-
- case nodetest_type_node:
- case nodetest_all:
- ns.push_back(xpath_node(a, parent));
- break;
-
- case nodetest_all_in_namespace:
- if (starts_with(name, _data.nodetest))
- ns.push_back(xpath_node(a, parent));
- break;
-
- default:
- ;
- }
- }
-
- void step_push(xpath_node_set& ns, const xml_node& n)
- {
- if (!n) return;
-
- switch (_test)
- {
- case nodetest_name:
- if (n.type() == node_element && impl::strequal(n.name(), _data.nodetest)) ns.push_back(n);
- break;
-
- case nodetest_type_node:
- ns.push_back(n);
- break;
-
- case nodetest_type_comment:
- if (n.type() == node_comment)
- ns.push_back(n);
- break;
-
- case nodetest_type_text:
- if (n.type() == node_pcdata || n.type() == node_cdata)
- ns.push_back(n);
- break;
-
- case nodetest_type_pi:
- if (n.type() == node_pi)
- ns.push_back(n);
- break;
-
- case nodetest_pi:
- if (n.type() == node_pi && impl::strequal(n.name(), _data.nodetest))
- ns.push_back(n);
- break;
-
- case nodetest_all:
- if (n.type() == node_element)
- ns.push_back(n);
- break;
-
- case nodetest_all_in_namespace:
- if (n.type() == node_element && starts_with(n.name(), _data.nodetest))
- ns.push_back(n);
- break;
-
- default:
- assert(!"Unknown axis");
- }
- }
-
- template <class T> void step_fill(xpath_node_set& ns, const xml_node& n, T)
- {
- const axis_t axis = T::axis;
-
- switch (axis)
- {
- case axis_attribute:
- {
- ns._type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted;
-
- for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute())
- step_push(ns, a, n);
-
- break;
- }
-
- case axis_child:
- {
- ns._type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted;
-
- for (xml_node c = n.first_child(); c; c = c.next_sibling())
- step_push(ns, c);
-
- break;
- }
-
- case axis_descendant:
- case axis_descendant_or_self:
- {
- ns._type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted;
-
- if (axis == axis_descendant_or_self)
- step_push(ns, n);
-
- xml_node cur = n.first_child();
-
- while (cur && cur != n)
- {
- step_push(ns, cur);
-
- if (cur.first_child())
- cur = cur.first_child();
- else if (cur.next_sibling())
- cur = cur.next_sibling();
- else
- {
- while (!cur.next_sibling() && cur != n)
- cur = cur.parent();
-
- if (cur != n) cur = cur.next_sibling();
- }
- }
-
- break;
- }
-
- case axis_following_sibling:
- {
- ns._type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted;
-
- for (xml_node c = n.next_sibling(); c; c = c.next_sibling())
- step_push(ns, c);
-
- break;
- }
-
- case axis_preceding_sibling:
- {
- ns._type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted;
-
- for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling())
- step_push(ns, c);
-
- break;
- }
-
- case axis_following:
- {
- ns._type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted;
-
- xml_node cur = n;
-
- // exit from this node so that we don't include descendants
- while (cur && !cur.next_sibling()) cur = cur.parent();
- cur = cur.next_sibling();
-
- for (;;)
- {
- step_push(ns, cur);
-
- if (cur.first_child())
- cur = cur.first_child();
- else if (cur.next_sibling())
- cur = cur.next_sibling();
- else
- {
- while (cur && !cur.next_sibling()) cur = cur.parent();
- cur = cur.next_sibling();
-
- if (!cur) break;
- }
- }
-
- break;
- }
-
- case axis_preceding:
- {
- ns._type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted;
-
- xml_node cur = n;
-
- while (cur && !cur.previous_sibling()) cur = cur.parent();
- cur = cur.previous_sibling();
-
- for (;;)
- {
- if (cur.last_child())
- cur = cur.last_child();
- else
- {
- // leaf node, can't be ancestor
- step_push(ns, cur);
-
- if (cur.previous_sibling())
- cur = cur.previous_sibling();
- else
- {
- do
- {
- cur = cur.parent();
- if (!cur) break;
-
- if (!node_is_ancestor(cur, n)) step_push(ns, cur);
- }
- while (!cur.previous_sibling());
-
- cur = cur.previous_sibling();
-
- if (!cur) break;
- }
- }
- }
-
- break;
- }
-
- case axis_ancestor:
- case axis_ancestor_or_self:
- {
- ns._type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted;
-
- if (axis == axis_ancestor_or_self)
- step_push(ns, n);
-
- xml_node cur = n.parent();
-
- while (cur)
- {
- step_push(ns, cur);
-
- cur = cur.parent();
- }
-
- break;
- }
-
- case axis_self:
- {
- ns._type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted;
-
- step_push(ns, n);
-
- break;
- }
-
- case axis_parent:
- {
- ns._type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted;
-
- if (n.parent()) step_push(ns, n.parent());
-
- break;
- }
-
- default:
- assert(!"Unimplemented axis");
- }
- }
-
- template <class T> void step_fill(xpath_node_set& ns, const xml_attribute& a, const xml_node& p, T v)
- {
- const axis_t axis = T::axis;
-
- switch (axis)
- {
- case axis_ancestor:
- case axis_ancestor_or_self:
- {
- ns._type = ns.empty() ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_unsorted;
-
- if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
- step_push(ns, a, p);
-
- xml_node cur = p;
-
- while (cur)
- {
- step_push(ns, cur);
-
- cur = cur.parent();
- }
-
- break;
- }
-
- case axis_descendant_or_self:
- case axis_self:
- {
- ns._type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted;
-
- if (_test == nodetest_type_node) // reject attributes based on principal node type test
- step_push(ns, a, p);
-
- break;
- }
-
- case axis_following:
- {
- ns._type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted;
-
- xml_node cur = p;
-
- for (;;)
- {
- if (cur.first_child())
- cur = cur.first_child();
- else if (cur.next_sibling())
- cur = cur.next_sibling();
- else
- {
- while (cur && !cur.next_sibling()) cur = cur.parent();
- cur = cur.next_sibling();
-
- if (!cur) break;
- }
-
- step_push(ns, cur);
- }
-
- break;
- }
-
- case axis_parent:
- {
- ns._type = ns.empty() ? xpath_node_set::type_sorted : xpath_node_set::type_unsorted;
-
- step_push(ns, p);
-
- break;
- }
-
- case axis_preceding:
- {
- // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
- step_fill(ns, p, v);
- break;
- }
-
- default:
- assert(!"Unimplemented axis");
- }
- }
-
- template <class T> void step_do(xpath_node_set& ns, const xpath_context& c, T v)
- {
- const axis_t axis = T::axis;
-
- assert(ns.empty());
-
- switch (axis)
- {
- case axis_ancestor:
- case axis_ancestor_or_self:
- case axis_descendant_or_self:
- case axis_following:
- case axis_parent:
- case axis_preceding:
- case axis_self:
- if (_left)
- {
- xpath_node_set s = _left->eval_node_set(c);
-
- for (xpath_node_set::const_iterator it = s.begin(); it != s.end(); ++it)
- {
- size_t size = ns.size();
-
- if (it->node())
- step_fill(ns, it->node(), v);
- else
- step_fill(ns, it->attribute(), it->parent(), v);
-
- apply_predicates(ns, size);
- }
- }
- else
- {
- if (c.n.node()) step_fill(ns, c.n.node(), v);
- else step_fill(ns, c.n.attribute(), c.n.parent(), v);
-
- apply_predicates(ns, 0);
- }
-
- break;
-
- case axis_following_sibling:
- case axis_preceding_sibling:
- case axis_attribute:
- case axis_child:
- case axis_descendant:
- if (_left)
- {
- xpath_node_set s = _left->eval_node_set(c);
-
- for (xpath_node_set::const_iterator it = s.begin(); it != s.end(); ++it)
- {
- size_t size = ns.size();
-
- if (it->node())
- step_fill(ns, it->node(), v);
-
- apply_predicates(ns, size);
- }
- }
- else if (c.n.node())
- {
- step_fill(ns, c.n.node(), v);
-
- apply_predicates(ns, 0);
- }
-
- break;
-
- case axis_namespace:
- break;
-
- default:
- assert(!"Unimplemented axis");
- }
- }
-
- public:
- xpath_ast_node(ast_type_t type, xpath_value_type rettype, const char_t* value):
- _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(0), _right(0), _next(0)
- {
- assert(type == ast_string_constant);
- _data.string = value;
- }
-
- xpath_ast_node(ast_type_t type, xpath_value_type rettype, double value):
- _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(0), _right(0), _next(0)
- {
- assert(type == ast_number_constant);
- _data.number = value;
- }
-
- xpath_ast_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
- _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(left), _right(right), _next(0)
- {
- }
-
- xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
- _type((char)type), _rettype(xpath_type_node_set), _axis((char)axis), _test((char)test), _left(left), _right(0), _next(0)
- {
- _data.nodetest = contents;
- }
-
- void set_next(xpath_ast_node* value)
- {
- _next = value;
- }
-
- void set_right(xpath_ast_node* value)
- {
- _right = value;
- }
-
- bool eval_boolean(const xpath_context& c)
- {
- switch (_type)
- {
- case ast_op_or:
- if (_left->eval_boolean(c)) return true;
- else return _right->eval_boolean(c);
-
- case ast_op_and:
- if (!_left->eval_boolean(c)) return false;
- else return _right->eval_boolean(c);
-
- case ast_op_equal:
- return compare_eq(_left, _right, c, pstd::equal_to());
-
- case ast_op_not_equal:
- return compare_eq(_left, _right, c, pstd::not_equal_to());
-
- case ast_op_less:
- return compare_rel(_left, _right, c, pstd::less());
-
- case ast_op_greater:
- return compare_rel(_right, _left, c, pstd::less());
-
- case ast_op_less_or_equal:
- return compare_rel(_left, _right, c, pstd::less_equal());
-
- case ast_op_greater_or_equal:
- return compare_rel(_right, _left, c, pstd::less_equal());
-
- case ast_func_starts_with:
- return starts_with(_left->eval_string(c).c_str(), _right->eval_string(c).c_str());
-
- case ast_func_contains:
- {
- xpath_string lr = _left->eval_string(c);
- xpath_string rr = _right->eval_string(c);
-
- return find_substring(lr.c_str(), rr.c_str()) != 0;
- }
-
- case ast_func_boolean:
- return _left->eval_boolean(c);
-
- case ast_func_not:
- return !_left->eval_boolean(c);
-
- case ast_func_true:
- return true;
-
- case ast_func_false:
- return false;
-
- case ast_func_lang:
- {
- if (c.n.attribute()) return false;
-
- xpath_string lang = _left->eval_string(c);
-
- for (xml_node n = c.n.node(); n; n = n.parent())
- {
- xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
-
- if (a)
- {
- const char_t* value = a.value();
-
- // strnicmp / strncasecmp is not portable
- for (const char_t* lit = lang.c_str(); *lit; ++lit)
- {
- if (tolower(*lit) != tolower(*value)) return false;
- ++value;
- }
-
- return *value == 0 || *value == '-';
- }
- }
-
- return false;
- }
-
- default:
- {
- switch (_rettype)
- {
- case xpath_type_number:
- return convert_number_to_boolean(eval_number(c));
-
- case xpath_type_string:
- return !eval_string(c).empty();
-
- case xpath_type_node_set:
- return !eval_node_set(c).empty();
-
- default:
- assert(!"Wrong expression for return type boolean");
- return false;
- }
- }
- }
- }
-
- double eval_number(const xpath_context& c)
- {
- switch (_type)
- {
- case ast_op_add:
- return _left->eval_number(c) + _right->eval_number(c);
-
- case ast_op_subtract:
- return _left->eval_number(c) - _right->eval_number(c);
-
- case ast_op_multiply:
- return _left->eval_number(c) * _right->eval_number(c);
-
- case ast_op_divide:
- return _left->eval_number(c) / _right->eval_number(c);
-
- case ast_op_mod:
- return fmod(_left->eval_number(c), _right->eval_number(c));
-
- case ast_op_negate:
- return -_left->eval_number(c);
-
- case ast_number_constant:
- return _data.number;
-
- case ast_func_last:
- return (double)c.size;
-
- case ast_func_position:
- return (double)c.position;
-
- case ast_func_count:
- return (double)_left->eval_node_set(c).size();
-
- case ast_func_string_length_0:
- return (double)string_value(c.n).length();
-
- case ast_func_string_length_1:
- return (double)_left->eval_string(c).length();
-
- case ast_func_number_0:
- return convert_string_to_number(string_value(c.n).c_str());
-
- case ast_func_number_1:
- return _left->eval_number(c);
-
- case ast_func_sum:
- {
- double r = 0;
-
- xpath_node_set ns = _left->eval_node_set(c);
-
- for (xpath_node_set::const_iterator it = ns.begin(); it != ns.end(); ++it)
- r += convert_string_to_number(string_value(*it).c_str());
-
- return r;
- }
-
- case ast_func_floor:
- {
- double r = _left->eval_number(c);
-
- return r == r ? floor(r) : r;
- }
-
- case ast_func_ceiling:
- {
- double r = _left->eval_number(c);
-
- return r == r ? ceil(r) : r;
- }
-
- case ast_func_round:
- return round_nearest_nzero(_left->eval_number(c));
-
- default:
- {
- switch (_rettype)
- {
- case xpath_type_boolean:
- return eval_boolean(c) ? 1 : 0;
-
- case xpath_type_string:
- return convert_string_to_number(eval_string(c).c_str());
-
- case xpath_type_node_set:
- return convert_string_to_number(eval_string(c).c_str());
-
- default:
- assert(!"Wrong expression for return type number");
- return 0;
- }
-
- }
- }
- }
-
- xpath_string eval_string(const xpath_context& c)
- {
- switch (_type)
- {
- case ast_string_constant:
- return xpath_string_const(_data.string);
-
- case ast_func_local_name_0:
- {
- xpath_node na = c.n;
-
- return xpath_string_const(local_name(na));
- }
-
- case ast_func_local_name_1:
- {
- xpath_node_set ns = _left->eval_node_set(c);
- xpath_node na = ns.first();
-
- return xpath_string_const(local_name(na));
- }
-
- case ast_func_name_0:
- {
- xpath_node na = c.n;
-
- return xpath_string_const(qualified_name(na));
- }
-
- case ast_func_name_1:
- {
- xpath_node_set ns = _left->eval_node_set(c);
- xpath_node na = ns.first();
-
- return xpath_string_const(qualified_name(na));
- }
-
- case ast_func_namespace_uri_0:
- {
- xpath_node na = c.n;
-
- return xpath_string_const(namespace_uri(na));
- }
-
- case ast_func_namespace_uri_1:
- {
- xpath_node_set ns = _left->eval_node_set(c);
- xpath_node na = ns.first();
-
- return xpath_string_const(namespace_uri(na));
- }
-
- case ast_func_string_0:
- return string_value(c.n);
-
- case ast_func_string_1:
- return _left->eval_string(c);
-
- case ast_func_concat:
- {
- xpath_string r = _left->eval_string(c);
-
- for (xpath_ast_node* n = _right; n; n = n->_next)
- r.append(n->eval_string(c));
-
- return r;
- }
-
- case ast_func_substring_before:
- {
- xpath_string s = _left->eval_string(c);
- xpath_string p = _right->eval_string(c);
-
- const char_t* pos = find_substring(s.c_str(), p.c_str());
-
- return pos ? xpath_string(s.c_str(), pos) : xpath_string();
- }
-
- case ast_func_substring_after:
- {
- xpath_string s = _left->eval_string(c);
- xpath_string p = _right->eval_string(c);
-
- const char_t* pos = find_substring(s.c_str(), p.c_str());
-
- return pos ? xpath_string(pos + p.length()) : xpath_string();
- }
-
- case ast_func_substring_2:
- {
- xpath_string s = _left->eval_string(c);
- double first = round_nearest(_right->eval_number(c));
-
- if (is_nan(first)) return xpath_string(); // NaN
- else if (first >= s.length() + 1) return xpath_string();
-
- size_t pos = first < 1 ? 1 : (size_t)first;
-
- return xpath_string(s.c_str() + (pos - 1));
- }
-
- case ast_func_substring_3:
- {
- xpath_string s = _left->eval_string(c);
- size_t s_length = s.length();
-
- double first = round_nearest(_right->eval_number(c));
- double last = first + round_nearest(_right->_next->eval_number(c));
-
- if (is_nan(first) || is_nan(last)) return xpath_string();
- else if (first >= s_length + 1) return xpath_string();
- else if (first >= last) return xpath_string();
-
- size_t pos = first < 1 ? 1 : (size_t)first;
- size_t end = last >= s_length + 1 ? s_length + 1 : (size_t)last;
-
- size_t size_requested = end - pos;
- size_t size_to_end = s_length - pos + 1;
-
- size_t size = size_requested < size_to_end ? size_requested : size_to_end;
-
- return xpath_string(s.c_str() + pos - 1, s.c_str() + pos - 1 + size);
- }
-
- case ast_func_normalize_space_0:
- {
- xpath_string s = string_value(c.n);
-
- normalize_space(s.data());
-
- return s;
- }
-
- case ast_func_normalize_space_1:
- {
- xpath_string s = _left->eval_string(c);
-
- normalize_space(s.data());
-
- return s;
- }
-
- case ast_func_translate:
- {
- xpath_string s = _left->eval_string(c);
- xpath_string from = _right->eval_string(c);
- xpath_string to = _right->_next->eval_string(c);
-
- translate(s.data(), from.c_str(), to.c_str());
-
- return s;
- }
-
- default:
- {
- switch (_rettype)
- {
- case xpath_type_boolean:
- return xpath_string_const(eval_boolean(c) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
-
- case xpath_type_number:
- return convert_number_to_string(eval_number(c));
-
- case xpath_type_node_set:
- {
- xpath_node_set ns = eval_node_set(c);
- return ns.empty() ? xpath_string() : string_value(ns.first());
- }
-
- default:
- assert(!"Wrong expression for return type string");
- return xpath_string();
- }
- }
- }
- }
-
- xpath_node_set eval_node_set(const xpath_context& c)
- {
- switch (_type)
- {
- case ast_op_union:
- {
- xpath_node_set ls = _left->eval_node_set(c);
- xpath_node_set rs = _right->eval_node_set(c);
-
- // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
- ls._type = xpath_node_set::type_unsorted;
-
- ls.append(rs.begin(), rs.end());
-
- ls.remove_duplicates();
-
- return ls;
- }
-
- case ast_filter:
- case ast_filter_posinv:
- {
- xpath_node_set set = _left->eval_node_set(c);
-
- // either expression is a number or it contains position() call; sort by document order
- if (_type == ast_filter) set.sort();
-
- apply_predicate(set, 0, _right);
-
- return set;
- }
-
- case ast_func_id:
- return xpath_node_set();
-
- case ast_step:
- {
- xpath_node_set ns;
-
- switch (_axis)
- {
- case axis_ancestor:
- step_do(ns, c, axis_to_type<axis_ancestor>());
- break;
-
- case axis_ancestor_or_self:
- step_do(ns, c, axis_to_type<axis_ancestor_or_self>());
- break;
-
- case axis_attribute:
- step_do(ns, c, axis_to_type<axis_attribute>());
- break;
-
- case axis_child:
- step_do(ns, c, axis_to_type<axis_child>());
- break;
-
- case axis_descendant:
- step_do(ns, c, axis_to_type<axis_descendant>());
- break;
-
- case axis_descendant_or_self:
- step_do(ns, c, axis_to_type<axis_descendant_or_self>());
- break;
-
- case axis_following:
- step_do(ns, c, axis_to_type<axis_following>());
- break;
-
- case axis_following_sibling:
- step_do(ns, c, axis_to_type<axis_following_sibling>());
- break;
-
- case axis_namespace:
- step_do(ns, c, axis_to_type<axis_namespace>());
- break;
-
- case axis_parent:
- step_do(ns, c, axis_to_type<axis_parent>());
- break;
-
- case axis_preceding:
- step_do(ns, c, axis_to_type<axis_preceding>());
- break;
-
- case axis_preceding_sibling:
- step_do(ns, c, axis_to_type<axis_preceding_sibling>());
- break;
-
- case axis_self:
- step_do(ns, c, axis_to_type<axis_self>());
- break;
- }
-
- ns.remove_duplicates();
-
- return ns;
- }
-
- case ast_step_root:
- {
- assert(!_right); // root step can't have any predicates
-
- xpath_node_set ns;
- ns._type = xpath_node_set::type_sorted;
-
- if (c.n.node()) ns.push_back(c.n.node().root());
- else if (c.n.attribute()) ns.push_back(c.n.parent().root());
-
- return ns;
- }
-
- default:
- assert(!"Wrong expression for return type node set");
- return xpath_node_set();
- }
- }
-
- bool is_posinv()
- {
- switch (_type)
- {
- case ast_func_position:
- return false;
-
- case ast_string_constant:
- case ast_number_constant:
- // $$ case ast_variable:
- return true;
-
- case ast_step:
- case ast_step_root:
- return true;
-
- case ast_predicate:
- case ast_filter:
- case ast_filter_posinv:
- return true;
-
- default:
- if (_left && !_left->is_posinv()) return false;
-
- for (xpath_ast_node* n = _right; n; n = n->_next)
- if (!n->is_posinv()) return false;
-
- return true;
- }
- }
-
- xpath_value_type rettype() const
- {
- return static_cast<xpath_value_type>(_rettype);
- }
- };
-
- struct xpath_parser
- {
- xpath_allocator& _alloc;
- xpath_lexer _lexer;
- const char_t* _query;
- xpath_parse_result* _result;
- jmp_buf _error_handler;
-
- xpath_parser(const xpath_parser&);
- xpath_parser& operator=(const xpath_parser&);
-
- void throw_error(const char* message)
- {
- _result->error = message;
- _result->offset = _lexer.current_pos() - _query;
-
- longjmp(_error_handler, 1);
- }
-
- void* alloc_node()
- {
- void* result = _alloc.allocate(sizeof(xpath_ast_node));
-
- if (!result) throw_error("Out of memory");
-
- return result;
- }
-
- const char_t* alloc_string(const xpath_lexer_string& value)
- {
- if (value.begin)
- {
- size_t length = static_cast<size_t>(value.end - value.begin);
-
- char_t* c = static_cast<char_t*>(_alloc.allocate((length + 1) * sizeof(char_t)));
- if (!c) throw_error("Out of memory");
-
- memcpy(c, value.begin, length * sizeof(char_t));
- c[length] = 0;
-
- return c;
- }
- else return 0;
- }
-
- xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2])
- {
- assert(argc <= 1);
-
- if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
-
- return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]);
- }
-
- xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
- {
- switch (name.begin[0])
- {
- case 'b':
- if (name == PUGIXML_TEXT("boolean") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
-
- break;
-
- case 'c':
- if (name == PUGIXML_TEXT("count") && argc == 1)
- {
- if (args[0]->rettype() != xpath_type_node_set) throw_error("count() has to be applied to node set");
- return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
- }
- else if (name == PUGIXML_TEXT("contains") && argc == 2)
- return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("concat") && argc >= 2)
- return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
-
- break;
-
- case 'f':
- if (name == PUGIXML_TEXT("false") && argc == 0)
- return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
- else if (name == PUGIXML_TEXT("floor") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
-
- break;
-
- case 'i':
- if (name == PUGIXML_TEXT("id") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
-
- break;
-
- case 'l':
- if (name == PUGIXML_TEXT("last") && argc == 0)
- return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
- else if (name == PUGIXML_TEXT("lang") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
- else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
- return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
-
- break;
-
- case 'n':
- if (name == PUGIXML_TEXT("name") && argc <= 1)
- return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
- else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
- return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
- else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
- return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("not") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
- else if (name == PUGIXML_TEXT("number") && argc <= 1)
- return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
-
- break;
-
- case 'p':
- if (name == PUGIXML_TEXT("position") && argc == 0)
- return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
-
- break;
-
- case 'r':
- if (name == PUGIXML_TEXT("round") && argc == 1)
- return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
-
- break;
-
- case 's':
- if (name == PUGIXML_TEXT("string") && argc <= 1)
- return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
- else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
- return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_string, args[0]);
- else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
- return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
- else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
- return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
- return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
- return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("sum") && argc == 1)
- {
- if (args[0]->rettype() != xpath_type_node_set) throw_error("sum() has to be applied to node set");
- return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
- }
-
- break;
-
- case 't':
- if (name == PUGIXML_TEXT("translate") && argc == 3)
- return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
- else if (name == PUGIXML_TEXT("true") && argc == 0)
- return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
-
- break;
- }
-
- throw_error("Unrecognized function or wrong parameter count");
-
- return 0;
- }
-
- axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
- {
- specified = true;
-
- switch (name.begin[0])
- {
- case 'a':
- if (name == PUGIXML_TEXT("ancestor"))
- return axis_ancestor;
- else if (name == PUGIXML_TEXT("ancestor-or-self"))
- return axis_ancestor_or_self;
- else if (name == PUGIXML_TEXT("attribute"))
- return axis_attribute;
-
- break;
-
- case 'c':
- if (name == PUGIXML_TEXT("child"))
- return axis_child;
-
- break;
-
- case 'd':
- if (name == PUGIXML_TEXT("descendant"))
- return axis_descendant;
- else if (name == PUGIXML_TEXT("descendant-or-self"))
- return axis_descendant_or_self;
-
- break;
-
- case 'f':
- if (name == PUGIXML_TEXT("following"))
- return axis_following;
- else if (name == PUGIXML_TEXT("following-sibling"))
- return axis_following_sibling;
-
- break;
-
- case 'n':
- if (name == PUGIXML_TEXT("namespace"))
- return axis_namespace;
-
- break;
-
- case 'p':
- if (name == PUGIXML_TEXT("parent"))
- return axis_parent;
- else if (name == PUGIXML_TEXT("preceding"))
- return axis_preceding;
- else if (name == PUGIXML_TEXT("preceding-sibling"))
- return axis_preceding_sibling;
-
- break;
-
- case 's':
- if (name == PUGIXML_TEXT("self"))
- return axis_self;
-
- break;
- }
-
- specified = false;
- return axis_child;
- }
-
- nodetest_t parse_node_test_type(const xpath_lexer_string& name)
- {
- switch (name.begin[0])
- {
- case 'c':
- if (name == PUGIXML_TEXT("comment"))
- return nodetest_type_comment;
-
- break;
-
- case 'n':
- if (name == PUGIXML_TEXT("node"))
- return nodetest_type_node;
-
- break;
-
- case 'p':
- if (name == PUGIXML_TEXT("processing-instruction"))
- return nodetest_type_pi;
-
- break;
-
- case 't':
- if (name == PUGIXML_TEXT("text"))
- return nodetest_type_text;
-
- break;
- }
-
- return nodetest_none;
- }
-
- // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
- xpath_ast_node* parse_primary_expression()
- {
- switch (_lexer.current())
- {
- case lex_var_ref:
- {
- throw_error("Variables are not supported");
-
- return 0;
- }
-
- case lex_open_brace:
- {
- _lexer.next();
-
- xpath_ast_node* n = parse_expression();
-
- if (_lexer.current() != lex_close_brace)
- throw_error("Unmatched braces");
-
- _lexer.next();
-
- return n;
- }
-
- case lex_quoted_string:
- {
- const char_t* value = alloc_string(_lexer.contents());
-
- xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);
- _lexer.next();
-
- return n;
- }
-
- case lex_number:
- {
- double value = 0;
-
- if (!convert_string_to_number(_lexer.contents().begin, _lexer.contents().end, &value))
- throw_error("Out of memory");
-
- xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
- _lexer.next();
-
- return n;
- }
-
- case lex_string:
- {
- xpath_ast_node* args[2] = {0};
- size_t argc = 0;
-
- xpath_lexer_string function = _lexer.contents();
- _lexer.next();
-
- xpath_ast_node* last_arg = 0;
-
- if (_lexer.current() != lex_open_brace)
- throw_error("Unrecognized function call");
- _lexer.next();
-
- if (_lexer.current() != lex_close_brace)
- args[argc++] = parse_expression();
-
- while (_lexer.current() != lex_close_brace)
- {
- if (_lexer.current() != lex_comma)
- throw_error("No comma between function arguments");
- _lexer.next();
-
- xpath_ast_node* n = parse_expression();
-
- if (argc < 2) args[argc] = n;
- else last_arg->set_next(n);
-
- argc++;
- last_arg = n;
- }
-
- _lexer.next();
-
- return parse_function(function, argc, args);
- }
-
- default:
- throw_error("Unrecognizable primary expression");
-
- return 0;
- }
- }
-
- // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
- // Predicate ::= '[' PredicateExpr ']'
- // PredicateExpr ::= Expr
- xpath_ast_node* parse_filter_expression()
- {
- xpath_ast_node* n = parse_primary_expression();
-
- while (_lexer.current() == lex_open_square_brace)
- {
- _lexer.next();
-
- xpath_ast_node* expr = parse_expression();
-
- if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set");
-
- bool posinv = expr->rettype() != xpath_type_number && expr->is_posinv();
-
- n = new (alloc_node()) xpath_ast_node(posinv ? ast_filter_posinv : ast_filter, xpath_type_node_set, n, expr);
-
- if (_lexer.current() != lex_close_square_brace)
- throw_error("Unmatched square brace");
-
- _lexer.next();
- }
-
- return n;
- }
-
- // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
- // AxisSpecifier ::= AxisName '::' | '@'?
- // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
- // NameTest ::= '*' | NCName ':' '*' | QName
- // AbbreviatedStep ::= '.' | '..'
- xpath_ast_node* parse_step(xpath_ast_node* set)
- {
- if (set && set->rettype() != xpath_type_node_set)
- throw_error("Step has to be applied to node set");
-
- bool axis_specified = false;
- axis_t axis = axis_child; // implied child axis
-
- if (_lexer.current() == lex_axis_attribute)
- {
- axis = axis_attribute;
- axis_specified = true;
-
- _lexer.next();
- }
- else if (_lexer.current() == lex_dot)
- {
- _lexer.next();
-
- return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
- }
- else if (_lexer.current() == lex_double_dot)
- {
- _lexer.next();
-
- return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
- }
-
- nodetest_t nt_type = nodetest_none;
- xpath_lexer_string nt_name;
-
- if (_lexer.current() == lex_string)
- {
- // node name test
- nt_name = _lexer.contents();
- _lexer.next();
-
- // was it an axis name?
- if (_lexer.current() == lex_double_colon)
- {
- // parse axis name
- if (axis_specified) throw_error("Two axis specifiers in one step");
-
- axis = parse_axis_name(nt_name, axis_specified);
-
- if (!axis_specified) throw_error("Unknown axis");
-
- // read actual node test
- _lexer.next();
-
- if (_lexer.current() == lex_multiply)
- {
- nt_type = nodetest_all;
- nt_name = xpath_lexer_string();
- _lexer.next();
- }
- else if (_lexer.current() == lex_string)
- {
- nt_name = _lexer.contents();
- _lexer.next();
- }
- else throw_error("Unrecognized node test");
- }
-
- if (nt_type == nodetest_none)
- {
- // node type test or processing-instruction
- if (_lexer.current() == lex_open_brace)
- {
- _lexer.next();
-
- if (_lexer.current() == lex_close_brace)
- {
- _lexer.next();
-
- nt_type = parse_node_test_type(nt_name);
-
- if (nt_type == nodetest_none) throw_error("Unrecognized node type");
-
- nt_name = xpath_lexer_string();
- }
- else if (nt_name == PUGIXML_TEXT("processing-instruction"))
- {
- if (_lexer.current() != lex_quoted_string)
- throw_error("Only literals are allowed as arguments to processing-instruction()");
-
- nt_type = nodetest_pi;
- nt_name = _lexer.contents();
- _lexer.next();
-
- if (_lexer.current() != lex_close_brace)
- throw_error("Unmatched brace near processing-instruction()");
- _lexer.next();
- }
- else
- throw_error("Unmatched brace near node type test");
-
- }
- // QName or NCName:*
- else
- {
- const char_t* colon_pos = pstd::find(nt_name.begin, nt_name.end, ':');
-
- if (colon_pos + 2 == nt_name.end && colon_pos[1] == '*') // NCName:*
- {
- nt_name.end--; // erase *
-
- nt_type = nodetest_all_in_namespace;
- }
- else nt_type = nodetest_name;
- }
- }
- }
- else if (_lexer.current() == lex_multiply)
- {
- nt_type = nodetest_all;
- _lexer.next();
- }
- else throw_error("Unrecognized node test");
-
- xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));
-
- xpath_ast_node* last = 0;
-
- while (_lexer.current() == lex_open_square_brace)
- {
- _lexer.next();
-
- xpath_ast_node* expr = parse_expression();
-
- xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr);
-
- if (_lexer.current() != lex_close_square_brace)
- throw_error("Unmatched square brace");
- _lexer.next();
-
- if (last) last->set_next(pred);
- else n->set_right(pred);
-
- last = pred;
- }
-
- return n;
- }
-
- // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
- xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
- {
- xpath_ast_node* n = parse_step(set);
-
- while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
- {
- lexeme_t l = _lexer.current();
- _lexer.next();
-
- if (l == lex_double_slash)
- n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
-
- n = parse_step(n);
- }
-
- return n;
- }
-
- // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
- // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
- xpath_ast_node* parse_location_path()
- {
- if (_lexer.current() == lex_slash)
- {
- _lexer.next();
-
- xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
-
- // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
- lexeme_t l = _lexer.current();
-
- if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
- return parse_relative_location_path(n);
- else
- return n;
- }
- else if (_lexer.current() == lex_double_slash)
- {
- _lexer.next();
-
- xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
- n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
-
- return parse_relative_location_path(n);
- }
- else
- {
- return parse_relative_location_path(0);
- }
- }
-
- // PathExpr ::= LocationPath
- // | FilterExpr
- // | FilterExpr '/' RelativeLocationPath
- // | FilterExpr '//' RelativeLocationPath
- xpath_ast_node* parse_path_expression()
- {
- // Clarification.
- // PathExpr begins with either LocationPath or FilterExpr.
- // FilterExpr begins with PrimaryExpr
- // PrimaryExpr begins with '$' in case of it being a variable reference,
- // '(' in case of it being an expression, string literal, number constant or
- // function call.
-
- if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
- _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
- _lexer.current() == lex_string)
- {
- if (_lexer.current() == lex_string)
- {
- // This is either a function call, or not - if not, we shall proceed with location path
- const char_t* state = _lexer.state();
-
- while (IS_CHARTYPEX(*state, ctx_space)) ++state;
-
- if (*state != '(') return parse_location_path();
-
- // This looks like a function call; however this still can be a node-test. Check it.
- if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();
- }
-
- xpath_ast_node* n = parse_filter_expression();
-
- if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
- {
- lexeme_t l = _lexer.current();
- _lexer.next();
-
- if (l == lex_double_slash)
- {
- if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
-
- n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
- }
-
- // select from location path
- return parse_relative_location_path(n);
- }
-
- return n;
- }
- else return parse_location_path();
- }
-
- // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
- xpath_ast_node* parse_union_expression()
- {
- xpath_ast_node* n = parse_path_expression();
-
- while (_lexer.current() == lex_union)
- {
- _lexer.next();
-
- xpath_ast_node* expr = parse_union_expression();
-
- if (n->rettype() != xpath_type_node_set || expr->rettype() != xpath_type_node_set)
- throw_error("Union operator has to be applied to node sets");
-
- n = new (alloc_node()) xpath_ast_node(ast_op_union, xpath_type_node_set, n, expr);
- }
-
- return n;
- }
-
- // UnaryExpr ::= UnionExpr | '-' UnaryExpr
- xpath_ast_node* parse_unary_expression()
- {
- if (_lexer.current() == lex_minus)
- {
- _lexer.next();
-
- xpath_ast_node* expr = parse_unary_expression();
-
- return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
- }
- else return parse_union_expression();
- }
-
- // MultiplicativeExpr ::= UnaryExpr
- // | MultiplicativeExpr '*' UnaryExpr
- // | MultiplicativeExpr 'div' UnaryExpr
- // | MultiplicativeExpr 'mod' UnaryExpr
- xpath_ast_node* parse_multiplicative_expression()
- {
- xpath_ast_node* n = parse_unary_expression();
-
- while (_lexer.current() == lex_multiply || (_lexer.current() == lex_string &&
- (_lexer.contents() == PUGIXML_TEXT("mod") || _lexer.contents() == PUGIXML_TEXT("div"))))
- {
- ast_type_t op = _lexer.current() == lex_multiply ? ast_op_multiply :
- _lexer.contents().begin[0] == 'd' ? ast_op_divide : ast_op_mod;
- _lexer.next();
-
- xpath_ast_node* expr = parse_unary_expression();
-
- n = new (alloc_node()) xpath_ast_node(op, xpath_type_number, n, expr);
- }
-
- return n;
- }
-
- // AdditiveExpr ::= MultiplicativeExpr
- // | AdditiveExpr '+' MultiplicativeExpr
- // | AdditiveExpr '-' MultiplicativeExpr
- xpath_ast_node* parse_additive_expression()
- {
- xpath_ast_node* n = parse_multiplicative_expression();
-
- while (_lexer.current() == lex_plus || _lexer.current() == lex_minus)
- {
- lexeme_t l = _lexer.current();
-
- _lexer.next();
-
- xpath_ast_node* expr = parse_multiplicative_expression();
-
- n = new (alloc_node()) xpath_ast_node(l == lex_plus ? ast_op_add : ast_op_subtract, xpath_type_number, n, expr);
- }
-
- return n;
- }
-
- // RelationalExpr ::= AdditiveExpr
- // | RelationalExpr '<' AdditiveExpr
- // | RelationalExpr '>' AdditiveExpr
- // | RelationalExpr '<=' AdditiveExpr
- // | RelationalExpr '>=' AdditiveExpr
- xpath_ast_node* parse_relational_expression()
- {
- xpath_ast_node* n = parse_additive_expression();
-
- while (_lexer.current() == lex_less || _lexer.current() == lex_less_or_equal ||
- _lexer.current() == lex_greater || _lexer.current() == lex_greater_or_equal)
- {
- lexeme_t l = _lexer.current();
- _lexer.next();
-
- xpath_ast_node* expr = parse_additive_expression();
-
- n = new (alloc_node()) xpath_ast_node(l == lex_less ? ast_op_less : l == lex_greater ? ast_op_greater :
- l == lex_less_or_equal ? ast_op_less_or_equal : ast_op_greater_or_equal, xpath_type_boolean, n, expr);
- }
-
- return n;
- }
-
- // EqualityExpr ::= RelationalExpr
- // | EqualityExpr '=' RelationalExpr
- // | EqualityExpr '!=' RelationalExpr
- xpath_ast_node* parse_equality_expression()
- {
- xpath_ast_node* n = parse_relational_expression();
-
- while (_lexer.current() == lex_equal || _lexer.current() == lex_not_equal)
- {
- lexeme_t l = _lexer.current();
-
- _lexer.next();
-
- xpath_ast_node* expr = parse_relational_expression();
-
- n = new (alloc_node()) xpath_ast_node(l == lex_equal ? ast_op_equal : ast_op_not_equal, xpath_type_boolean, n, expr);
- }
-
- return n;
- }
-
- // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
- xpath_ast_node* parse_and_expression()
- {
- xpath_ast_node* n = parse_equality_expression();
-
- while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("and"))
- {
- _lexer.next();
-
- xpath_ast_node* expr = parse_equality_expression();
-
- n = new (alloc_node()) xpath_ast_node(ast_op_and, xpath_type_boolean, n, expr);
- }
-
- return n;
- }
-
- // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
- xpath_ast_node* parse_or_expression()
- {
- xpath_ast_node* n = parse_and_expression();
-
- while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("or"))
- {
- _lexer.next();
-
- xpath_ast_node* expr = parse_and_expression();
-
- n = new (alloc_node()) xpath_ast_node(ast_op_or, xpath_type_boolean, n, expr);
- }
-
- return n;
- }
-
- // Expr ::= OrExpr
- xpath_ast_node* parse_expression()
- {
- return parse_or_expression();
- }
-
- xpath_parser(const char_t* query, xpath_allocator& alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _result(result)
- {
- }
-
- xpath_ast_node* parse()
- {
- xpath_ast_node* result = parse_expression();
-
- if (_lexer.current() != lex_eof)
- {
- // there are still unparsed tokens left, error
- throw_error("Incorrect query");
- }
-
- return result;
- }
-
- static xpath_ast_node* parse(const char_t* query, xpath_allocator& alloc, xpath_parse_result* result)
- {
- xpath_parser parser(query, alloc, result);
-
- int error = setjmp(parser._error_handler);
-
- return (error == 0) ? parser.parse() : 0;
- }
- };
-
- const char* xpath_parse_result::description() const
- {
- return error ? error : "No error";
- }
-
- xpath_query::xpath_query(const char_t* query): _alloc(0), _root(0)
- {
- _result.error = 0;
- _result.offset = 0;
-
- _alloc = xpath_allocator::create();
-
- if (!_alloc)
- {
- _result.error = "Out of memory";
- }
- else
- {
- _root = xpath_parser::parse(query, *_alloc, &_result);
- }
-
- if (!_root)
- {
- xpath_allocator::destroy(_alloc);
- _alloc = 0;
-
- #ifndef PUGIXML_NO_EXCEPTIONS
- throw xpath_exception(_result);
- #endif
- }
- }
-
- xpath_query::~xpath_query()
- {
- xpath_allocator::destroy(_alloc);
- }
-
- xpath_value_type xpath_query::return_type() const
- {
- if (!_root) return xpath_type_none;
-
- return _root->rettype();
- }
-
- bool xpath_query::evaluate_boolean(const xpath_node& n) const
- {
- if (!_root) return false;
-
- xpath_context c(n, 1, 1);
-
- return _root->eval_boolean(c);
- }
-
- double xpath_query::evaluate_number(const xpath_node& n) const
- {
- if (!_root) return gen_nan();
-
- xpath_context c(n, 1, 1);
-
- return _root->eval_number(c);
- }
-
-#ifndef PUGIXML_NO_STL
- string_t xpath_query::evaluate_string(const xpath_node& n) const
- {
- if (!_root) return string_t();
-
- xpath_context c(n, 1, 1);
-
- return _root->eval_string(c).c_str();
- }
-#endif
-
- size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
- {
- xpath_context c(n, 1, 1);
- xpath_string r = _root ? _root->eval_string(c) : xpath_string();
-
- size_t size = r.length() + 1;
-
- // $$ zero-terminate?
- if (capacity > 0) memcpy(buffer, r.c_str(), (size < capacity ? size : capacity) * sizeof(char_t));
-
- return size;
- }
-
- xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
- {
- if (!_root) return xpath_node_set();
-
- if (_root->rettype() != xpath_type_node_set)
- {
- #ifdef PUGIXML_NO_EXCEPTIONS
- return xpath_node_set();
- #else
- xpath_parse_result result = {"Expression does not evaluate to node set", 0};
- throw xpath_exception(result);
- #endif
- }
-
- xpath_context c(n, 1, 1);
-
- return _root->eval_node_set(c);
- }
-
- const xpath_parse_result& xpath_query::result() const
- {
- return _result;
- }
-
- xpath_query::operator unspecified_bool_type() const
- {
- return _root ? &xpath_query::_root : 0;
- }
-
- bool xpath_query::operator!() const
- {
- return !_root;
- }
-
- xpath_node xml_node::select_single_node(const char_t* query) const
- {
- xpath_query q(query);
- return select_single_node(q);
- }
-
- xpath_node xml_node::select_single_node(const xpath_query& query) const
- {
- xpath_node_set s = query.evaluate_node_set(*this);
- return s.empty() ? xpath_node() : s.first();
- }
-
- xpath_node_set xml_node::select_nodes(const char_t* query) const
- {
- xpath_query q(query);
- return select_nodes(q);
- }
-
- xpath_node_set xml_node::select_nodes(const xpath_query& query) const
- {
- return query.evaluate_node_set(*this);
- }
-}
-
-#endif
-
-/**
- * Copyright (c) 2006-2010 Arseny Kapoulkine
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */