summary | refs | log | tree | commit | diff
path: root/src/pugixml.cpp
diff options
context:
space:
mode:
author: arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640> 2010-08-29 15:32:24 +0000
committer: arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640> 2010-08-29 15:32:24 +0000
commit: 0e6d53c9e57e0b531b569ddacf71719fc15d0534 (patch)
tree: 2bf9e6c01edde751deca0b41dd65cbee803152a3 /src/pugixml.cpp
parent: 9b337a176f89c2261211188cc398c3e15952d87a (diff)
Refactoring: merged includes/warnings sections, moved string utilities to anonymous namespace, shared chartype implementation
git-svn-id: http://pugixml.googlecode.com/svn/trunk@671 99668b35-9821-0410-8761-19e4c4f06640
Diffstat (limited to 'src/pugixml.cpp')
-rw-r--r-- src/pugixml.cpp 295
1 file changed, 115 insertions, 180 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index db9564c..5531e88 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -20,6 +20,12 @@
#include <setjmp.h>
#include <wchar.h>
+#ifndef PUGIXML_NO_XPATH
+# include <ctype.h>
+# include <math.h>
+# include <float.h>
+#endif
+
#ifndef PUGIXML_NO_STL
# include <istream>
# include <ostream>
@@ -31,8 +37,9 @@
#ifdef _MSC_VER
# pragma warning(disable: 4127) // conditional expression is constant
-# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
# pragma warning(disable: 4324) // structure was padded due to __declspec(align())
+# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
+# pragma warning(disable: 4702) // unreachable code
# pragma warning(disable: 4996) // this function or variable may be unsafe
#endif
@@ -48,6 +55,7 @@
#ifdef __SNC__
# pragma diag_suppress=178 // function was declared but never referenced
+# pragma diag_suppress=237 // controlling expression is constant
#endif
// uintptr_t
@@ -61,6 +69,7 @@ typedef size_t uintptr_t;
typedef unsigned __int8 uint8_t;
typedef unsigned __int16 uint16_t;
typedef unsigned __int32 uint32_t;
+typedef __int32 int32_t;
#endif
// Inlining controls
@@ -75,6 +84,13 @@ typedef unsigned __int32 uint32_t;
// Simple static assertion
#define STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
+// Digital Mars C++ bug workaround for passing char loaded from memory via stack
+#ifdef __DMC__
+# define DMC_VOLATILE volatile
+#else
+# define DMC_VOLATILE
+#endif
+
// Memory allocation
namespace
{
@@ -92,62 +108,49 @@ namespace
pugi::deallocation_function global_deallocate = default_deallocate;
}
-// String utilities prototypes
-namespace pugi
+// String utilities
+namespace
{
- namespace impl
+ using namespace pugi;
+
+ // Get string length
+ size_t strlength(const char_t* s)
{
- size_t strlen(const char_t* s);
- bool strequal(const char_t* src, const char_t* dst);
- bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count);
- void widen_ascii(wchar_t* dest, const char* source);
+ #ifdef PUGIXML_WCHAR_MODE
+ return wcslen(s);
+ #else
+ return strlen(s);
+ #endif
}
-}
-// String utilities
-namespace pugi
-{
- namespace impl
+ // Compare two strings
+ bool strequal(const char_t* src, const char_t* dst)
{
- // Get string length
- size_t strlen(const char_t* s)
- {
- #ifdef PUGIXML_WCHAR_MODE
- return wcslen(s);
- #else
- return ::strlen(s);
- #endif
- }
-
- // Compare two strings
- bool PUGIXML_FUNCTION strequal(const char_t* src, const char_t* dst)
- {
- #ifdef PUGIXML_WCHAR_MODE
- return wcscmp(src, dst) == 0;
- #else
- return strcmp(src, dst) == 0;
- #endif
- }
+ #ifdef PUGIXML_WCHAR_MODE
+ return wcscmp(src, dst) == 0;
+ #else
+ return strcmp(src, dst) == 0;
+ #endif
+ }
- // Compare lhs with [rhs_begin, rhs_end)
- bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
- {
- for (size_t i = 0; i < count; ++i)
- if (lhs[i] != rhs[i])
- return false;
-
- return lhs[count] == 0;
- }
-
+ // Compare lhs with [rhs_begin, rhs_end)
+ bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
+ {
+ for (size_t i = 0; i < count; ++i)
+ if (lhs[i] != rhs[i])
+ return false;
+
+ return lhs[count] == 0;
+ }
+
#ifdef PUGIXML_WCHAR_MODE
- // Convert string to wide string, assuming all symbols are ASCII
- void widen_ascii(wchar_t* dest, const char* source)
- {
- for (const char* i = source; *i; ++i) *dest++ = *i;
- *dest = 0;
- }
-#endif
+ // Convert string to wide string, assuming all symbols are ASCII
+ void widen_ascii(wchar_t* dest, const char* source)
+ {
+ for (const char* i = source; *i; ++i) *dest++ = *i;
+ *dest = 0;
}
+#endif
}
namespace pugi
@@ -906,12 +909,35 @@ namespace
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
};
-#ifdef PUGIXML_WCHAR_MODE
- #define IS_CHARTYPE(c, ct) ((static_cast<unsigned int>(c) < 128 ? chartype_table[static_cast<unsigned int>(c)] : chartype_table[128]) & (ct))
-#else
- #define IS_CHARTYPE(c, ct) (chartype_table[static_cast<unsigned char>(c)] & (ct))
-#endif
+ enum chartypex
+ {
+ ctx_space = 1, // \r, \n, space, tab
+ ctx_start_symbol = 2, // Any symbol > 127, a-z, A-Z, _
+ ctx_digit = 4, // 0-9
+ ctx_symbol = 8 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
+ };
+
+ const unsigned char chartypex_table[256] =
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 0-15
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 0, // 32-47
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 0, 0, 0, 0, 0, // 48-63
+ 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 64-79
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 10, // 80-95
+ 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 96-111
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, // 112-127
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 128+
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
+ };
+
enum output_chartype_t
{
oct_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
@@ -941,11 +967,15 @@ namespace
};
#ifdef PUGIXML_WCHAR_MODE
- #define IS_OUTPUT_CHARTYPE(c, ct) ((static_cast<unsigned int>(c) < 128 ? output_chartype_table[static_cast<unsigned int>(c)] : output_chartype_table[128]) & (ct))
+ #define IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
#else
- #define IS_OUTPUT_CHARTYPE(c, ct) (output_chartype_table[static_cast<unsigned char>(c)] & (ct))
+ #define IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
#endif
+ #define IS_CHARTYPE(c, ct) IS_CHARTYPE_IMPL(c, ct, chartype_table)
+ #define IS_CHARTYPEX(c, ct) IS_CHARTYPE_IMPL(c, ct, chartypex_table)
+ #define IS_OUTPUT_CHARTYPE(c, ct) IS_CHARTYPE_IMPL(c, ct, output_chartype_table)
+
bool is_little_endian()
{
unsigned int ui = 1;
@@ -1007,10 +1037,7 @@ namespace
// try to guess encoding (based on XML specification, Appendix F.1)
const uint8_t* data = static_cast<const uint8_t*>(contents);
- #ifdef __DMC__
- volatile // explicitly store to local to work around DMC bug (it loads 4 bytes from data[3] otherwise)
- #endif
- uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
+ DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
return guess_buffer_encoding(d0, d1, d2, d3);
}
@@ -1248,7 +1275,7 @@ namespace
inline bool strcpy_insitu_allow(size_t length, uintptr_t allocated, char_t* target)
{
assert(target);
- size_t target_length = impl::strlen(target);
+ size_t target_length = strlength(target);
// always reuse document buffer memory if possible
if (!allocated) return target_length >= length;
@@ -1261,7 +1288,7 @@ namespace
bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source)
{
- size_t source_length = impl::strlen(source);
+ size_t source_length = strlength(source);
if (source_length == 0)
{
@@ -2572,7 +2599,7 @@ namespace
void write(const char_t* data)
{
- write(data, impl::strlen(data));
+ write(data, strlength(data));
}
void write(char_t d0)
@@ -3254,7 +3281,7 @@ namespace pugi
#ifdef PUGIXML_WCHAR_MODE
char_t wbuf[128];
- impl::widen_ascii(wbuf, buf);
+ widen_ascii(wbuf, buf);
return set_value(wbuf);
#else
@@ -3269,7 +3296,7 @@ namespace pugi
#ifdef PUGIXML_WCHAR_MODE
char_t wbuf[128];
- impl::widen_ascii(wbuf, buf);
+ widen_ascii(wbuf, buf);
return set_value(wbuf);
#else
@@ -3284,7 +3311,7 @@ namespace pugi
#ifdef PUGIXML_WCHAR_MODE
char_t wbuf[128];
- impl::widen_ascii(wbuf, buf);
+ widen_ascii(wbuf, buf);
return set_value(wbuf);
#else
@@ -3406,7 +3433,7 @@ namespace pugi
if (!_root) return xml_node();
for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
- if (i->name && impl::strequal(name, i->name)) return xml_node(i);
+ if (i->name && strequal(name, i->name)) return xml_node(i);
return xml_node();
}
@@ -3416,7 +3443,7 @@ namespace pugi
if (!_root) return xml_attribute();
for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
- if (i->name && impl::strequal(name, i->name))
+ if (i->name && strequal(name, i->name))
return xml_attribute(i);
return xml_attribute();
@@ -3427,7 +3454,7 @@ namespace pugi
if (!_root) return xml_node();
for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
- if (i->name && impl::strequal(name, i->name)) return xml_node(i);
+ if (i->name && strequal(name, i->name)) return xml_node(i);
return xml_node();
}
@@ -3445,7 +3472,7 @@ namespace pugi
if (!_root) return xml_node();
for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
- if (i->name && impl::strequal(name, i->name)) return xml_node(i);
+ if (i->name && strequal(name, i->name)) return xml_node(i);
return xml_node();
}
@@ -3775,10 +3802,10 @@ namespace pugi
if (!_root) return xml_node();
for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
- if (i->name && impl::strequal(name, i->name))
+ if (i->name && strequal(name, i->name))
{
for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
- if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value))
+ if (strequal(attr_name, a->name) && strequal(attr_value, a->value))
return xml_node(i);
}
@@ -3791,7 +3818,7 @@ namespace pugi
for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
- if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value))
+ if (strequal(attr_name, a->name) && strequal(attr_value, a->value))
return xml_node(i);
return xml_node();
@@ -3855,7 +3882,7 @@ namespace pugi
{
for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
{
- if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
+ if (j->name && strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
{
xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
@@ -4237,7 +4264,7 @@ namespace pugi
xml_encoding encoding = encoding_utf8;
#endif
- return load_buffer(contents, impl::strlen(contents) * sizeof(char_t), options, encoding);
+ return load_buffer(contents, strlength(contents) * sizeof(char_t), options, encoding);
}
xml_parse_result xml_document::parse(char* xmlstr, unsigned int options)
@@ -4498,60 +4525,6 @@ namespace std
#ifndef PUGIXML_NO_XPATH
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <setjmp.h>
-#include <ctype.h>
-#include <math.h>
-#include <float.h>
-
-#ifdef PUGIXML_WCHAR_MODE
-# include <wchar.h>
-#endif
-
-#include <new>
-
-#ifndef PUGIXML_NO_STL
-# include <string>
-#endif
-
-// int32_t
-#if !defined(_MSC_VER) || _MSC_VER >= 1600
-# include <stdint.h>
-#else
-typedef __int32 int32_t;
-#endif
-
-#if defined(_MSC_VER)
-# pragma warning(disable: 4127) // conditional expression is constant
-# pragma warning(disable: 4324) // structure was padded due to __declspec(align())
-# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
-# pragma warning(disable: 4702) // unreachable code
-# pragma warning(disable: 4996) // this function or variable may be unsafe
-#endif
-
-#ifdef __INTEL_COMPILER
-# pragma warning(disable: 1478 1786) // function was declared "deprecated"
-#endif
-
-#ifdef __SNC__
-# pragma diag_suppress=237 // controlling expression is constant
-#endif
-
-// String utilities prototypes
-namespace pugi
-{
- namespace impl
- {
- size_t strlen(const char_t* s);
- bool strequal(const char_t* src, const char_t* dst);
- bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count);
- void widen_ascii(wchar_t* dest, const char* source);
- }
-}
-
// STL replacements
namespace pstd
{
@@ -4675,7 +4648,7 @@ namespace
static char_t* duplicate_string(const char_t* string)
{
- return duplicate_string(string, impl::strlen(string));
+ return duplicate_string(string, strlength(string));
}
public:
@@ -4750,8 +4723,8 @@ namespace
else
{
// need to make heap copy
- size_t target_length = impl::strlen(_buffer);
- size_t source_length = impl::strlen(o._buffer);
+ size_t target_length = strlength(_buffer);
+ size_t source_length = strlength(o._buffer);
size_t length = target_length + source_length;
// allocate new buffer
@@ -4775,7 +4748,7 @@ namespace
size_t length() const
{
- return impl::strlen(_buffer);
+ return strlength(_buffer);
}
char_t* data()
@@ -4797,12 +4770,12 @@ namespace
bool operator==(const xpath_string& o) const
{
- return impl::strequal(_buffer, o._buffer);
+ return strequal(_buffer, o._buffer);
}
bool operator!=(const xpath_string& o) const
{
- return !impl::strequal(_buffer, o._buffer);
+ return !strequal(_buffer, o._buffer);
}
};
@@ -4815,41 +4788,6 @@ namespace
namespace
{
using namespace pugi;
-
- enum chartypex
- {
- ctx_space = 1, // \r, \n, space, tab
- ctx_start_symbol = 2, // Any symbol > 127, a-z, A-Z, _
- ctx_digit = 4, // 0-9
- ctx_symbol = 8 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
- };
-
- const unsigned char chartypex_table[256] =
- {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 0-15
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 0, // 32-47
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 0, 0, 0, 0, 0, // 48-63
- 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 64-79
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 10, // 80-95
- 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 96-111
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, // 112-127
-
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 128+
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
- };
-
-#ifdef PUGIXML_WCHAR_MODE
- #define IS_CHARTYPEX(c, ct) ((static_cast<unsigned int>(c) < 128 ? chartypex_table[static_cast<unsigned int>(c)] : chartypex_table[128]) & (ct))
-#else
- #define IS_CHARTYPEX(c, ct) (chartypex_table[static_cast<unsigned char>(c)] & (ct))
-#endif
bool starts_with(const char_t* string, const char_t* pattern)
{
@@ -5325,7 +5263,7 @@ namespace
if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
- return prefix ? name[5] == ':' && impl::strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
+ return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
}
};
@@ -5401,16 +5339,13 @@ namespace
void translate(char_t* buffer, const char_t* from, const char_t* to)
{
- size_t to_length = impl::strlen(to);
+ size_t to_length = strlength(to);
char_t* write = buffer;
while (*buffer)
{
- #ifdef __DMC__
- volatile // explicitly store to local to work around DMC bug (it loads 4 bytes from buffer otherwise)
- #endif
- char_t ch = *buffer++;
+ DMC_VOLATILE char_t ch = *buffer++;
const char_t* pos = find_char(from, ch);
@@ -5798,7 +5733,7 @@ namespace pugi
{
size_t length = static_cast<size_t>(end - begin);
- return impl::strequalrange(other, begin, length);
+ return strequalrange(other, begin, length);
}
};
@@ -6389,7 +6324,7 @@ namespace pugi
switch (_test)
{
case nodetest_name:
- if (impl::strequal(name, _data.nodetest)) ns.push_back(xpath_node(a, parent));
+ if (strequal(name, _data.nodetest)) ns.push_back(xpath_node(a, parent));
break;
case nodetest_type_node:
@@ -6414,7 +6349,7 @@ namespace pugi
switch (_test)
{
case nodetest_name:
- if (n.type() == node_element && impl::strequal(n.name(), _data.nodetest)) ns.push_back(n);
+ if (n.type() == node_element && strequal(n.name(), _data.nodetest)) ns.push_back(n);
break;
case nodetest_type_node:
@@ -6437,7 +6372,7 @@ namespace pugi
break;
case nodetest_pi:
- if (n.type() == node_pi && impl::strequal(n.name(), _data.nodetest))
+ if (n.type() == node_pi && strequal(n.name(), _data.nodetest))
ns.push_back(n);
break;