summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> 2015-09-20 10:37:46 -0700
committer: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> 2015-09-20 10:43:38 -0700
commit: ec0c9c5561785299d0c03ed05fe94e0031ba6487 (patch)
tree: d419f86bde522652621677dd3b45c0c54241cfaa /src
parent: bda55c818c0936fe5e41e381794018f0fc60b1a3 (diff)
Implement custom string to integer conversion
This makes conversion significantly faster and removes more CRT dependencies; in particular, to support long long, pugixml now requires only the type itself (and the division operator...). The new implementation is up to 3x faster on short decimal numbers. Note that, unlike the old implementation, the new implementation correctly handles overflow and underflow and clamps the value to the representable range. This means that there are some behavior changes - e.g. previously as_uint on "-1" would return UINT_MAX instead of 0. In addition to the CRT issues, on platforms with a 64-bit long the old implementation incorrectly truncated from long to int or unsigned int, so even if the CRT clamped the values, the result would have been incorrect.
Diffstat (limited to 'src')
-rw-r--r-- src/pugixml.cpp 111
1 files changed, 63 insertions, 48 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index 413e342..2df5394 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -20,6 +20,7 @@
#include <stdio.h>
#include <string.h>
#include <assert.h>
+#include <limits.h>
#ifdef PUGIXML_WCHAR_MODE
# include <wchar.h>
@@ -4429,39 +4430,81 @@ PUGI__NS_BEGIN
}
// get value with conversion functions
- PUGI__FN int get_integer_base(const char_t* value)
+ template <typename U> U string_to_integer(const char_t* value, U minneg, U maxpos)
{
+ U result = 0;
const char_t* s = value;
while (PUGI__IS_CHARTYPE(*s, ct_space))
s++;
- if (*s == '-')
- s++;
+ bool negative = (*s == '-');
+
+ s += negative;
+
+ bool overflow = false;
+
+ if (s[0] == '0' && (s[1] | ' ') == 'x')
+ {
+ s += 2;
+
+ const char_t* start = s;
+
+ for (;;)
+ {
+ if (static_cast<unsigned>(*s - '0') < 10)
+ result = result * 16 + (*s - '0');
+ else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
+ result = result * 16 + ((*s | ' ') - 'a' + 10);
+ else
+ break;
+
+ s++;
+ }
+
+ size_t digits = static_cast<size_t>(s - start);
+
+ overflow = digits > sizeof(U) * 2;
+ }
+ else
+ {
+ const char_t* start = s;
+
+ for (;;)
+ {
+ if (static_cast<unsigned>(*s - '0') < 10)
+ result = result * 10 + (*s - '0');
+ else
+ break;
+
+ s++;
+ }
+
+ size_t digits = static_cast<size_t>(s - start);
+
+ PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
- return (s[0] == '0' && (s[1] | ' ') == 'x') ? 16 : 10;
+ const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
+ const char max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
+ const size_t high_bit = sizeof(U) * 8 - 1;
+
+ overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
+ }
+
+ if (negative)
+ return (overflow || result > minneg) ? 0 - minneg : 0 - result;
+ else
+ return (overflow || result > maxpos) ? maxpos : result;
}
PUGI__FN int get_value_int(const char_t* value)
{
- int base = get_integer_base(value);
-
- #ifdef PUGIXML_WCHAR_MODE
- return static_cast<int>(wcstol(value, 0, base));
- #else
- return static_cast<int>(strtol(value, 0, base));
- #endif
+ return string_to_integer<unsigned int>(value, INT_MIN, INT_MAX);
}
PUGI__FN unsigned int get_value_uint(const char_t* value)
{
- int base = get_integer_base(value);
-
- #ifdef PUGIXML_WCHAR_MODE
- return static_cast<unsigned int>(wcstoul(value, 0, base));
- #else
- return static_cast<unsigned int>(strtoul(value, 0, base));
- #endif
+ return string_to_integer<unsigned int>(value, 0, UINT_MAX);
}
PUGI__FN double get_value_double(const char_t* value)
@@ -4494,40 +4537,12 @@ PUGI__NS_BEGIN
#ifdef PUGIXML_HAS_LONG_LONG
PUGI__FN long long get_value_llong(const char_t* value)
{
- int base = get_integer_base(value);
-
- #ifdef PUGIXML_WCHAR_MODE
- #ifdef PUGI__MSVC_CRT_VERSION
- return _wcstoi64(value, 0, base);
- #else
- return wcstoll(value, 0, base);
- #endif
- #else
- #ifdef PUGI__MSVC_CRT_VERSION
- return _strtoi64(value, 0, base);
- #else
- return strtoll(value, 0, base);
- #endif
- #endif
+ return string_to_integer<unsigned long long>(value, LLONG_MIN, LLONG_MAX);
}
PUGI__FN unsigned long long get_value_ullong(const char_t* value)
{
- int base = get_integer_base(value);
-
- #ifdef PUGIXML_WCHAR_MODE
- #ifdef PUGI__MSVC_CRT_VERSION
- return _wcstoui64(value, 0, base);
- #else
- return wcstoull(value, 0, base);
- #endif
- #else
- #ifdef PUGI__MSVC_CRT_VERSION
- return _strtoui64(value, 0, base);
- #else
- return strtoull(value, 0, base);
- #endif
- #endif
+ return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);
}
#endif