From ec0c9c5561785299d0c03ed05fe94e0031ba6487 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 20 Sep 2015 10:37:46 -0700 Subject: Implement custom string to integer conversion This makes conversion significantly faster and removes more CRT dependencies; in particular, to support long long pugixml only requires the type itself (and the division operator...). New implementation is up to 3x faster on short decimal numbers. Note that unlike the old implementation, new implementation correctly handles overflow and underflow and clamps the value to the representable range. This means that there are some behavior changes - e.g. previously as_uint on "-1" would return INT_MAX instead of 0. In addition to CRT issues, for platforms with 64-bit long old implementation incorrectly truncated from long to int or unsigned int, so even if CRT clamped the values the result would have been incorrect. --- src/pugixml.cpp | 111 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 63 insertions(+), 48 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 413e342..2df5394 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #ifdef PUGIXML_WCHAR_MODE # include @@ -4429,39 +4430,81 @@ PUGI__NS_BEGIN } // get value with conversion functions - PUGI__FN int get_integer_base(const char_t* value) + template U string_to_integer(const char_t* value, U minneg, U maxpos) { + U result = 0; const char_t* s = value; while (PUGI__IS_CHARTYPE(*s, ct_space)) s++; - if (*s == '-') - s++; + bool negative = (*s == '-'); + + s += negative; + + bool overflow = false; + + if (s[0] == '0' && (s[1] | ' ') == 'x') + { + s += 2; + + const char_t* start = s; + + for (;;) + { + if (static_cast(*s - '0') < 10) + result = result * 16 + (*s - '0'); + else if (static_cast((*s | ' ') - 'a') < 6) + result = result * 16 + ((*s | ' ') - 'a' + 10); + else + break; + + s++; + } + + size_t digits = static_cast(s - start); + + overflow = digits > sizeof(U) * 2; + } + else + { + const char_t* start = s; + + for (;;) + { + if (static_cast(*s - '0') < 10) + result = result * 10 + (*s - '0'); + else + break; + + s++; + } + + size_t digits = static_cast(s - start); + + PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2); - return (s[0] == '0' && (s[1] | ' ') == 'x') ? 16 : 10; + const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5; + const char max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6'; + const size_t high_bit = sizeof(U) * 8 - 1; + + overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit))); + } + + if (negative) + return (overflow || result > minneg) ? 0 - minneg : 0 - result; + else + return (overflow || result > maxpos) ? maxpos : result; } PUGI__FN int get_value_int(const char_t* value) { - int base = get_integer_base(value); - - #ifdef PUGIXML_WCHAR_MODE - return static_cast(wcstol(value, 0, base)); - #else - return static_cast(strtol(value, 0, base)); - #endif + return string_to_integer(value, INT_MIN, INT_MAX); } PUGI__FN unsigned int get_value_uint(const char_t* value) { - int base = get_integer_base(value); - - #ifdef PUGIXML_WCHAR_MODE - return static_cast(wcstoul(value, 0, base)); - #else - return static_cast(strtoul(value, 0, base)); - #endif + return string_to_integer(value, 0, UINT_MAX); } PUGI__FN double get_value_double(const char_t* value) @@ -4494,40 +4537,12 @@ PUGI__NS_BEGIN #ifdef PUGIXML_HAS_LONG_LONG PUGI__FN long long get_value_llong(const char_t* value) { - int base = get_integer_base(value); - - #ifdef PUGIXML_WCHAR_MODE - #ifdef PUGI__MSVC_CRT_VERSION - return _wcstoi64(value, 0, base); - #else - return wcstoll(value, 0, base); - #endif - #else - #ifdef PUGI__MSVC_CRT_VERSION - return _strtoi64(value, 0, base); - #else - return strtoll(value, 0, base); - #endif - #endif + return string_to_integer(value, LLONG_MIN, LLONG_MAX); } PUGI__FN unsigned long long get_value_ullong(const char_t* value) { - int base = get_integer_base(value); - - #ifdef PUGIXML_WCHAR_MODE - #ifdef PUGI__MSVC_CRT_VERSION - return _wcstoui64(value, 0, base); - #else - return wcstoull(value, 0, base); - #endif - #else - #ifdef PUGI__MSVC_CRT_VERSION - return _strtoui64(value, 0, base); - #else - return strtoull(value, 0, base); - #endif - #endif + return string_to_integer(value, 0, ULLONG_MAX); } #endif -- cgit v1.2.3