From 624b5702d75d63dde56fbbc89680358241035aa7 Mon Sep 17 00:00:00 2001 From: "arseny.kapoulkine" Date: Fri, 11 Jun 2010 20:39:57 +0000 Subject: Rewritten numeric character reference parsing (fixed &#; and &#x; parsing), added more character reference tests git-svn-id: http://pugixml.googlecode.com/svn/trunk@512 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 51 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 20 deletions(-) (limited to 'src/pugixml.cpp') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 242202e..68a68d0 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -1374,37 +1374,48 @@ namespace { unsigned int ucsc = 0; - ++stre; - - if (*stre == 'x') // &#x... (hex code) + if (stre[1] == 'x') // &#x... (hex code) { - ++stre; - - while (*stre) + stre += 2; + + char_t ch = *stre; + + if (ch == ';') return stre; + + for (;;) { - if (*stre >= '0' && *stre <= '9') - ucsc = 16 * ucsc + (*stre++ - '0'); - else if (*stre >= 'A' && *stre <= 'F') - ucsc = 16 * ucsc + (*stre++ - 'A' + 10); - else if (*stre >= 'a' && *stre <= 'f') - ucsc = 16 * ucsc + (*stre++ - 'a' + 10); - else if (*stre == ';') + if (static_cast<unsigned int>(ch - '0') <= 9) + ucsc = 16 * ucsc + (ch - '0'); + else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5) + ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10); + else if (ch == ';') break; else // cancel return stre; - } - if (*stre != ';') return stre; - + ch = *++stre; + } + ++stre; } else // &#... (dec code) { - while (*stre >= '0' && *stre <= '9') - ucsc = 10 * ucsc + (*stre++ - '0'); + char_t ch = *++stre; - if (*stre != ';') return stre; - + if (ch == ';') return stre; + + for (;;) + { + if (static_cast<unsigned int>(ch - '0') <= 9) + ucsc = 10 * ucsc + (ch - '0'); + else if (ch == ';') + break; + else // cancel + return stre; + + ch = *++stre; + } + ++stre; } -- cgit v1.2.3