From 58be69c574390715e4f72ed7e835b236958f9d9c Mon Sep 17 00:00:00 2001 From: "arseny.kapoulkine" Date: Mon, 8 Jan 2007 16:24:53 +0000 Subject: Removed comments (cppguru does not want them), refactored chartype_symbol parsing, documentation fixes git-svn-id: http://pugixml.googlecode.com/svn/trunk@28 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 62 ++++++++++++++++++++++++++------------------------------- src/pugixml.hpp | 2 -- 2 files changed, 28 insertions(+), 36 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index da53c66..cf74eed 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -3,8 +3,6 @@ // Pug Improved XML Parser - Version 0.2 // -------------------------------------------------------- // Copyright (C) 2006-2007, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) -// Thanks to Palvelev Artyom (cppguru@mail.ru) for hints about optimizing -// conversion functions. // This work is based on the pugxml parser, which is: // Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) // Released into the Public Domain. Use at your own risk. @@ -120,29 +118,30 @@ namespace ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, space, tab ct_space = 8, // \r, \n, space, tab ct_parse_cdata = 16, // \0, ], >, \r - ct_parse_comment = 32 // \0, -, >, \r + ct_parse_comment = 32, // \0, -, >, \r + ct_symbol = 64 // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, . }; - + static unsigned char chartype_table[256] = { - 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 - 12, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 32, 0, 0, // 32-47 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 48, 0, // 48-63 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 64-79 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, // 80-95 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 96-111 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 112-127 - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 + 12, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47 + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 1, 0, 48, 0, // 48-63 + 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // 64-79 + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 0, 16, 0, 64, // 80-95 + 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // 96-111 + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 0, 0, 0, 0, // 112-127 + + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 }; bool is_chartype(char c, chartype ct) @@ -275,9 +274,6 @@ namespace pugi struct xml_parser_impl { xml_allocator& alloc; - bool chartype_symbol_table[256]; - - bool chartype_symbol(char c) const { return chartype_symbol_table[(unsigned char)c]; } struct gap { @@ -724,8 +720,6 @@ namespace pugi { for (unsigned int c = 0; c < 256; ++c) { - chartype_symbol_table[c] = c > 127 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || - (c >= '0' && c <= '9') || c == '_' || c == ':' || c == '-' || c == '.'; } } @@ -756,10 +750,10 @@ namespace pugi if(*s == '?') // 'name = s; - SCANWHILE(chartype_symbol(*s)); // Scan for a terminator. + SCANWHILE(is_chartype(*s, ct_symbol)); // Scan for a terminator. ENDSEG(); // Save char in 'ch', terminate & step over. if (*s!=0 && ch == '/') // 'name = s; // Save the offset. - SCANWHILE(chartype_symbol(*s)); // Scan for a terminator. + SCANWHILE(is_chartype(*s, ct_symbol)); // Scan for a terminator. ENDSEG(); // Save char in 'ch', terminate & step over. if(*s!=0 && is_chartype(ch, ct_space)) SKIPWS(); // Eat any whitespace. if(*s!=0 && (ch == '=' || *s == '=')) // '<... #=...' @@ -1040,7 +1034,7 @@ namespace pugi if (name) { - while (*tagname && chartype_symbol(*tagname)) + while (*tagname && is_chartype(*tagname, ct_symbol)) { if (*tagname++ != *name++) goto TAG_NEXTMATCH; } @@ -1063,7 +1057,7 @@ namespace pugi char* name = cursor->name; if (!name) return s; - while (*s && chartype_symbol(*s)) + while (*s && is_chartype(*s, ct_symbol)) { if (*s++ != *name++) return s; } diff --git a/src/pugixml.hpp b/src/pugixml.hpp index b7ded37..de7939e 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -3,8 +3,6 @@ // Pug Improved XML Parser - Version 0.2 // -------------------------------------------------------- // Copyright (C) 2006-2007, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) -// Thanks to Palvelev Artyom (cppguru@mail.ru) for hints about optimizing -// conversion functions. // This work is based on the pugxml parser, which is: // Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) // Released into the Public Domain. Use at your own risk. -- cgit v1.2.3