diff options
author | Arseny Kapoulkine <arseny.kapoulkine@gmail.com> | 2017-02-05 21:34:54 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-02-05 21:34:54 -0800 |
commit | a9fe2bb62e0976ab74fdb5266cc3725250eca075 (patch) | |
tree | 85c96e92afb2e3437b0eb20c805227b91e5e69f2 | |
parent | d3b9e4e1e85d0aca562d0e6b62533e68e5a4a749 (diff) | |
parent | 10676b6b8548ddbf9458993062e6a27c2c233d48 (diff) |
Merge pull request #131 from zeux/xpath-noeh
XPath: Remove exceptional control flow
-rw-r--r-- | .travis.yml | 1 | ||||
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | src/pugixml.cpp | 521 | ||||
-rw-r--r-- | tests/test_compact.cpp | 114 | ||||
-rw-r--r-- | tests/test_document.cpp | 9 | ||||
-rw-r--r-- | tests/test_dom_modify.cpp | 28 | ||||
-rw-r--r-- | tests/test_dom_traverse.cpp | 21 | ||||
-rw-r--r-- | tests/test_parse.cpp | 149 | ||||
-rw-r--r-- | tests/test_write.cpp | 6 | ||||
-rw-r--r-- | tests/test_xpath.cpp | 120 | ||||
-rw-r--r-- | tests/test_xpath_api.cpp | 15 | ||||
-rw-r--r-- | tests/test_xpath_functions.cpp | 14 | ||||
-rw-r--r-- | tests/test_xpath_operators.cpp | 10 | ||||
-rw-r--r-- | tests/test_xpath_parse.cpp | 76 | ||||
-rw-r--r-- | tests/test_xpath_paths.cpp | 7 |
15 files changed, 853 insertions, 240 deletions
diff --git a/.travis.yml b/.travis.yml index bf734b9..df5569c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,6 +8,7 @@ env: - DEFINES=standard - DEFINES=PUGIXML_WCHAR_MODE - DEFINES=PUGIXML_COMPACT + - DEFINES=PUGIXML_NO_EXCEPTIONS script: - make test cxxstd=c++11 defines=$DEFINES config=coverage -j2 - make test cxxstd=c++11 defines=$DEFINES config=release -j2 @@ -61,7 +61,7 @@ ifeq ($(config),coverage) test: $(EXECUTABLE) -@find $(BUILD) -name '*.gcda' -exec rm {} + ./$(EXECUTABLE) - @gcov -o $(BUILD)/src/ pugixml.cpp.gcda | sed -e '/./{H;$!d;}' -e 'x;/pugixml.cpp/!d;' + @gcov -b -o $(BUILD)/src/ pugixml.cpp.gcda | sed -e '/./{H;$!d;}' -e 'x;/pugixml.cpp/!d;' @find . -name '*.gcov' -and -not -name 'pugixml.cpp.gcov' -exec rm {} + else test: $(EXECUTABLE) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 0813ae6..7e6fe64 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -29,9 +29,6 @@ #ifndef PUGIXML_NO_XPATH # include <math.h> # include <float.h> -# ifdef PUGIXML_NO_EXCEPTIONS -# include <setjmp.h> -# endif #endif #ifndef PUGIXML_NO_STL @@ -47,10 +44,8 @@ # pragma warning(push) # pragma warning(disable: 4127) // conditional expression is constant # pragma warning(disable: 4324) // structure was padded due to __declspec(align()) -# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable # pragma warning(disable: 4702) // unreachable code # pragma warning(disable: 4996) // this function or variable may be unsafe -# pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged #endif #ifdef __INTEL_COMPILER @@ -6118,7 +6113,7 @@ namespace pugi if (j != _root) result[--offset] = delimiter; - if (j->name && *j->name) + if (j->name) { size_t length = impl::strlength(j->name); @@ -6137,7 +6132,7 @@ namespace pugi { xml_node found = *this; // Current search context. - if (!_root || !path_ || !path_[0]) return found; + if (!_root || !path_[0]) return found; if (path_[0] == delimiter) { @@ -7392,24 +7387,17 @@ PUGI__NS_BEGIN }; }; - class xpath_allocator + struct xpath_allocator { xpath_memory_block* _root; size_t _root_size; + bool* _error; - public: - #ifdef PUGIXML_NO_EXCEPTIONS - jmp_buf* error_handler; - #endif - - xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size) + xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error) { - #ifdef PUGIXML_NO_EXCEPTIONS - error_handler = 0; - #endif } - void* allocate_nothrow(size_t size) + void* allocate(size_t size) { // round size up to block alignment boundary size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); @@ -7430,7 +7418,11 @@ PUGI__NS_BEGIN size_t block_size = block_capacity + offsetof(xpath_memory_block, data); xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size)); - if (!block) return 0; + if (!block) + { + if (_error) *_error = true; + return 0; + } block->next = _root; block->capacity = block_capacity; @@ -7442,23 +7434,6 @@ PUGI__NS_BEGIN } } - void* allocate(size_t size) - { - void* result = allocate_nothrow(size); - - if (!result) - { - #ifdef PUGIXML_NO_EXCEPTIONS - assert(error_handler); - longjmp(*error_handler, 1); - #else - throw std::bad_alloc(); - #endif - } - - return result; - } - void* reallocate(void* ptr, size_t old_size, size_t new_size) { // round size up to block alignment boundary @@ -7468,33 +7443,35 @@ PUGI__NS_BEGIN // we can only reallocate the last object assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size); - // adjust root size so that we have not allocated the object at all - bool only_object = (_root_size == old_size); - - if (ptr) _root_size -= old_size; + // try to reallocate the object inplace + if (ptr && _root_size - old_size + new_size <= _root->capacity) + { + _root_size = _root_size - old_size + new_size; + return ptr; + } - // allocate a new version (this will obviously reuse the memory if possible) + // allocate a new block void* result = allocate(new_size); - assert(result); + if (!result) return 0; // we have a new block - if (result != ptr && ptr) + if (ptr) { - // copy old data + // copy old data (we only support growing) assert(new_size >= old_size); memcpy(result, ptr, old_size); // free the previous page if it had no other objects - if (only_object) - { - assert(_root->data == result); - assert(_root->next); + assert(_root->data == result); + assert(_root->next); + if (_root->next->data == ptr) + { + // deallocate the whole page, unless it was the first one xpath_memory_block* next = _root->next->next; if (next) { - // deallocate the whole page, unless it was the first one xml_memory::deallocate(_root->next); _root->next = next; } @@ -7566,22 +7543,15 @@ PUGI__NS_BEGIN xpath_allocator result; xpath_allocator temp; xpath_stack stack; + bool oom; - #ifdef PUGIXML_NO_EXCEPTIONS - jmp_buf error_handler; - #endif - - xpath_stack_data(): result(blocks + 0), temp(blocks + 1) + xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false) { blocks[0].next = blocks[1].next = 0; blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data); stack.result = &result; stack.temp = &temp; - - #ifdef PUGIXML_NO_EXCEPTIONS - result.error_handler = temp.error_handler = &error_handler; - #endif } ~xpath_stack_data() @@ -7603,7 +7573,7 @@ PUGI__NS_BEGIN static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) { char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t))); - assert(result); + if (!result) return 0; memcpy(result, string, length * sizeof(char_t)); result[length] = 0; @@ -7632,9 +7602,13 @@ PUGI__NS_BEGIN { assert(begin <= end); + if (begin == end) + return xpath_string(); + size_t length = static_cast<size_t>(end - begin); + const char_t* data = duplicate_string(begin, length, alloc); - return length == 0 ? xpath_string() : xpath_string(duplicate_string(begin, length, alloc), true, length); + return data ? xpath_string(data, true, length) : xpath_string(); } xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0) @@ -7660,7 +7634,7 @@ PUGI__NS_BEGIN // allocate new buffer char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t))); - assert(result); + if (!result) return; // append first string to the new buffer in case there was no reallocation if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t)); @@ -7692,8 +7666,11 @@ PUGI__NS_BEGIN if (!_uses_heap) { size_t length_ = strlength(_buffer); + const char_t* data_ = duplicate_string(_buffer, length_, alloc); - _buffer = duplicate_string(_buffer, length_, alloc); + if (!data_) return 0; + + _buffer = data_; _uses_heap = true; _length_heap = length_; } @@ -8117,7 +8094,7 @@ PUGI__NS_BEGIN // allocate a buffer of suitable length for the number size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4; char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size)); - assert(result); + if (!result) return xpath_string(); // make the number! char_t* s = result; @@ -8401,12 +8378,10 @@ PUGI__NS_BEGIN if (!table[i]) table[i] = static_cast<unsigned char>(i); - void* result = alloc->allocate_nothrow(sizeof(table)); + void* result = alloc->allocate(sizeof(table)); + if (!result) return 0; - if (result) - { - memcpy(result, table, sizeof(table)); - } + memcpy(result, table, sizeof(table)); return static_cast<unsigned char*>(result); } @@ -8749,7 +8724,7 @@ PUGI__NS_BEGIN { // reallocate the old array or allocate a new one xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node))); - assert(data); + if (!data) return; // finalize _begin = data; @@ -8801,7 +8776,7 @@ PUGI__NS_BEGIN // reallocate the old array or allocate a new one xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); - assert(data); + if (!data) return; // finalize _begin = data; @@ -10387,7 +10362,7 @@ PUGI__NS_BEGIN if (count > sizeof(static_buffer) / sizeof(static_buffer[0])) { buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string))); - assert(buffer); + if (!buffer) return xpath_string(); } // evaluate all strings to temporary stack @@ -10405,7 +10380,7 @@ PUGI__NS_BEGIN // create final string char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t))); - assert(result); + if (!result) return xpath_string(); char_t* ri = result; @@ -10572,6 +10547,8 @@ PUGI__NS_BEGIN xpath_string s = string_value(c.n, stack.result); char_t* begin = s.data(stack.result); + if (!begin) return xpath_string(); + char_t* end = normalize_space(begin); return xpath_string::from_heap_preallocated(begin, end); @@ -10582,6 +10559,8 @@ PUGI__NS_BEGIN xpath_string s = _left->eval_string(c, stack); char_t* begin = s.data(stack.result); + if (!begin) return xpath_string(); + char_t* end = normalize_space(begin); return xpath_string::from_heap_preallocated(begin, end); @@ -10598,6 +10577,8 @@ PUGI__NS_BEGIN xpath_string to = _right->_next->eval_string(c, swapped_stack); char_t* begin = s.data(stack.result); + if (!begin) return xpath_string(); + char_t* end = translate(begin, from.c_str(), to.c_str(), to.length()); return xpath_string::from_heap_preallocated(begin, end); @@ -10608,6 +10589,8 @@ PUGI__NS_BEGIN xpath_string s = _left->eval_string(c, stack); char_t* begin = s.data(stack.result); + if (!begin) return xpath_string(); + char_t* end = translate_table(begin, _data.table); return xpath_string::from_heap_preallocated(begin, end); @@ -10917,66 +10900,77 @@ PUGI__NS_BEGIN char_t _scratch[32]; - #ifdef PUGIXML_NO_EXCEPTIONS - jmp_buf _error_handler; - #endif - - void throw_error(const char* message) + xpath_ast_node* error(const char* message) { _result->error = message; _result->offset = _lexer.current_pos() - _query; - #ifdef PUGIXML_NO_EXCEPTIONS - longjmp(_error_handler, 1); - #else - throw xpath_exception(*_result); - #endif + return 0; } - void throw_error_oom() + xpath_ast_node* error_oom() { - #ifdef PUGIXML_NO_EXCEPTIONS - throw_error("Out of memory"); - #else - throw std::bad_alloc(); - #endif + assert(_alloc->_error); + *_alloc->_error = true; + + return 0; } void* alloc_node() { - void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node)); + return _alloc->allocate(sizeof(xpath_ast_node)); + } - if (!result) throw_error_oom(); + xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; + } - return result; + xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; } - const char_t* alloc_string(const xpath_lexer_string& value) + xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value) { - if (value.begin) - { - size_t length = static_cast<size_t>(value.end - value.begin); + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; + } - char_t* c = static_cast<char_t*>(_alloc->allocate_nothrow((length + 1) * sizeof(char_t))); - if (!c) throw_error_oom(); - assert(c); // workaround for clang static analysis + xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0; + } - memcpy(c, value.begin, length * sizeof(char_t)); - c[length] = 0; + xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0; + } - return c; - } - else return 0; + xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0; } - xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2]) + const char_t* alloc_string(const xpath_lexer_string& value) { - assert(argc <= 1); + if (!value.begin) + return PUGIXML_TEXT(""); + + size_t length = static_cast<size_t>(value.end - value.begin); - if (argc == 1 && args[0]->rettype() != xpath_type_node_set) - throw_error("Function has to be applied to node set"); + char_t* c = static_cast<char_t*>(_alloc->allocate((length + 1) * sizeof(char_t))); + if (!c) return 0; - return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]); + memcpy(c, value.begin, length * sizeof(char_t)); + c[length] = 0; + + return c; } xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) @@ -10985,103 +10979,110 @@ PUGI__NS_BEGIN { case 'b': if (name == PUGIXML_TEXT("boolean") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]); + return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]); break; case 'c': if (name == PUGIXML_TEXT("count") && argc == 1) { - if (args[0]->rettype() != xpath_type_node_set) - throw_error("Function has to be applied to node set"); - - return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]); + if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); + return alloc_node(ast_func_count, xpath_type_number, args[0]); } else if (name == PUGIXML_TEXT("contains") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]); + return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]); else if (name == PUGIXML_TEXT("concat") && argc >= 2) - return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]); + return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("ceiling") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]); + return alloc_node(ast_func_ceiling, xpath_type_number, args[0]); break; case 'f': if (name == PUGIXML_TEXT("false") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean); + return alloc_node(ast_func_false, xpath_type_boolean); else if (name == PUGIXML_TEXT("floor") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]); + return alloc_node(ast_func_floor, xpath_type_number, args[0]); break; case 'i': if (name == PUGIXML_TEXT("id") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]); + return alloc_node(ast_func_id, xpath_type_node_set, args[0]); break; case 'l': if (name == PUGIXML_TEXT("last") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number); + return alloc_node(ast_func_last, xpath_type_number); else if (name == PUGIXML_TEXT("lang") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]); + return alloc_node(ast_func_lang, xpath_type_boolean, args[0]); else if (name == PUGIXML_TEXT("local-name") && argc <= 1) - return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args); + { + if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); + return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]); + } break; case 'n': if (name == PUGIXML_TEXT("name") && argc <= 1) - return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args); + { + if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); + return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]); + } else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) - return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args); + { + if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); + return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]); + } else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); + return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("not") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]); + return alloc_node(ast_func_not, xpath_type_boolean, args[0]); else if (name == PUGIXML_TEXT("number") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); + return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); break; case 'p': if (name == PUGIXML_TEXT("position") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number); + return alloc_node(ast_func_position, xpath_type_number); break; case 'r': if (name == PUGIXML_TEXT("round") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]); + return alloc_node(ast_func_round, xpath_type_number, args[0]); break; case 's': if (name == PUGIXML_TEXT("string") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); + return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); else if (name == PUGIXML_TEXT("string-length") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]); + return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]); else if (name == PUGIXML_TEXT("starts-with") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); + return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); else if (name == PUGIXML_TEXT("substring-before") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); + return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("substring-after") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); + return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) - return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); + return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("sum") && argc == 1) { - if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); - return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]); + if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); + return alloc_node(ast_func_sum, xpath_type_number, args[0]); } break; case 't': if (name == PUGIXML_TEXT("translate") && argc == 3) - return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]); + return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("true") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean); + return alloc_node(ast_func_true, xpath_type_boolean); break; @@ -11089,9 +11090,7 @@ PUGI__NS_BEGIN break; } - throw_error("Unrecognized function or wrong parameter count"); - - return 0; + return error("Unrecognized function or wrong parameter count"); } axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) @@ -11207,18 +11206,18 @@ PUGI__NS_BEGIN xpath_lexer_string name = _lexer.contents(); if (!_variables) - throw_error("Unknown variable: variable set is not provided"); + return error("Unknown variable: variable set is not provided"); xpath_variable* var = 0; if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var)) - throw_error_oom(); + return error_oom(); if (!var) - throw_error("Unknown variable: variable set does not contain the given name"); + return error("Unknown variable: variable set does not contain the given name"); _lexer.next(); - return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var); + return alloc_node(ast_variable, var->type(), var); } case lex_open_brace: @@ -11226,9 +11225,10 @@ PUGI__NS_BEGIN _lexer.next(); xpath_ast_node* n = parse_expression(); + if (!n) return 0; if (_lexer.current() != lex_close_brace) - throw_error("Unmatched braces"); + return error("Expected ')' to match an opening '('"); _lexer.next(); @@ -11238,11 +11238,11 @@ PUGI__NS_BEGIN case lex_quoted_string: { const char_t* value = alloc_string(_lexer.contents()); + if (!value) return 0; - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value); _lexer.next(); - return n; + return alloc_node(ast_string_constant, xpath_type_string, value); } case lex_number: @@ -11250,12 +11250,11 @@ PUGI__NS_BEGIN double value = 0; if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value)) - throw_error_oom(); + return error_oom(); - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value); _lexer.next(); - return n; + return alloc_node(ast_number_constant, xpath_type_number, value); } case lex_string: @@ -11269,19 +11268,20 @@ PUGI__NS_BEGIN xpath_ast_node* last_arg = 0; if (_lexer.current() != lex_open_brace) - throw_error("Unrecognized function call"); + return error("Unrecognized function call"); _lexer.next(); - if (_lexer.current() != lex_close_brace) - args[argc++] = parse_expression(); - while (_lexer.current() != lex_close_brace) { - if (_lexer.current() != lex_comma) - throw_error("No comma between function arguments"); - _lexer.next(); + if (argc > 0) + { + if (_lexer.current() != lex_comma) + return error("No comma between function arguments"); + _lexer.next(); + } xpath_ast_node* n = parse_expression(); + if (!n) return 0; if (argc < 2) args[argc] = n; else last_arg->set_next(n); @@ -11296,9 +11296,7 @@ PUGI__NS_BEGIN } default: - throw_error("Unrecognizable primary expression"); - - return 0; + return error("Unrecognizable primary expression"); } } @@ -11308,20 +11306,23 @@ PUGI__NS_BEGIN xpath_ast_node* parse_filter_expression() { xpath_ast_node* n = parse_primary_expression(); + if (!n) return 0; while (_lexer.current() == lex_open_square_brace) { _lexer.next(); - xpath_ast_node* expr = parse_expression(); - if (n->rettype() != xpath_type_node_set) - throw_error("Predicate has to be applied to node set"); + return error("Predicate has to be applied to node set"); + + xpath_ast_node* expr = parse_expression(); + if (!expr) return 0; - n = new (alloc_node()) xpath_ast_node(ast_filter, n, expr, predicate_default); + n = alloc_node(ast_filter, n, expr, predicate_default); + if (!n) return 0; if (_lexer.current() != lex_close_square_brace) - throw_error("Unmatched square brace"); + return error("Expected ']' to match an opening '['"); _lexer.next(); } @@ -11337,7 +11338,7 @@ PUGI__NS_BEGIN xpath_ast_node* parse_step(xpath_ast_node* set) { if (set && set->rettype() != xpath_type_node_set) - throw_error("Step has to be applied to node set"); + return error("Step has to be applied to node set"); bool axis_specified = false; axis_t axis = axis_child; // implied child axis @@ -11353,13 +11354,19 @@ PUGI__NS_BEGIN { _lexer.next(); - return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0); + if (_lexer.current() == lex_open_square_brace) + return error("Predicates are not allowed after an abbreviated step"); + + return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0); } else if (_lexer.current() == lex_double_dot) { _lexer.next(); - return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0); + if (_lexer.current() == lex_open_square_brace) + return error("Predicates are not allowed after an abbreviated step"); + + return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0); } nodetest_t nt_type = nodetest_none; @@ -11376,12 +11383,12 @@ PUGI__NS_BEGIN { // parse axis name if (axis_specified) - throw_error("Two axis specifiers in one step"); + return error("Two axis specifiers in one step"); axis = parse_axis_name(nt_name, axis_specified); if (!axis_specified) - throw_error("Unknown axis"); + return error("Unknown axis"); // read actual node test _lexer.next(); @@ -11397,7 +11404,10 @@ PUGI__NS_BEGIN nt_name = _lexer.contents(); _lexer.next(); } - else throw_error("Unrecognized node test"); + else + { + return error("Unrecognized node test"); + } } if (nt_type == nodetest_none) @@ -11414,26 +11424,26 @@ PUGI__NS_BEGIN nt_type = parse_node_test_type(nt_name); if (nt_type == nodetest_none) - throw_error("Unrecognized node type"); + return error("Unrecognized node type"); nt_name = xpath_lexer_string(); } else if (nt_name == PUGIXML_TEXT("processing-instruction")) { if (_lexer.current() != lex_quoted_string) - throw_error("Only literals are allowed as arguments to processing-instruction()"); + return error("Only literals are allowed as arguments to processing-instruction()"); nt_type = nodetest_pi; nt_name = _lexer.contents(); _lexer.next(); if (_lexer.current() != lex_close_brace) - throw_error("Unmatched brace near processing-instruction()"); + return error("Unmatched brace near processing-instruction()"); _lexer.next(); } else { - throw_error("Unmatched brace near node type test"); + return error("Unmatched brace near node type test"); } } // QName or NCName:* @@ -11459,11 +11469,14 @@ PUGI__NS_BEGIN } else { - throw_error("Unrecognized node test"); + return error("Unrecognized node test"); } const char_t* nt_name_copy = alloc_string(nt_name); - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, nt_name_copy); + if (!nt_name_copy) return 0; + + xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy); + if (!n) return 0; xpath_ast_node* last = 0; @@ -11472,11 +11485,13 @@ PUGI__NS_BEGIN _lexer.next(); xpath_ast_node* expr = parse_expression(); + if (!expr) return 0; - xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default); + xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default); + if (!pred) return 0; if (_lexer.current() != lex_close_square_brace) - throw_error("Unmatched square brace"); + return error("Expected ']' to match an opening '['"); _lexer.next(); if (last) last->set_next(pred); @@ -11492,6 +11507,7 @@ PUGI__NS_BEGIN xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) { xpath_ast_node* n = parse_step(set); + if (!n) return 0; while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) { @@ -11499,9 +11515,13 @@ PUGI__NS_BEGIN _lexer.next(); if (l == lex_double_slash) - n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + { + n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + if (!n) return 0; + } n = parse_step(n); + if (!n) return 0; } return n; @@ -11515,7 +11535,8 @@ PUGI__NS_BEGIN { _lexer.next(); - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); + xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set); + if (!n) return 0; // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path lexeme_t l = _lexer.current(); @@ -11529,8 +11550,11 @@ PUGI__NS_BEGIN { _lexer.next(); - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); - n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set); + if (!n) return 0; + + n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + if (!n) return 0; return parse_relative_location_path(n); } @@ -11553,7 +11577,6 @@ PUGI__NS_BEGIN // PrimaryExpr begins with '$' in case of it being a variable reference, // '(' in case of it being an expression, string literal, number constant or // function call. - if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace || _lexer.current() == lex_quoted_string || _lexer.current() == lex_number || _lexer.current() == lex_string) @@ -11565,7 +11588,8 @@ PUGI__NS_BEGIN while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state; - if (*state != '(') return parse_location_path(); + if (*state != '(') + return parse_location_path(); // This looks like a function call; however this still can be a node-test. Check it. if (parse_node_test_type(_lexer.contents()) != nodetest_none) @@ -11573,6 +11597,7 @@ PUGI__NS_BEGIN } xpath_ast_node* n = parse_filter_expression(); + if (!n) return 0; if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) { @@ -11582,9 +11607,10 @@ PUGI__NS_BEGIN if (l == lex_double_slash) { if (n->rettype() != xpath_type_node_set) - throw_error("Step has to be applied to node set"); + return error("Step has to be applied to node set"); - n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + if (!n) return 0; } // select from location path @@ -11598,9 +11624,10 @@ PUGI__NS_BEGIN _lexer.next(); // precedence 7+ - only parses union expressions - xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7); + xpath_ast_node* n = parse_expression(7); + if (!n) return 0; - return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr); + return alloc_node(ast_op_negate, xpath_type_number, n); } else { @@ -11683,20 +11710,23 @@ PUGI__NS_BEGIN _lexer.next(); xpath_ast_node* rhs = parse_path_or_unary_expression(); + if (!rhs) return 0; binary_op_t nextop = binary_op_t::parse(_lexer); while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence) { rhs = parse_expression_rec(rhs, nextop.precedence); + if (!rhs) return 0; nextop = binary_op_t::parse(_lexer); } if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set)) - throw_error("Union operator has to be applied to node sets"); + return error("Union operator has to be applied to node sets"); - lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs); + lhs = alloc_node(op.asttype, op.rettype, lhs, rhs); + if (!lhs) return 0; op = binary_op_t::parse(_lexer); } @@ -11722,9 +11752,12 @@ PUGI__NS_BEGIN // | MultiplicativeExpr '*' UnaryExpr // | MultiplicativeExpr 'div' UnaryExpr // | MultiplicativeExpr 'mod' UnaryExpr - xpath_ast_node* parse_expression() + xpath_ast_node* parse_expression(int limit = 0) { - return parse_expression_rec(parse_path_or_unary_expression(), 0); + xpath_ast_node* n = parse_path_or_unary_expression(); + if (!n) return 0; + + return parse_expression_rec(n, limit); } xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result) @@ -11733,26 +11766,21 @@ PUGI__NS_BEGIN xpath_ast_node* parse() { - xpath_ast_node* result = parse_expression(); + xpath_ast_node* n = parse_expression(); + if (!n) return 0; // check if there are unparsed tokens left if (_lexer.current() != lex_eof) - throw_error("Incorrect query"); + return error("Incorrect query"); - return result; + return n; } static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result) { xpath_parser parser(query, variables, alloc, result); - #ifdef PUGIXML_NO_EXCEPTIONS - int error = setjmp(parser._error_handler); - - return (error == 0) ? parser.parse() : 0; - #else return parser.parse(); - #endif } }; @@ -11775,7 +11803,7 @@ PUGI__NS_BEGIN xml_memory::deallocate(impl); } - xpath_query_impl(): root(0), alloc(&block) + xpath_query_impl(): root(0), alloc(&block, &oom), oom(false) { block.next = 0; block.capacity = sizeof(block.data); @@ -11784,19 +11812,27 @@ PUGI__NS_BEGIN xpath_ast_node* root; xpath_allocator alloc; xpath_memory_block block; + bool oom; }; PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd) { if (!impl) return xpath_string(); - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return xpath_string(); - #endif - xpath_context c(n, 1, 1); - return impl->root->eval_string(c, sd.stack); + xpath_string r = impl->root->eval_string(c, sd.stack); + + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return xpath_string(); + #else + throw std::bad_alloc(); + #endif + } + + return r; } PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl) @@ -12369,6 +12405,15 @@ namespace pugi _impl = impl.release(); _result.error = 0; } + else + { + #ifdef PUGIXML_NO_EXCEPTIONS + if (qimpl->oom) _result.error = "Out of memory"; + #else + if (qimpl->oom) throw std::bad_alloc(); + throw xpath_exception(_result); + #endif + } } } @@ -12421,11 +12466,18 @@ namespace pugi impl::xpath_context c(n, 1, 1); impl::xpath_stack_data sd; - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return false; - #endif + bool r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack); - return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack); + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return false; + #else + throw std::bad_alloc(); + #endif + } + + return r; } PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const @@ -12435,11 +12487,18 @@ namespace pugi impl::xpath_context c(n, 1, 1); impl::xpath_stack_data sd; - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return impl::gen_nan(); - #endif + double r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack); + + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return impl::gen_nan(); + #else + throw std::bad_alloc(); + #endif + } - return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack); + return r; } #ifndef PUGIXML_NO_STL @@ -12481,12 +12540,17 @@ namespace pugi impl::xpath_context c(n, 1, 1); impl::xpath_stack_data sd; - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return xpath_node_set(); - #endif - impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return xpath_node_set(); + #else + throw std::bad_alloc(); + #endif + } + return xpath_node_set(r.begin(), r.end(), r.type()); } @@ -12498,12 +12562,17 @@ namespace pugi impl::xpath_context c(n, 1, 1); impl::xpath_stack_data sd; - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return xpath_node(); - #endif - impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return xpath_node(); + #else + throw std::bad_alloc(); + #endif + } + return r.first(); } diff --git a/tests/test_compact.cpp b/tests/test_compact.cpp new file mode 100644 index 0000000..f9560c9 --- /dev/null +++ b/tests/test_compact.cpp @@ -0,0 +1,114 @@ +#ifdef PUGIXML_COMPACT +#include "common.hpp" + +static void overflow_hash_table(xml_document& doc) +{ + xml_node n = doc.child(STR("n")); + + // compact encoding assumes next_sibling is a forward-only pointer so we can allocate hash entries by reordering nodes + // we allocate enough hash entries to be exactly on the edge of rehash threshold + for (int i = 0; i < 8; ++i) + CHECK(n.prepend_child(node_element)); +} + +TEST_XML_FLAGS(compact_out_of_memory_string, "<n a='v'/><?n v?>", parse_pi) +{ + test_runner::_memory_fail_threshold = 1; + + overflow_hash_table(doc); + + xml_attribute a = doc.child(STR("n")).attribute(STR("a")); + xml_node pi = doc.last_child(); + + CHECK_ALLOC_FAIL(CHECK(!pi.set_name(STR("name")))); + CHECK_ALLOC_FAIL(CHECK(!pi.set_value(STR("value")))); + CHECK_ALLOC_FAIL(CHECK(!a.set_name(STR("name")))); + CHECK_ALLOC_FAIL(CHECK(!a.set_value(STR("value")))); +} + +TEST_XML(compact_out_of_memory_attribute, "<n a='v'/>") +{ + test_runner::_memory_fail_threshold = 1; + + overflow_hash_table(doc); + + xml_node n = doc.child(STR("n")); + xml_attribute a = n.attribute(STR("a")); + + CHECK_ALLOC_FAIL(CHECK(!n.append_attribute(STR("")))); + CHECK_ALLOC_FAIL(CHECK(!n.prepend_attribute(STR("")))); + CHECK_ALLOC_FAIL(CHECK(!n.insert_attribute_after(STR(""), a))); + CHECK_ALLOC_FAIL(CHECK(!n.insert_attribute_before(STR(""), a))); +} + +TEST_XML(compact_out_of_memory_attribute_copy, "<n a='v'/>") +{ + test_runner::_memory_fail_threshold = 1; + + overflow_hash_table(doc); + + xml_node n = doc.child(STR("n")); + xml_attribute a = n.attribute(STR("a")); + + CHECK_ALLOC_FAIL(CHECK(!n.append_copy(a))); + CHECK_ALLOC_FAIL(CHECK(!n.prepend_copy(a))); + CHECK_ALLOC_FAIL(CHECK(!n.insert_copy_after(a, a))); + CHECK_ALLOC_FAIL(CHECK(!n.insert_copy_before(a, a))); +} + +TEST_XML(compact_out_of_memory_node, "<n/>") +{ + test_runner::_memory_fail_threshold = 1; + + overflow_hash_table(doc); + + xml_node n = doc.child(STR("n")); + + CHECK_ALLOC_FAIL(CHECK(!doc.append_child(node_element))); + CHECK_ALLOC_FAIL(CHECK(!doc.prepend_child(node_element))); + CHECK_ALLOC_FAIL(CHECK(!doc.insert_child_after(node_element, n))); + CHECK_ALLOC_FAIL(CHECK(!doc.insert_child_before(node_element, n))); +} + +TEST_XML(compact_out_of_memory_node_copy, "<n/>") +{ + test_runner::_memory_fail_threshold = 1; + + overflow_hash_table(doc); + + xml_node n = doc.child(STR("n")); + + CHECK_ALLOC_FAIL(CHECK(!doc.append_copy(n))); + CHECK_ALLOC_FAIL(CHECK(!doc.prepend_copy(n))); + CHECK_ALLOC_FAIL(CHECK(!doc.insert_copy_after(n, n))); + CHECK_ALLOC_FAIL(CHECK(!doc.insert_copy_before(n, n))); +} + +TEST_XML(compact_out_of_memory_node_move, "<n/><ne/>") +{ + test_runner::_memory_fail_threshold = 1; + + overflow_hash_table(doc); + + xml_node n = doc.child(STR("n")); + xml_node ne = doc.child(STR("ne")); + + CHECK_ALLOC_FAIL(CHECK(!doc.append_move(n))); + CHECK_ALLOC_FAIL(CHECK(!doc.prepend_move(n))); + CHECK_ALLOC_FAIL(CHECK(!doc.insert_move_after(n, ne))); + CHECK_ALLOC_FAIL(CHECK(!doc.insert_move_before(n, ne))); +} + +TEST_XML(compact_out_of_memory_remove, "<n a='v'/>") +{ + test_runner::_memory_fail_threshold = 1; + + overflow_hash_table(doc); + + xml_node n = doc.child(STR("n")); + xml_attribute a = n.attribute(STR("a")); + + CHECK_ALLOC_FAIL(CHECK(!n.remove_attribute(a))); + CHECK_ALLOC_FAIL(CHECK(!doc.remove_child(n))); +} +#endif diff --git a/tests/test_document.cpp b/tests/test_document.cpp index c7219e1..95bd873 100644 --- a/tests/test_document.cpp +++ b/tests/test_document.cpp @@ -496,6 +496,15 @@ TEST_XML(document_save_declaration_latin1, "<node/>") CHECK(writer.as_narrow() == "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n<node />\n"); } +TEST_XML(document_save_declaration_raw, "<node/>") +{ + xml_writer_string writer; + + doc.save(writer, STR(""), pugi::format_raw, get_native_encoding()); + + CHECK(writer.as_string() == STR("<?xml version=\"1.0\"?><node/>")); +} + struct temp_file { char path[512]; diff --git a/tests/test_dom_modify.cpp b/tests/test_dom_modify.cpp index f2d7ea8..7b26c5f 100644 --- a/tests/test_dom_modify.cpp +++ b/tests/test_dom_modify.cpp @@ -389,7 +389,7 @@ TEST_XML(dom_node_append_copy_attribute, "<node a1='v1'><child a2='v2'/><child/> CHECK_NODE(doc, STR("<node a1=\"v1\" a3=\"v3\" a4=\"v4\"><child a2=\"v2\"/><child a5=\"v5\"/></node>")); } -TEST_XML(dom_node_insert_copy_after_attribute, "<node a1='v1'><child a2='v2'/></node>") +TEST_XML(dom_node_insert_copy_after_attribute, "<node a1='v1'><child a2='v2'/>text</node>") { CHECK(xml_node().insert_copy_after(xml_attribute(), xml_attribute()) == xml_attribute()); @@ -402,6 +402,7 @@ TEST_XML(dom_node_insert_copy_after_attribute, "<node a1='v1'><child a2='v2'/></ CHECK(node.insert_copy_after(a1, xml_attribute()) == xml_attribute()); CHECK(node.insert_copy_after(xml_attribute(), a1) == xml_attribute()); CHECK(node.insert_copy_after(a2, a2) == xml_attribute()); + CHECK(node.last_child().insert_copy_after(a2, a2) == xml_attribute()); xml_attribute a3 = node.insert_copy_after(a1, a1); CHECK(a3 && a3 != a2 && a3 != a1); @@ -414,7 +415,7 @@ TEST_XML(dom_node_insert_copy_after_attribute, "<node a1='v1'><child a2='v2'/></ CHECK(child.insert_copy_after(a4, a4) == xml_attribute()); - CHECK_NODE(doc, STR("<node a1=\"v1\" a2=\"v2\" a2=\"v2\" a1=\"v1\"><child a2=\"v2\"/></node>")); + CHECK_NODE(doc, STR("<node a1=\"v1\" a2=\"v2\" a2=\"v2\" a1=\"v1\"><child a2=\"v2\"/>text</node>")); a3.set_name(STR("a3")); a3 = STR("v3"); @@ -425,10 +426,10 @@ TEST_XML(dom_node_insert_copy_after_attribute, "<node a1='v1'><child a2='v2'/></ a5.set_name(STR("a5")); a5 = STR("v5"); - CHECK_NODE(doc, STR("<node a1=\"v1\" a5=\"v5\" a4=\"v4\" a3=\"v3\"><child a2=\"v2\"/></node>")); + CHECK_NODE(doc, STR("<node a1=\"v1\" a5=\"v5\" a4=\"v4\" a3=\"v3\"><child a2=\"v2\"/>text</node>")); } -TEST_XML(dom_node_insert_copy_before_attribute, "<node a1='v1'><child a2='v2'/></node>") +TEST_XML(dom_node_insert_copy_before_attribute, "<node a1='v1'><child a2='v2'/>text</node>") { CHECK(xml_node().insert_copy_before(xml_attribute(), xml_attribute()) == xml_attribute()); @@ -441,6 +442,7 @@ TEST_XML(dom_node_insert_copy_before_attribute, "<node a1='v1'><child a2='v2'/>< CHECK(node.insert_copy_before(a1, xml_attribute()) == xml_attribute()); CHECK(node.insert_copy_before(xml_attribute(), a1) == xml_attribute()); CHECK(node.insert_copy_before(a2, a2) == xml_attribute()); + CHECK(node.last_child().insert_copy_before(a2, a2) == xml_attribute()); xml_attribute a3 = node.insert_copy_before(a1, a1); CHECK(a3 && a3 != a2 && a3 != a1); @@ -453,7 +455,7 @@ TEST_XML(dom_node_insert_copy_before_attribute, "<node a1='v1'><child a2='v2'/>< CHECK(child.insert_copy_before(a4, a4) == xml_attribute()); - CHECK_NODE(doc, STR("<node a1=\"v1\" a2=\"v2\" a2=\"v2\" a1=\"v1\"><child a2=\"v2\"/></node>")); + CHECK_NODE(doc, STR("<node a1=\"v1\" a2=\"v2\" a2=\"v2\" a1=\"v1\"><child a2=\"v2\"/>text</node>")); a3.set_name(STR("a3")); a3 = STR("v3"); @@ -464,7 +466,7 @@ TEST_XML(dom_node_insert_copy_before_attribute, "<node a1='v1'><child a2='v2'/>< a5.set_name(STR("a5")); a5 = STR("v5"); - CHECK_NODE(doc, STR("<node a3=\"v3\" a4=\"v4\" a5=\"v5\" a1=\"v1\"><child a2=\"v2\"/></node>")); + CHECK_NODE(doc, STR("<node a3=\"v3\" a4=\"v4\" a5=\"v5\" a1=\"v1\"><child a2=\"v2\"/>text</node>")); } TEST_XML(dom_node_remove_attribute, "<node a1='v1' a2='v2' a3='v3'><child a4='v4'/></node>") @@ -550,6 +552,7 @@ TEST_XML(dom_node_insert_child_after, "<node>foo<child/></node>") xml_node node = doc.child(STR("node")); xml_node child = node.child(STR("child")); + CHECK(node.insert_child_after(node_element, xml_node()) == xml_node()); CHECK(node.insert_child_after(node_element, node) == xml_node()); CHECK(child.insert_child_after(node_element, node) == xml_node()); @@ -584,6 +587,7 @@ TEST_XML(dom_node_insert_child_before, "<node>foo<child/></node>") xml_node node = doc.child(STR("node")); xml_node child = node.child(STR("child")); + CHECK(node.insert_child_before(node_element, xml_node()) == xml_node()); CHECK(node.insert_child_before(node_element, node) == xml_node()); CHECK(child.insert_child_before(node_element, node) == xml_node()); @@ -770,13 +774,16 @@ TEST_XML(dom_node_append_copy, "<node>foo<child/></node>") TEST_XML(dom_node_insert_copy_after, "<node>foo<child/></node>") { + xml_node child = doc.child(STR("node")).child(STR("child")); + CHECK(xml_node().insert_copy_after(xml_node(), xml_node()) == xml_node()); CHECK(doc.child(STR("node")).first_child().insert_copy_after(doc.child(STR("node")), doc.child(STR("node"))) == xml_node()); CHECK(doc.insert_copy_after(doc, doc) == xml_node()); CHECK(doc.insert_copy_after(xml_node(), doc.child(STR("node"))) == xml_node()); CHECK(doc.insert_copy_after(doc.child(STR("node")), xml_node()) == xml_node()); + CHECK(doc.insert_copy_after(doc.child(STR("node")), child) == xml_node()); - xml_node n1 = doc.child(STR("node")).insert_copy_after(doc.child(STR("node")).child(STR("child")), doc.child(STR("node")).first_child()); + xml_node n1 = doc.child(STR("node")).insert_copy_after(child, doc.child(STR("node")).first_child()); CHECK(n1); CHECK_STRING(n1.name(), STR("child")); CHECK_NODE(doc, STR("<node>foo<child/><child/></node>")); @@ -794,13 +801,16 @@ TEST_XML(dom_node_insert_copy_after, "<node>foo<child/></node>") TEST_XML(dom_node_insert_copy_before, "<node>foo<child/></node>") { + xml_node child = doc.child(STR("node")).child(STR("child")); + CHECK(xml_node().insert_copy_before(xml_node(), xml_node()) == xml_node()); CHECK(doc.child(STR("node")).first_child().insert_copy_before(doc.child(STR("node")), doc.child(STR("node"))) == xml_node()); CHECK(doc.insert_copy_before(doc, doc) == xml_node()); CHECK(doc.insert_copy_before(xml_node(), doc.child(STR("node"))) == xml_node()); CHECK(doc.insert_copy_before(doc.child(STR("node")), xml_node()) == xml_node()); + CHECK(doc.insert_copy_before(doc.child(STR("node")), child) == xml_node()); - xml_node n1 = doc.child(STR("node")).insert_copy_before(doc.child(STR("node")).child(STR("child")), doc.child(STR("node")).first_child()); + xml_node n1 = doc.child(STR("node")).insert_copy_before(child, doc.child(STR("node")).first_child()); CHECK(n1); CHECK_STRING(n1.name(), STR("child")); CHECK_NODE(doc, STR("<node><child/>foo<child/></node>")); @@ -1314,6 +1324,7 @@ TEST_XML(dom_node_insert_move_after, "<node>foo<child>bar</child></node>") CHECK(doc.insert_move_after(doc, doc) == xml_node()); CHECK(doc.insert_move_after(xml_node(), doc.child(STR("node"))) == xml_node()); CHECK(doc.insert_move_after(doc.child(STR("node")), xml_node()) == xml_node()); + CHECK(doc.insert_move_after(doc.child(STR("node")), child) == xml_node()); xml_node n1 = doc.child(STR("node")).insert_move_after(child, doc.child(STR("node")).first_child()); CHECK(n1 && n1 == child); @@ -1340,6 +1351,7 @@ TEST_XML(dom_node_insert_move_before, "<node>foo<child>bar</child></node>") CHECK(doc.insert_move_before(doc, doc) == xml_node()); CHECK(doc.insert_move_before(xml_node(), doc.child(STR("node"))) == xml_node()); CHECK(doc.insert_move_before(doc.child(STR("node")), xml_node()) == xml_node()); + CHECK(doc.insert_move_before(doc.child(STR("node")), child) == xml_node()); xml_node n1 = doc.child(STR("node")).insert_move_before(child, doc.child(STR("node")).first_child()); CHECK(n1 && n1 == child); diff --git a/tests/test_dom_traverse.cpp b/tests/test_dom_traverse.cpp index f977e15..3d30a82 100644 --- a/tests/test_dom_traverse.cpp +++ b/tests/test_dom_traverse.cpp @@ -137,6 +137,10 @@ TEST_XML(dom_attr_as_integer_space, "<node attr1=' \t1234' attr2='\t 0x123' attr CHECK(node.attribute(STR("attr2")).as_int() == 291); CHECK(node.attribute(STR("attr3")).as_int() == 0); CHECK(node.attribute(STR("attr4")).as_int() == 0); + +#ifdef PUGIXML_HAS_LONG_LONG + CHECK(node.attribute(STR("attr1")).as_llong() == 1234); +#endif } TEST_XML(dom_attr_as_float, "<node attr1='0' attr2='1' attr3='0.12' attr4='-5.1' attr5='3e-4' attr6='3.14159265358979323846'/>") @@ -736,6 +740,9 @@ TEST_XML(dom_node_path, "<node><child1>text<child2/></child1></node>") CHECK(doc.child(STR("node")).child(STR("child1")).first_child().path() == STR("/node/child1/")); CHECK(doc.child(STR("node")).child(STR("child1")).path('\\') == STR("\\node\\child1")); + + doc.append_child(node_element); + CHECK(doc.last_child().path() == STR("/")); } #endif @@ -1274,3 +1281,17 @@ TEST_XML(dom_as_int_plus, "<node attr1='+1' attr2='+0xa' />") CHECK(node.attribute(STR("attr2")).as_ullong() == 10); #endif } + +TEST(dom_node_anonymous) +{ + xml_document doc; + doc.append_child(node_element); + doc.append_child(node_element); + doc.append_child(node_pcdata); + + CHECK(doc.child(STR("node")) == xml_node()); + CHECK(doc.first_child().next_sibling(STR("node")) == xml_node()); + CHECK(doc.last_child().previous_sibling(STR("node")) == xml_node()); + CHECK_STRING(doc.child_value(), STR("")); + CHECK_STRING(doc.last_child().child_value(), STR("")); +} diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp index ba45a45..013bca9 100644 --- a/tests/test_parse.cpp +++ b/tests/test_parse.cpp @@ -88,6 +88,16 @@ TEST(parse_pi_error) CHECK(doc.load_string(STR("<?name& x?>"), parse_fragment | parse_pi).status == status_bad_pi); } +TEST(parse_pi_error_buffer_boundary) +{ + char buf1[] = "<?name?>"; + char buf2[] = "<?name?x"; + + xml_document doc; + CHECK(doc.load_buffer_inplace(buf1, 8, parse_fragment | parse_pi)); + CHECK(doc.load_buffer_inplace(buf2, 8, parse_fragment | parse_pi).status == status_bad_pi); +} + TEST(parse_comments_skip) { xml_document doc; @@ -746,6 +756,36 @@ TEST(parse_attribute_quot_inside) } } +TEST(parse_attribute_wnorm_coverage) +{ + xml_document doc; + CHECK(doc.load_string(STR("<n a1='v' a2=' ' a3='x y' a4='x y' a5='x y' />"), parse_wnorm_attribute)); + CHECK_NODE(doc, STR("<n a1=\"v\" a2=\"\" a3=\"x y\" a4=\"x y\" a5=\"x y\"/>")); + + CHECK(doc.load_string(STR("<n a1='v' a2=' ' a3='x y' a4='x y' a5='x y' />"), parse_wnorm_attribute | parse_escapes)); + CHECK_NODE(doc, STR("<n a1=\"v\" a2=\"\" a3=\"x y\" a4=\"x y\" a5=\"x y\"/>")); +} + +TEST(parse_attribute_wconv_coverage) +{ + xml_document doc; + CHECK(doc.load_string(STR("<n a1='v' a2='\r' a3='\r\n\n' a4='\n' />"), parse_wconv_attribute)); + CHECK_NODE(doc, STR("<n a1=\"v\" a2=\" \" a3=\" \" a4=\" \"/>")); + + CHECK(doc.load_string(STR("<n a1='v' a2='\r' a3='\r\n\n' a4='\n' />"), parse_wconv_attribute | parse_escapes)); + CHECK_NODE(doc, STR("<n a1=\"v\" a2=\" \" a3=\" \" a4=\" \"/>")); +} + +TEST(parse_attribute_eol_coverage) +{ + xml_document doc; + CHECK(doc.load_string(STR("<n a1='v' a2='\r' a3='\r\n\n' a4='\n' />"), parse_eol)); + CHECK_NODE(doc, STR("<n a1=\"v\" a2=\" \" a3=\" \" a4=\" \"/>")); + + CHECK(doc.load_string(STR("<n a1='v' a2='\r' a3='\r\n\n' a4='\n' />"), parse_eol | parse_escapes)); + CHECK_NODE(doc, STR("<n a1=\"v\" a2=\" \" a3=\" \" a4=\" \"/>")); +} + TEST(parse_tag_single) { xml_document doc; @@ -928,7 +968,7 @@ TEST(parse_out_of_memory_halfway_attr) TEST(parse_out_of_memory_conversion) { - test_runner::_memory_fail_threshold = 256; + test_runner::_memory_fail_threshold = 1; xml_document doc; CHECK_ALLOC_FAIL(CHECK(doc.load_buffer("<foo\x90/>", 7, parse_default, encoding_latin1).status == status_out_of_memory)); @@ -1183,6 +1223,33 @@ TEST(parse_embed_pcdata) } } +TEST_XML_FLAGS(parse_embed_pcdata_fragment, "text", parse_fragment | parse_embed_pcdata) +{ + CHECK_NODE(doc, STR("text")); + CHECK(doc.first_child().type() == node_pcdata); + CHECK_STRING(doc.first_child().value(), STR("text")); +} + +TEST_XML_FLAGS(parse_embed_pcdata_child, "<n><child/>text</n>", parse_embed_pcdata) +{ + xml_node n = doc.child(STR("n")); + + CHECK_NODE(doc, STR("<n><child/>text</n>")); + CHECK(n.last_child().type() == node_pcdata); + CHECK_STRING(n.last_child().value(), STR("text")); +} + +TEST_XML_FLAGS(parse_embed_pcdata_comment, "<n>text1<!---->text2</n>", parse_embed_pcdata) +{ + xml_node n = doc.child(STR("n")); + + CHECK_NODE(doc, STR("<n>text1text2</n>")); + CHECK_STRING(n.value(), STR("text1")); + CHECK(n.first_child() == n.last_child()); + CHECK(n.last_child().type() == node_pcdata); + CHECK_STRING(n.last_child().value(), STR("text2")); +} + TEST(parse_encoding_detect) { char test[] = "<?xml version='1.0' encoding='utf-8'?><n/>"; @@ -1206,3 +1273,83 @@ TEST(parse_encoding_detect_latin1) CHECK(doc.load_buffer(test3, sizeof(test3)).encoding == encoding_latin1); CHECK(doc.load_buffer(test4, sizeof(test4)).encoding == encoding_latin1); } + +TEST(parse_encoding_detect_auto) +{ + struct data_t + { + const char* contents; + size_t size; + xml_encoding encoding; + }; + + const data_t data[] = + { + // BOM + { "\x00\x00\xfe\xff", 4, encoding_utf32_be }, + { "\xff\xfe\x00\x00", 4, encoding_utf32_le }, + { "\xfe\xff ", 4, encoding_utf16_be }, + { "\xff\xfe ", 4, encoding_utf16_le }, + { "\xef\xbb\xbf ", 4, encoding_utf8 }, + // automatic tag detection for < or <? + { "\x00\x00\x00<\x00\x00\x00n\x00\x00\x00/\x00\x00\x00>", 16, encoding_utf32_be }, + { "<\x00\x00\x00n\x00\x00\x00/\x00\x00\x00>\x00\x00\x00", 16, encoding_utf32_le }, + { "\x00<\x00?\x00n\x00?\x00>", 10, encoding_utf16_be }, + { "<\x00?\x00n\x00?\x00>\x00", 10, encoding_utf16_le }, + { "\x00<\x00n\x00/\x00>", 8, encoding_utf16_be }, + { "<\x00n\x00/\x00>\x00", 8, encoding_utf16_le }, + // <?xml encoding + { "<?xml encoding='latin1'?>", 25, encoding_latin1 }, + }; + + for (size_t i = 0; i < sizeof(data) / sizeof(data[0]); ++i) + { + xml_document doc; + xml_parse_result result = doc.load_buffer(data[i].contents, data[i].size, parse_fragment); + + CHECK(result); + CHECK(result.encoding == data[i].encoding); + } +} + +TEST(parse_encoding_detect_auto_incomplete) +{ + struct data_t + { + const char* contents; + size_t size; + xml_encoding encoding; + }; + + const data_t data[] = + { + // BOM + { "\x00\x00\xfe ", 4, encoding_utf8 }, + { "\x00\x00 ", 4, encoding_utf8 }, + { "\xff\xfe\x00 ", 4, encoding_utf16_le }, + { "\xfe ", 4, encoding_utf8 }, + { "\xff ", 4, encoding_utf8 }, + { "\xef\xbb ", 4, encoding_utf8 }, + { "\xef ", 4, encoding_utf8 }, + // automatic tag detection for < or <? + { "\x00\x00\x00 ", 4, encoding_utf8 }, + { "<\x00\x00n/\x00>\x00", 8, encoding_utf16_le }, + { "\x00<n\x00\x00/\x00>", 8, encoding_utf16_be }, + { "<\x00?n/\x00>\x00", 8, encoding_utf16_le }, + { "\x00 ", 8, encoding_utf8 }, + // <?xml encoding + { "<?xmC encoding='latin1'?>", 25, encoding_utf8 }, + { "<?xBC encoding='latin1'?>", 25, encoding_utf8 }, + { "<?ABC encoding='latin1'?>", 25, encoding_utf8 }, + { "<_ABC encoding='latin1'/>", 25, encoding_utf8 }, + }; + + for (size_t i = 0; i < sizeof(data) / sizeof(data[0]); ++i) + { + xml_document doc; + xml_parse_result result = doc.load_buffer(data[i].contents, data[i].size, parse_fragment); + + CHECK(result); + CHECK(result.encoding == data[i].encoding); + } +} diff --git a/tests/test_write.cpp b/tests/test_write.cpp index d5f3dad..5cd92a5 100644 --- a/tests/test_write.cpp +++ b/tests/test_write.cpp @@ -69,6 +69,12 @@ TEST_XML_FLAGS(write_cdata_escape, "<![CDATA[value]]>", parse_cdata | parse_frag doc.first_child().set_value(STR("1]]>2]]>3")); CHECK_NODE(doc, STR("<![CDATA[1]]]]><![CDATA[>2]]]]><![CDATA[>3]]>")); + + doc.first_child().set_value(STR("1]")); + CHECK_NODE(doc, STR("<![CDATA[1]]]>")); + + doc.first_child().set_value(STR("1]]")); + CHECK_NODE(doc, STR("<![CDATA[1]]]]>")); } TEST_XML(write_cdata_inner, "<node><![CDATA[value]]></node>") diff --git a/tests/test_xpath.cpp b/tests/test_xpath.cpp index 33c1696..6cae607 100644 --- a/tests/test_xpath.cpp +++ b/tests/test_xpath.cpp @@ -367,6 +367,32 @@ TEST(xpath_large_node_set) CHECK(ns.size() == 10001); } +TEST(xpath_out_of_memory_query) +{ + test_runner::_memory_fail_threshold = 1; + + CHECK_ALLOC_FAIL(xpath_query q(STR("node"))); +} + +TEST_XML(xpath_out_of_memory_evaluate, "<n/>") +{ + test_runner::_memory_fail_threshold = 4196 * sizeof(char_t) + 4096 * 2 + 32768; + + std::basic_string<char_t> query = STR("*[concat(\"a\", \""); + + query.resize(4196, 'a'); + query += STR("\")]"); + + pugi::xpath_query q(query.c_str()); + + CHECK_ALLOC_FAIL(CHECK(q.evaluate_boolean(doc) == false)); + CHECK_ALLOC_FAIL(CHECK_DOUBLE_NAN(q.evaluate_number(doc))); + CHECK_ALLOC_FAIL(CHECK(q.evaluate_string(doc).empty())); + CHECK_ALLOC_FAIL(CHECK(q.evaluate_string(0, 0, doc) == 1)); + CHECK_ALLOC_FAIL(CHECK(q.evaluate_node(doc) == xpath_node())); + CHECK_ALLOC_FAIL(CHECK(q.evaluate_node_set(doc).empty())); +} + TEST(xpath_out_of_memory_evaluate_concat) { test_runner::_memory_fail_threshold = 4196 * sizeof(char_t) + 4096 * 2; @@ -404,15 +430,83 @@ TEST_XML(xpath_out_of_memory_evaluate_union, "<node><a/><a/><a/><a/><a/><a/><a/> CHECK_ALLOC_FAIL(CHECK(q.evaluate_node_set(doc.child(STR("node"))).empty())); } -TEST_XML(xpath_out_of_memory_evaluate_predicate, "<node><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/></node>") +TEST_XML(xpath_out_of_memory_evaluate_predicate, "<node><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/><a/></node>") { test_runner::_memory_fail_threshold = 32768 + 4096 * 2; - pugi::xpath_query q(STR("//a[//a[//a[//a[//a[//a[//a[//a[//a[//a[//a[//a[//a[//a[true()]]]]]]]]]]]]]]")); + pugi::xpath_query q(STR("//a[//a[//a[//a[true()]]]]")); CHECK_ALLOC_FAIL(CHECK(q.evaluate_node_set(doc).empty())); } +TEST_XML(xpath_out_of_memory_evaluate_normalize_space_0, "<node> a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z </node>") +{ + test_runner::_memory_fail_threshold = 32768 + 4096 * 2; + + pugi::xpath_query q(STR("concat(normalize-space(), normalize-space(), normalize-space(), normalize-space(), normalize-space(), normalize-space(), normalize-space(), normalize-space())")); + + CHECK_ALLOC_FAIL(CHECK(q.evaluate_string(doc.first_child()).empty())); +} + +TEST_XML(xpath_out_of_memory_evaluate_normalize_space_1, "<node> a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z </node>") +{ + test_runner::_memory_fail_threshold = 32768 + 4096 * 2; + + pugi::xpath_query q(STR("concat(normalize-space(node), normalize-space(node), normalize-space(node), normalize-space(node), normalize-space(node), normalize-space(node), normalize-space(node), normalize-space(node))")); + + CHECK_ALLOC_FAIL(CHECK(q.evaluate_string(doc).empty())); +} + +TEST_XML(xpath_out_of_memory_evaluate_translate, "<node> a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z </node>") +{ + test_runner::_memory_fail_threshold = 32768 + 4096 * 2; + + pugi::xpath_query q(STR("concat(translate(node, 'a', '\xe9'), translate(node, 'a', '\xe9'), translate(node, 'a', '\xe9'), translate(node, 'a', '\xe9'), translate(node, 'a', '\xe9'), translate(node, 'a', '\xe9'), translate(node, 'a', '\xe9'), translate(node, 'a', '\xe9'))")); + + CHECK_ALLOC_FAIL(CHECK(q.evaluate_string(doc).empty())); +} + +TEST_XML(xpath_out_of_memory_evaluate_translate_table, "<node> a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z </node>") +{ + test_runner::_memory_fail_threshold = 32768 + 4096 * 2; + + pugi::xpath_query q(STR("concat(translate(node, 'a', 'A'), translate(node, 'a', 'A'), translate(node, 'a', 'A'), translate(node, 'a', 'A'), translate(node, 'a', 'A'), translate(node, 'a', 'A'), translate(node, 'a', 'A'), translate(node, 'a', 'A'))")); + + CHECK_ALLOC_FAIL(CHECK(q.evaluate_string(doc).empty())); +} + +TEST(xpath_out_of_memory_evaluate_string_append) +{ + test_runner::_memory_fail_threshold = 32768 + 4096 * 2; + + std::basic_string<char_t> literal(5000, 'a'); + + std::basic_string<char_t> buf; + buf += STR("<n><c>text</c><c>"); + buf += literal; + buf += STR("</c></n>"); + + xml_document doc; + CHECK(doc.load_buffer_inplace(&buf[0], buf.size() * sizeof(char_t))); + + pugi::xpath_query q(STR("string(n)")); + CHECK(q); + + CHECK_ALLOC_FAIL(CHECK(q.evaluate_string(doc).empty())); +} + +TEST(xpath_out_of_memory_evaluate_number_to_string) +{ + test_runner::_memory_fail_threshold = 4096 + 128; + + xpath_variable_set vars; + vars.set(STR("x"), 1e+308); + + xpath_query q(STR("concat($x, $x, $x, $x, $x, $x, $x, $x, $x, $x, $x, $x, $x, $x, $x, $x, $x)"), &vars); + + CHECK_ALLOC_FAIL(CHECK(q.evaluate_string(xml_node()).empty())); +} + TEST(xpath_memory_concat_massive) { pugi::xml_document doc; @@ -578,6 +672,17 @@ TEST(xpath_sort_crossdoc_different_depth) CHECK((ns[0] == ns1[0] && ns[1] == ns2[0]) || (ns[0] == ns2[0] && ns[1] == ns1[0])); } +TEST_XML(xpath_sort_empty_node, "<node><child1/><child2/></node>") +{ + xml_node n = doc.child(STR("node")); + xpath_node nodes[] = { n.child(STR("child2")), xml_node(), n.child(STR("child1")), xml_node() }; + xpath_node_set ns(nodes, nodes + sizeof(nodes) / sizeof(nodes[0])); + + ns.sort(); + + CHECK(!ns[0] && !ns[1] && ns[2] == nodes[2] && ns[3] == nodes[0]); +} + TEST(xpath_allocate_string_out_of_memory) { std::basic_string<char_t> query; @@ -612,4 +717,15 @@ TEST(xpath_remove_duplicates) tester % (2 + i); } } + +TEST(xpath_anonymous_nodes) +{ + xml_document doc; + doc.append_child(node_element); + doc.append_child(node_pi); + + CHECK_XPATH_NODESET(doc, STR("/name")); + CHECK_XPATH_NODESET(doc, STR("/processing-instruction('a')")); + CHECK_XPATH_NODESET(doc, STR("/ns:*")); +} #endif diff --git a/tests/test_xpath_api.cpp b/tests/test_xpath_api.cpp index c1a4968..3f05e13 100644 --- a/tests/test_xpath_api.cpp +++ b/tests/test_xpath_api.cpp @@ -107,6 +107,7 @@ TEST_XML(xpath_api_nodeset_accessors, "<node><foo/><foo/></node>") TEST_XML(xpath_api_nodeset_copy, "<node><foo/><foo/></node>") { + xpath_node_set empty; xpath_node_set set = doc.select_nodes(STR("node/foo")); xpath_node_set copy1 = set; @@ -132,7 +133,7 @@ TEST_XML(xpath_api_nodeset_copy, "<node><foo/><foo/></node>") xpath_node_set copy5; copy5 = set; - copy5 = xpath_node_set(); + copy5 = empty; CHECK(copy5.size() == 0); } @@ -572,6 +573,18 @@ TEST(xpath_api_nodeset_move_assign_empty) CHECK(move.type() == xpath_node_set::type_sorted); } +TEST_XML(xpath_api_nodeset_move_assign_self, "<node><foo/><foo/><bar/></node>") +{ + xpath_node_set set = doc.select_nodes(STR("node/bar")); + + CHECK(set.size() == 1); + CHECK(set.type() == xpath_node_set::type_sorted); + + test_runner::_memory_fail_threshold = 1; + + set = std::move(*&set); +} + TEST(xpath_api_query_move) { xml_node c; diff --git a/tests/test_xpath_functions.cpp b/tests/test_xpath_functions.cpp index 211dbfb..480eb97 100644 --- a/tests/test_xpath_functions.cpp +++ b/tests/test_xpath_functions.cpp @@ -566,6 +566,7 @@ TEST(xpath_string_translate_table) CHECK_XPATH_STRING(c, STR("translate('abcd\xe9 ', 'abc', 'ABC')"), STR("ABCd\xe9 ")); CHECK_XPATH_STRING(c, STR("translate('abcd\xe9 ', 'abc\xe9', 'ABC!')"), STR("ABCd! ")); + CHECK_XPATH_STRING(c, STR("translate('abcd! ', 'abc!', 'ABC\xe9')"), STR("ABCd\xe9 ")); CHECK_XPATH_STRING(c, STR("translate('abcde', concat('abc', 'd'), 'ABCD')"), STR("ABCDe")); CHECK_XPATH_STRING(c, STR("translate('abcde', 'abcd', concat('ABC', 'D'))"), STR("ABCDe")); } @@ -799,4 +800,17 @@ TEST_XML(xpath_string_concat_translate, "<node>foobar</node>") CHECK_XPATH_STRING(doc, STR("concat('a', 'b', 'c', translate(node, 'o', 'a'), 'd')"), STR("abcfaabard")); } +TEST(xpath_unknown_functions) +{ + char_t query[] = STR("a()"); + + for (char ch = 'a'; ch <= 'z'; ++ch) + { + query[0] = ch; + CHECK_XPATH_FAIL(query); + + query[0] = ch - 32; + CHECK_XPATH_FAIL(query); + } +} #endif diff --git a/tests/test_xpath_operators.cpp b/tests/test_xpath_operators.cpp index 1a97c7d..c2281e6 100644 --- a/tests/test_xpath_operators.cpp +++ b/tests/test_xpath_operators.cpp @@ -332,11 +332,21 @@ TEST_XML(xpath_operators_inequality_node_set_node_set, "<node><c1><v>1</v><v>-1< CHECK_XPATH_BOOLEAN(n, STR("c1/v < c3/v"), true); CHECK_XPATH_BOOLEAN(n, STR("c1/v <= c3/v"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v[1] > c1/v[1]"), false); + CHECK_XPATH_BOOLEAN(n, STR("c1/v[1] < c1/v[1]"), false); + CHECK_XPATH_BOOLEAN(n, STR("c1/v[1] >= c1/v[1]"), true); + CHECK_XPATH_BOOLEAN(n, STR("c1/v[1] <= c1/v[1]"), true); + #ifndef MSVC6_NAN_BUG CHECK_XPATH_BOOLEAN(n, STR("c1/v > c2/v"), false); CHECK_XPATH_BOOLEAN(n, STR("c1/v >= c2/v"), true); CHECK_XPATH_BOOLEAN(n, STR("c1/v < c2/v"), true); CHECK_XPATH_BOOLEAN(n, STR("c1/v <= c2/v"), true); + + CHECK_XPATH_BOOLEAN(n, STR("c2/v[2] < c2/v[2]"), false); + CHECK_XPATH_BOOLEAN(n, STR("c2/v[2] > c2/v[2]"), false); + CHECK_XPATH_BOOLEAN(n, STR("c2/v[2] <= c2/v[2]"), false); + CHECK_XPATH_BOOLEAN(n, STR("c2/v[2] >= c2/v[2]"), false); #endif } diff --git a/tests/test_xpath_parse.cpp b/tests/test_xpath_parse.cpp index b6de42e..8819a5d 100644 --- a/tests/test_xpath_parse.cpp +++ b/tests/test_xpath_parse.cpp @@ -274,7 +274,7 @@ TEST_XML(xpath_parse_absolute, "<div><s/></div>") TEST(xpath_parse_out_of_memory_first_page) { - test_runner::_memory_fail_threshold = 1; + test_runner::_memory_fail_threshold = 128; CHECK_ALLOC_FAIL(CHECK_XPATH_FAIL(STR("1"))); } @@ -293,6 +293,27 @@ TEST(xpath_parse_out_of_memory_string_to_number) CHECK_ALLOC_FAIL(CHECK_XPATH_FAIL(STR("0.11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111"))); } +TEST(xpath_parse_out_of_memory_quoted_string) +{ + test_runner::_memory_fail_threshold = 4096 + 128; + + std::basic_string<char_t> literal(5000, 'a'); + std::basic_string<char_t> query = STR("'") + literal + STR("'"); + + CHECK_ALLOC_FAIL(CHECK_XPATH_FAIL(query.c_str())); +} + +TEST(xpath_parse_out_of_memory_variable) +{ + test_runner::_memory_fail_threshold = 4096 + 128; + + std::basic_string<char_t> literal(5000, 'a'); + std::basic_string<char_t> query = STR("$") + literal; + + xpath_variable_set vars; + CHECK_ALLOC_FAIL(CHECK_XPATH_FAIL_VAR(query.c_str(), &vars)); +} + TEST(xpath_parse_qname_error) { CHECK_XPATH_FAIL(STR("foo: bar")); @@ -313,4 +334,57 @@ TEST(xpath_parse_result_default) CHECK(result.offset == 0); } +TEST(xpath_parse_error_propagation) +{ + char_t query[] = STR("(//foo[count(. | @*)] | ((a)//b)[1] | /foo | /foo/bar//more/ancestor-or-self::foobar | /text() | a[1 + 2 * 3 div (1+0) mod 2]//b[1]/c | a[$x])[true()]"); + + xpath_variable_set vars; + vars.set(STR("x"), 1.0); + + xpath_query q(query, &vars); + CHECK(q); + + for (size_t i = 0; i + 1 < sizeof(query) / sizeof(query[0]); ++i) + { + char_t ch = query[i]; + + query[i] = '%'; + + CHECK_XPATH_FAIL(query); + + query[i] = ch; + } +} + +TEST(xpath_parse_oom_propagation) +{ + const char_t* query_base = STR("(//foo[count(. | @*)] | ((a)//b)[1] | /foo | /foo/bar//more/ancestor-or-self::foobar | /text() | a[1 + 2 * 3 div (1+0) mod 2]//b[1]/c | a[$x])[true()]"); + + xpath_variable_set vars; + vars.set(STR("x"), 1.0); + + test_runner::_memory_fail_threshold = 4096 + 128; + + { + xpath_query q(query_base, &vars); + CHECK(q); + } + + for (size_t i = 3200; i < 4200; ++i) + { + std::basic_string<char_t> literal(i, 'a'); + std::basic_string<char_t> query = STR("processing-instruction('") + literal + STR("') | ") + query_base; + + CHECK_ALLOC_FAIL(CHECK_XPATH_FAIL(query.c_str())); + } +} + +TEST_XML(xpath_parse_location_path, "<node><child/></node>") +{ + CHECK_XPATH_NODESET(doc, STR("/node")) % 2; + CHECK_XPATH_NODESET(doc, STR("/@*")); + CHECK_XPATH_NODESET(doc, STR("/.")) % 1; + CHECK_XPATH_NODESET(doc, STR("/..")); + CHECK_XPATH_NODESET(doc, STR("/*")) % 2; +} #endif diff --git a/tests/test_xpath_paths.cpp b/tests/test_xpath_paths.cpp index 69215d8..7915df1 100644 --- a/tests/test_xpath_paths.cpp +++ b/tests/test_xpath_paths.cpp @@ -358,6 +358,13 @@ TEST_XML_FLAGS(xpath_paths_nodetest_principal, "<node attr='value'>pcdata<child/ CHECK_XPATH_NODESET(doc, STR("child::abra:*/attribute::abra:*/descendant-or-self::abra:*")); // attribute is not of element type } +TEST_XML(xpath_paths_nodetest_attribute_namespace, "<node a1='v1' xmlns:x='?' />") +{ + CHECK_XPATH_NODESET(doc, STR("node/attribute::node()")) % 3; + CHECK_XPATH_NODESET(doc, STR("node/attribute::xmlns:x")); + CHECK_XPATH_NODESET(doc, STR("node/attribute::xmlns:*")); +} + TEST_XML(xpath_paths_absolute, "<node attr='value'><foo><foo/><foo/></foo></node>") { xml_node c; |