From f11c4d684703a240e9ba0b9b347052f141d00753 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 29 Jan 2017 20:00:44 -0800 Subject: XPath: alloc_string no longer returns NULL NULL return value will be reserved for the OOM error indicator. --- src/pugixml.cpp | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 0813ae6..8e9e42b 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -10953,20 +10953,19 @@ PUGI__NS_BEGIN const char_t* alloc_string(const xpath_lexer_string& value) { - if (value.begin) - { - size_t length = static_cast(value.end - value.begin); + if (!value.begin) + return PUGIXML_TEXT(""); - char_t* c = static_cast(_alloc->allocate_nothrow((length + 1) * sizeof(char_t))); - if (!c) throw_error_oom(); - assert(c); // workaround for clang static analysis + size_t length = static_cast(value.end - value.begin); - memcpy(c, value.begin, length * sizeof(char_t)); - c[length] = 0; + char_t* c = static_cast(_alloc->allocate_nothrow((length + 1) * sizeof(char_t))); + if (!c) throw_error_oom(); + assert(c); // workaround for clang static analysis - return c; - } - else return 0; + memcpy(c, value.begin, length * sizeof(char_t)); + c[length] = 0; + + return c; } xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2]) -- cgit v1.2.3 From 60e580c2a81ef4047f35cb2ea40baa0ee426bfaf Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 29 Jan 2017 20:04:34 -0800 Subject: XPath: Remove parse_function_helper It was only used in three places and didn't really make the code more readable. --- src/pugixml.cpp | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 8e9e42b..0818148 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -10968,16 +10968,6 @@ PUGI__NS_BEGIN return c; } - xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2]) - { - assert(argc <= 1); - - if (argc == 1 && args[0]->rettype() != xpath_type_node_set) - throw_error("Function has to be applied to node set"); - - return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]); - } - xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) { switch (name.begin[0]) @@ -10991,9 +10981,7 @@ PUGI__NS_BEGIN case 'c': if (name == PUGIXML_TEXT("count") && argc == 1) { - if (args[0]->rettype() != xpath_type_node_set) - throw_error("Function has to be applied to node set"); - + if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]); } else if (name == PUGIXML_TEXT("contains") && argc == 2) @@ -11025,15 +11013,24 @@ PUGI__NS_BEGIN else if (name == PUGIXML_TEXT("lang") && argc == 1) return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]); else if (name == PUGIXML_TEXT("local-name") && argc <= 1) - return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args); + { + if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); + return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]); + } break; case 'n': if (name == PUGIXML_TEXT("name") && argc <= 1) - return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args); + { + if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); + return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]); + } else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) - return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args); + { + if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); + return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]); + } else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("not") && argc == 1) -- cgit v1.2.3 From d72c0763f9dc6298b34eef7d17a14ea76e7c1016 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 29 Jan 2017 20:15:14 -0800 Subject: XPath: Minor parsing refactoring Simplify function argument parsing by folding arg 0 parsing into the main loop, reuse expression parsing logic for unary expression --- src/pugixml.cpp | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 0818148..906ec04 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -11236,6 +11236,7 @@ PUGI__NS_BEGIN const char_t* value = alloc_string(_lexer.contents()); xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value); + _lexer.next(); return n; @@ -11249,6 +11250,7 @@ PUGI__NS_BEGIN throw_error_oom(); xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value); + _lexer.next(); return n; @@ -11268,14 +11270,14 @@ PUGI__NS_BEGIN throw_error("Unrecognized function call"); _lexer.next(); - if (_lexer.current() != lex_close_brace) - args[argc++] = parse_expression(); - while (_lexer.current() != lex_close_brace) { - if (_lexer.current() != lex_comma) - throw_error("No comma between function arguments"); - _lexer.next(); + if (argc > 0) + { + if (_lexer.current() != lex_comma) + throw_error("No comma between function arguments"); + _lexer.next(); + } xpath_ast_node* n = parse_expression(); @@ -11561,7 +11563,8 @@ PUGI__NS_BEGIN while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state; - if (*state != '(') return parse_location_path(); + if (*state != '(') + return parse_location_path(); // This looks like a function call; however this still can be a node-test. Check it. if (parse_node_test_type(_lexer.contents()) != nodetest_none) @@ -11594,9 +11597,9 @@ PUGI__NS_BEGIN _lexer.next(); // precedence 7+ - only parses union expressions - xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7); + xpath_ast_node* n = parse_expression(7); - return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr); + return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, n); } else { @@ -11718,9 +11721,11 @@ PUGI__NS_BEGIN // | MultiplicativeExpr '*' UnaryExpr // | MultiplicativeExpr 'div' UnaryExpr // | MultiplicativeExpr 'mod' UnaryExpr - xpath_ast_node* parse_expression() + xpath_ast_node* parse_expression(int limit = 0) { - return parse_expression_rec(parse_path_or_unary_expression(), 0); + xpath_ast_node* n = parse_path_or_unary_expression(); + + return parse_expression_rec(n, limit); } xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result) -- cgit v1.2.3 From 7bb433b1418c801950a1c867dd851036c16cee32 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 29 Jan 2017 20:22:49 -0800 Subject: XPath: Assume that every function can fail and return 0 Propagate the failure to the caller manually. This is a first step to parser structure that does not depend on exceptions or longjmp for error handling (and thus matches the XML parser). To preserve semantics we'll have to convert error code to exception later. --- src/pugixml.cpp | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 906ec04..4266f46 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -11222,6 +11222,7 @@ PUGI__NS_BEGIN _lexer.next(); xpath_ast_node* n = parse_expression(); + if (!n) return 0; if (_lexer.current() != lex_close_brace) throw_error("Unmatched braces"); @@ -11234,12 +11235,11 @@ PUGI__NS_BEGIN case lex_quoted_string: { const char_t* value = alloc_string(_lexer.contents()); - - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value); + if (!value) return 0; _lexer.next(); - return n; + return new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value); } case lex_number: @@ -11249,11 +11249,9 @@ PUGI__NS_BEGIN if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value)) throw_error_oom(); - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value); - _lexer.next(); - return n; + return new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value); } case lex_string: @@ -11280,6 +11278,7 @@ PUGI__NS_BEGIN } xpath_ast_node* n = parse_expression(); + if (!n) return 0; if (argc < 2) args[argc] = n; else last_arg->set_next(n); @@ -11306,17 +11305,20 @@ PUGI__NS_BEGIN xpath_ast_node* parse_filter_expression() { xpath_ast_node* n = parse_primary_expression(); + if (!n) return 0; while (_lexer.current() == lex_open_square_brace) { _lexer.next(); xpath_ast_node* expr = parse_expression(); + if (!expr) return 0; if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set"); n = new (alloc_node()) xpath_ast_node(ast_filter, n, expr, predicate_default); + if (!n) return 0; if (_lexer.current() != lex_close_square_brace) throw_error("Unmatched square brace"); @@ -11461,7 +11463,10 @@ PUGI__NS_BEGIN } const char_t* nt_name_copy = alloc_string(nt_name); + if (!nt_name_copy) return 0; + xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, nt_name_copy); + if (!n) return 0; xpath_ast_node* last = 0; @@ -11470,8 +11475,10 @@ PUGI__NS_BEGIN _lexer.next(); xpath_ast_node* expr = parse_expression(); + if (!expr) return 0; xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default); + if (!pred) return 0; if (_lexer.current() != lex_close_square_brace) throw_error("Unmatched square brace"); @@ -11490,6 +11497,7 @@ PUGI__NS_BEGIN xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) { xpath_ast_node* n = parse_step(set); + if (!n) return 0; while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) { @@ -11497,9 +11505,13 @@ PUGI__NS_BEGIN _lexer.next(); if (l == lex_double_slash) + { n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + if (!n) return 0; + } n = parse_step(n); + if (!n) return 0; } return n; @@ -11514,6 +11526,7 @@ PUGI__NS_BEGIN _lexer.next(); xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); + if (!n) return 0; // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path lexeme_t l = _lexer.current(); @@ -11528,7 +11541,10 @@ PUGI__NS_BEGIN _lexer.next(); xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); + if (!n) return 0; + n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + if (!n) return 0; return parse_relative_location_path(n); } @@ -11551,7 +11567,6 @@ PUGI__NS_BEGIN // PrimaryExpr begins with '$' in case of it being a variable reference, // '(' in case of it being an expression, string literal, number constant or // function call. - if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace || _lexer.current() == lex_quoted_string || _lexer.current() == lex_number || _lexer.current() == lex_string) @@ -11572,6 +11587,7 @@ PUGI__NS_BEGIN } xpath_ast_node* n = parse_filter_expression(); + if (!n) return 0; if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) { @@ -11584,6 +11600,7 @@ PUGI__NS_BEGIN throw_error("Step has to be applied to node set"); n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + if (!n) return 0; } // select from location path @@ -11598,6 +11615,7 @@ PUGI__NS_BEGIN // precedence 7+ - only parses union expressions xpath_ast_node* n = parse_expression(7); + if (!n) return 0; return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, n); } @@ -11682,12 +11700,14 @@ PUGI__NS_BEGIN _lexer.next(); xpath_ast_node* rhs = parse_path_or_unary_expression(); + if (!rhs) return 0; binary_op_t nextop = binary_op_t::parse(_lexer); while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence) { rhs = parse_expression_rec(rhs, nextop.precedence); + if (!rhs) return 0; nextop = binary_op_t::parse(_lexer); } @@ -11696,6 +11716,7 @@ PUGI__NS_BEGIN throw_error("Union operator has to be applied to node sets"); lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs); + if (!lhs) return 0; op = binary_op_t::parse(_lexer); } @@ -11724,6 +11745,7 @@ PUGI__NS_BEGIN xpath_ast_node* parse_expression(int limit = 0) { xpath_ast_node* n = parse_path_or_unary_expression(); + if (!n) return 0; return parse_expression_rec(n, limit); } @@ -11734,13 +11756,14 @@ PUGI__NS_BEGIN xpath_ast_node* parse() { - xpath_ast_node* result = parse_expression(); + xpath_ast_node* n = parse_expression(); + if (!n) return 0; // check if there are unparsed tokens left if (_lexer.current() != lex_eof) throw_error("Incorrect query"); - return result; + return n; } static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result) -- cgit v1.2.3 From 293fccf3b017225e6e6de0675e9fda0dccbf9c01 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 29 Jan 2017 20:41:18 -0800 Subject: XPath: Do not use exceptions to propagate parsing errors Instead, return 0 and rely on parsing logic to propagate that all the way down, and convert result to exception to maintain existing interface. --- src/pugixml.cpp | 86 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 45 insertions(+), 41 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 4266f46..8f2fb75 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -10921,25 +10921,24 @@ PUGI__NS_BEGIN jmp_buf _error_handler; #endif - void throw_error(const char* message) + void throw_error_oom() { - _result->error = message; + #ifdef PUGIXML_NO_EXCEPTIONS + _result->error = "Out of memory"; _result->offset = _lexer.current_pos() - _query; - #ifdef PUGIXML_NO_EXCEPTIONS longjmp(_error_handler, 1); #else - throw xpath_exception(*_result); + throw std::bad_alloc(); #endif } - void throw_error_oom() + xpath_ast_node* error(const char* message) { - #ifdef PUGIXML_NO_EXCEPTIONS - throw_error("Out of memory"); - #else - throw std::bad_alloc(); - #endif + _result->error = message; + _result->offset = _lexer.current_pos() - _query; + + return 0; } void* alloc_node() @@ -10981,7 +10980,7 @@ PUGI__NS_BEGIN case 'c': if (name == PUGIXML_TEXT("count") && argc == 1) { - if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); + if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]); } else if (name == PUGIXML_TEXT("contains") && argc == 2) @@ -11014,7 +11013,7 @@ PUGI__NS_BEGIN return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]); else if (name == PUGIXML_TEXT("local-name") && argc <= 1) { - if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); + if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]); } @@ -11023,12 +11022,12 @@ PUGI__NS_BEGIN case 'n': if (name == PUGIXML_TEXT("name") && argc <= 1) { - if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); + if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]); } else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) { - if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); + if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]); } else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) @@ -11067,7 +11066,7 @@ PUGI__NS_BEGIN return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("sum") && argc == 1) { - if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); + if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]); } @@ -11085,9 +11084,7 @@ PUGI__NS_BEGIN break; } - throw_error("Unrecognized function or wrong parameter count"); - - return 0; + return error("Unrecognized function or wrong parameter count"); } axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) @@ -11203,14 +11200,14 @@ PUGI__NS_BEGIN xpath_lexer_string name = _lexer.contents(); if (!_variables) - throw_error("Unknown variable: variable set is not provided"); + return error("Unknown variable: variable set is not provided"); xpath_variable* var = 0; if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var)) throw_error_oom(); if (!var) - throw_error("Unknown variable: variable set does not contain the given name"); + return error("Unknown variable: variable set does not contain the given name"); _lexer.next(); @@ -11225,7 +11222,7 @@ PUGI__NS_BEGIN if (!n) return 0; if (_lexer.current() != lex_close_brace) - throw_error("Unmatched braces"); + return error("Unmatched braces"); _lexer.next(); @@ -11265,7 +11262,7 @@ PUGI__NS_BEGIN xpath_ast_node* last_arg = 0; if (_lexer.current() != lex_open_brace) - throw_error("Unrecognized function call"); + return error("Unrecognized function call"); _lexer.next(); while (_lexer.current() != lex_close_brace) @@ -11273,7 +11270,7 @@ PUGI__NS_BEGIN if (argc > 0) { if (_lexer.current() != lex_comma) - throw_error("No comma between function arguments"); + return error("No comma between function arguments"); _lexer.next(); } @@ -11293,9 +11290,7 @@ PUGI__NS_BEGIN } default: - throw_error("Unrecognizable primary expression"); - - return 0; + return error("Unrecognizable primary expression"); } } @@ -11315,13 +11310,13 @@ PUGI__NS_BEGIN if (!expr) return 0; if (n->rettype() != xpath_type_node_set) - throw_error("Predicate has to be applied to node set"); + return error("Predicate has to be applied to node set"); n = new (alloc_node()) xpath_ast_node(ast_filter, n, expr, predicate_default); if (!n) return 0; if (_lexer.current() != lex_close_square_brace) - throw_error("Unmatched square brace"); + return error("Unmatched square brace"); _lexer.next(); } @@ -11337,7 +11332,7 @@ PUGI__NS_BEGIN xpath_ast_node* parse_step(xpath_ast_node* set) { if (set && set->rettype() != xpath_type_node_set) - throw_error("Step has to be applied to node set"); + return error("Step has to be applied to node set"); bool axis_specified = false; axis_t axis = axis_child; // implied child axis @@ -11376,12 +11371,12 @@ PUGI__NS_BEGIN { // parse axis name if (axis_specified) - throw_error("Two axis specifiers in one step"); + return error("Two axis specifiers in one step"); axis = parse_axis_name(nt_name, axis_specified); if (!axis_specified) - throw_error("Unknown axis"); + return error("Unknown axis"); // read actual node test _lexer.next(); @@ -11397,7 +11392,10 @@ PUGI__NS_BEGIN nt_name = _lexer.contents(); _lexer.next(); } - else throw_error("Unrecognized node test"); + else + { + return error("Unrecognized node test"); + } } if (nt_type == nodetest_none) @@ -11414,26 +11412,26 @@ PUGI__NS_BEGIN nt_type = parse_node_test_type(nt_name); if (nt_type == nodetest_none) - throw_error("Unrecognized node type"); + return error("Unrecognized node type"); nt_name = xpath_lexer_string(); } else if (nt_name == PUGIXML_TEXT("processing-instruction")) { if (_lexer.current() != lex_quoted_string) - throw_error("Only literals are allowed as arguments to processing-instruction()"); + return error("Only literals are allowed as arguments to processing-instruction()"); nt_type = nodetest_pi; nt_name = _lexer.contents(); _lexer.next(); if (_lexer.current() != lex_close_brace) - throw_error("Unmatched brace near processing-instruction()"); + return error("Unmatched brace near processing-instruction()"); _lexer.next(); } else { - throw_error("Unmatched brace near node type test"); + return error("Unmatched brace near node type test"); } } // QName or NCName:* @@ -11459,7 +11457,7 @@ PUGI__NS_BEGIN } else { - throw_error("Unrecognized node test"); + return error("Unrecognized node test"); } const char_t* nt_name_copy = alloc_string(nt_name); @@ -11481,7 +11479,7 @@ PUGI__NS_BEGIN if (!pred) return 0; if (_lexer.current() != lex_close_square_brace) - throw_error("Unmatched square brace"); + return error("Unmatched square brace"); _lexer.next(); if (last) last->set_next(pred); @@ -11597,7 +11595,7 @@ PUGI__NS_BEGIN if (l == lex_double_slash) { if (n->rettype() != xpath_type_node_set) - throw_error("Step has to be applied to node set"); + return error("Step has to be applied to node set"); n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); if (!n) return 0; @@ -11713,7 +11711,7 @@ PUGI__NS_BEGIN } if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set)) - throw_error("Union operator has to be applied to node sets"); + return error("Union operator has to be applied to node sets"); lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs); if (!lhs) return 0; @@ -11761,7 +11759,7 @@ PUGI__NS_BEGIN // check if there are unparsed tokens left if (_lexer.current() != lex_eof) - throw_error("Incorrect query"); + return error("Incorrect query"); return n; } @@ -12393,6 +12391,12 @@ namespace pugi _impl = impl.release(); _result.error = 0; } + else + { + #ifndef PUGIXML_NO_EXCEPTIONS + throw xpath_exception(_result); + #endif + } } } -- cgit v1.2.3 From bd8e2d782eca41886d871ea511ff72ad27fa2cb0 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 29 Jan 2017 20:54:48 -0800 Subject: XPath: Forward all node constructors through alloc_node This allows us to handle OOM during node allocation without triggering undefined behavior that occurs when placement new gets a NULL pointer. --- src/pugixml.cpp | 121 ++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 78 insertions(+), 43 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 8f2fb75..44c77c7 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -10944,12 +10944,47 @@ PUGI__NS_BEGIN void* alloc_node() { void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node)); - if (!result) throw_error_oom(); return result; } + xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; + } + + xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; + } + + xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; + } + + xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0; + } + + xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0; + } + + xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0; + } + const char_t* alloc_string(const xpath_lexer_string& value) { if (!value.begin) @@ -10973,7 +11008,7 @@ PUGI__NS_BEGIN { case 'b': if (name == PUGIXML_TEXT("boolean") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]); + return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]); break; @@ -10981,40 +11016,40 @@ PUGI__NS_BEGIN if (name == PUGIXML_TEXT("count") && argc == 1) { if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); - return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]); + return alloc_node(ast_func_count, xpath_type_number, args[0]); } else if (name == PUGIXML_TEXT("contains") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]); + return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]); else if (name == PUGIXML_TEXT("concat") && argc >= 2) - return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]); + return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("ceiling") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]); + return alloc_node(ast_func_ceiling, xpath_type_number, args[0]); break; case 'f': if (name == PUGIXML_TEXT("false") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean); + return alloc_node(ast_func_false, xpath_type_boolean); else if (name == PUGIXML_TEXT("floor") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]); + return alloc_node(ast_func_floor, xpath_type_number, args[0]); break; case 'i': if (name == PUGIXML_TEXT("id") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]); + return alloc_node(ast_func_id, xpath_type_node_set, args[0]); break; case 'l': if (name == PUGIXML_TEXT("last") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number); + return alloc_node(ast_func_last, xpath_type_number); else if (name == PUGIXML_TEXT("lang") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]); + return alloc_node(ast_func_lang, xpath_type_boolean, args[0]); else if (name == PUGIXML_TEXT("local-name") && argc <= 1) { if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]); + return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]); } break; @@ -11023,60 +11058,60 @@ PUGI__NS_BEGIN if (name == PUGIXML_TEXT("name") && argc <= 1) { if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]); + return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]); } else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) { if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]); + return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]); } else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); + return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("not") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]); + return alloc_node(ast_func_not, xpath_type_boolean, args[0]); else if (name == PUGIXML_TEXT("number") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); + return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); break; case 'p': if (name == PUGIXML_TEXT("position") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number); + return alloc_node(ast_func_position, xpath_type_number); break; case 'r': if (name == PUGIXML_TEXT("round") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]); + return alloc_node(ast_func_round, xpath_type_number, args[0]); break; case 's': if (name == PUGIXML_TEXT("string") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); + return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); else if (name == PUGIXML_TEXT("string-length") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]); + return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]); else if (name == PUGIXML_TEXT("starts-with") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); + return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); else if (name == PUGIXML_TEXT("substring-before") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); + return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("substring-after") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); + return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) - return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); + return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("sum") && argc == 1) { if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); - return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]); + return alloc_node(ast_func_sum, xpath_type_number, args[0]); } break; case 't': if (name == PUGIXML_TEXT("translate") && argc == 3) - return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]); + return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("true") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean); + return alloc_node(ast_func_true, xpath_type_boolean); break; @@ -11211,7 +11246,7 @@ PUGI__NS_BEGIN _lexer.next(); - return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var); + return alloc_node(ast_variable, var->type(), var); } case lex_open_brace: @@ -11236,7 +11271,7 @@ PUGI__NS_BEGIN _lexer.next(); - return new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value); + return alloc_node(ast_string_constant, xpath_type_string, value); } case lex_number: @@ -11248,7 +11283,7 @@ PUGI__NS_BEGIN _lexer.next(); - return new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value); + return alloc_node(ast_number_constant, xpath_type_number, value); } case lex_string: @@ -11312,7 +11347,7 @@ PUGI__NS_BEGIN if (n->rettype() != xpath_type_node_set) return error("Predicate has to be applied to node set"); - n = new (alloc_node()) xpath_ast_node(ast_filter, n, expr, predicate_default); + n = alloc_node(ast_filter, n, expr, predicate_default); if (!n) return 0; if (_lexer.current() != lex_close_square_brace) @@ -11348,13 +11383,13 @@ PUGI__NS_BEGIN { _lexer.next(); - return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0); + return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0); } else if (_lexer.current() == lex_double_dot) { _lexer.next(); - return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0); + return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0); } nodetest_t nt_type = nodetest_none; @@ -11463,7 +11498,7 @@ PUGI__NS_BEGIN const char_t* nt_name_copy = alloc_string(nt_name); if (!nt_name_copy) return 0; - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, nt_name_copy); + xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy); if (!n) return 0; xpath_ast_node* last = 0; @@ -11475,7 +11510,7 @@ PUGI__NS_BEGIN xpath_ast_node* expr = parse_expression(); if (!expr) return 0; - xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default); + xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default); if (!pred) return 0; if (_lexer.current() != lex_close_square_brace) @@ -11504,7 +11539,7 @@ PUGI__NS_BEGIN if (l == lex_double_slash) { - n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); if (!n) return 0; } @@ -11523,7 +11558,7 @@ PUGI__NS_BEGIN { _lexer.next(); - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); + xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set); if (!n) return 0; // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path @@ -11538,10 +11573,10 @@ PUGI__NS_BEGIN { _lexer.next(); - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); + xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set); if (!n) return 0; - n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); if (!n) return 0; return parse_relative_location_path(n); @@ -11597,7 +11632,7 @@ PUGI__NS_BEGIN if (n->rettype() != xpath_type_node_set) return error("Step has to be applied to node set"); - n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); if (!n) return 0; } @@ -11615,7 +11650,7 @@ PUGI__NS_BEGIN xpath_ast_node* n = parse_expression(7); if (!n) return 0; - return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, n); + return alloc_node(ast_op_negate, xpath_type_number, n); } else { @@ -11713,7 +11748,7 @@ PUGI__NS_BEGIN if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set)) return error("Union operator has to be applied to node sets"); - lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs); + lhs = alloc_node(op.asttype, op.rettype, lhs, rhs); if (!lhs) return 0; op = binary_op_t::parse(_lexer); -- cgit v1.2.3 From 4fa2241d7b713f96c4b8dbb5188f38e0ba39a114 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 29 Jan 2017 20:59:22 -0800 Subject: XPath: Route out-of-memory errors through the exceptionless path We currently need to convert error based on the text to a different type of C++ exceptions when C++ exceptions are enabled. --- src/pugixml.cpp | 45 +++++++++++++++++---------------------------- 1 file changed, 17 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 44c77c7..ed878de 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -10917,22 +10917,6 @@ PUGI__NS_BEGIN char_t _scratch[32]; - #ifdef PUGIXML_NO_EXCEPTIONS - jmp_buf _error_handler; - #endif - - void throw_error_oom() - { - #ifdef PUGIXML_NO_EXCEPTIONS - _result->error = "Out of memory"; - _result->offset = _lexer.current_pos() - _query; - - longjmp(_error_handler, 1); - #else - throw std::bad_alloc(); - #endif - } - xpath_ast_node* error(const char* message) { _result->error = message; @@ -10941,10 +10925,15 @@ PUGI__NS_BEGIN return 0; } + xpath_ast_node* error_oom() + { + return error("Out of memory"); + } + void* alloc_node() { void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node)); - if (!result) throw_error_oom(); + if (!result) return error_oom(); return result; } @@ -10993,8 +10982,11 @@ PUGI__NS_BEGIN size_t length = static_cast(value.end - value.begin); char_t* c = static_cast(_alloc->allocate_nothrow((length + 1) * sizeof(char_t))); - if (!c) throw_error_oom(); - assert(c); // workaround for clang static analysis + if (!c) + { + error_oom(); + return c; + } memcpy(c, value.begin, length * sizeof(char_t)); c[length] = 0; @@ -11239,7 +11231,7 @@ PUGI__NS_BEGIN xpath_variable* var = 0; if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var)) - throw_error_oom(); + return error_oom(); if (!var) return error("Unknown variable: variable set does not contain the given name"); @@ -11279,7 +11271,7 @@ PUGI__NS_BEGIN double value = 0; if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value)) - throw_error_oom(); + return error_oom(); _lexer.next(); @@ -11803,13 +11795,7 @@ PUGI__NS_BEGIN { xpath_parser parser(query, variables, alloc, result); - #ifdef PUGIXML_NO_EXCEPTIONS - int error = setjmp(parser._error_handler); - - return (error == 0) ? parser.parse() : 0; - #else return parser.parse(); - #endif } }; @@ -12429,7 +12415,10 @@ namespace pugi else { #ifndef PUGIXML_NO_EXCEPTIONS - throw xpath_exception(_result); + if (strcmp(_result.error, "Out of memory")) + throw xpath_exception(_result); + else + throw std::bad_alloc(); #endif } } -- cgit v1.2.3 From 6abf1d7c1a735cbec3b42cb569683d00b070f46c Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 29 Jan 2017 21:53:23 -0800 Subject: XPath: Minor error handling refactoring Handle node type error before creating expression node --- src/pugixml.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index ed878de..f52d236 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -11333,12 +11333,12 @@ PUGI__NS_BEGIN { _lexer.next(); - xpath_ast_node* expr = parse_expression(); - if (!expr) return 0; - if (n->rettype() != xpath_type_node_set) return error("Predicate has to be applied to node set"); + xpath_ast_node* expr = parse_expression(); + if (!expr) return 0; + n = alloc_node(ast_filter, n, expr, predicate_default); if (!n) return 0; -- cgit v1.2.3 From 635fe028015c6091a7765e1c1d23c7512fb3f64e Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 29 Jan 2017 21:58:12 -0800 Subject: XPath: Provide non-throwing and throwing allocations in xpath_allocator For both allocate and reallocate, provide both _nothrow and _throw functions; this change renames allocate() to allocate_throw() (same for reallocate) to make it easier to change the code to remove throwing variants. --- src/pugixml.cpp | 71 +++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index f52d236..6b9fc89 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -7442,24 +7442,7 @@ PUGI__NS_BEGIN } } - void* allocate(size_t size) - { - void* result = allocate_nothrow(size); - - if (!result) - { - #ifdef PUGIXML_NO_EXCEPTIONS - assert(error_handler); - longjmp(*error_handler, 1); - #else - throw std::bad_alloc(); - #endif - } - - return result; - } - - void* reallocate(void* ptr, size_t old_size, size_t new_size) + void* reallocate_nothrow(void* ptr, size_t old_size, size_t new_size) { // round size up to block alignment boundary old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); @@ -7474,8 +7457,8 @@ PUGI__NS_BEGIN if (ptr) _root_size -= old_size; // allocate a new version (this will obviously reuse the memory if possible) - void* result = allocate(new_size); - assert(result); + void* result = allocate_nothrow(new_size); + if (!result) return 0; // we have a new block if (result != ptr && ptr) @@ -7504,6 +7487,40 @@ PUGI__NS_BEGIN return result; } + void* allocate_throw(size_t size) + { + void* result = allocate_nothrow(size); + + if (!result) + { + #ifdef PUGIXML_NO_EXCEPTIONS + assert(error_handler); + longjmp(*error_handler, 1); + #else + throw std::bad_alloc(); + #endif + } + + return result; + } + + void* reallocate_throw(void* ptr, size_t old_size, size_t new_size) + { + void* result = reallocate_nothrow(ptr, old_size, new_size); + + if (!result) + { + #ifdef PUGIXML_NO_EXCEPTIONS + assert(error_handler); + longjmp(*error_handler, 1); + #else + throw std::bad_alloc(); + #endif + } + + return result; + } + void revert(const xpath_allocator& state) { // free all new pages @@ -7602,7 +7619,7 @@ PUGI__NS_BEGIN static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) { - char_t* result = static_cast(alloc->allocate((length + 1) * sizeof(char_t))); + char_t* result = static_cast(alloc->allocate_throw((length + 1) * sizeof(char_t))); assert(result); memcpy(result, string, length * sizeof(char_t)); @@ -7659,7 +7676,7 @@ PUGI__NS_BEGIN size_t result_length = target_length + source_length; // allocate new buffer - char_t* result = static_cast(alloc->reallocate(_uses_heap ? const_cast(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t))); + char_t* result = static_cast(alloc->reallocate_throw(_uses_heap ? const_cast(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t))); assert(result); // append first string to the new buffer in case there was no reallocation @@ -8116,7 +8133,7 @@ PUGI__NS_BEGIN // allocate a buffer of suitable length for the number size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4; - char_t* result = static_cast(alloc->allocate(sizeof(char_t) * result_size)); + char_t* result = static_cast(alloc->allocate_throw(sizeof(char_t) * result_size)); assert(result); // make the number! @@ -8748,7 +8765,7 @@ PUGI__NS_BEGIN if (size_ + count > capacity) { // reallocate the old array or allocate a new one - xpath_node* data = static_cast(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node))); + xpath_node* data = static_cast(alloc->reallocate_throw(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node))); assert(data); // finalize @@ -8800,7 +8817,7 @@ PUGI__NS_BEGIN size_t new_capacity = capacity + capacity / 2 + 1; // reallocate the old array or allocate a new one - xpath_node* data = static_cast(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); + xpath_node* data = static_cast(alloc->reallocate_throw(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); assert(data); // finalize @@ -10386,7 +10403,7 @@ PUGI__NS_BEGIN // allocate on-heap for large concats if (count > sizeof(static_buffer) / sizeof(static_buffer[0])) { - buffer = static_cast(stack.temp->allocate(count * sizeof(xpath_string))); + buffer = static_cast(stack.temp->allocate_throw(count * sizeof(xpath_string))); assert(buffer); } @@ -10404,7 +10421,7 @@ PUGI__NS_BEGIN for (size_t i = 0; i < count; ++i) length += buffer[i].length(); // create final string - char_t* result = static_cast(stack.result->allocate((length + 1) * sizeof(char_t))); + char_t* result = static_cast(stack.result->allocate_throw((length + 1) * sizeof(char_t))); assert(result); char_t* ri = result; -- cgit v1.2.3 From bc1e444694427d599710107d3e6b62165380b0b6 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 30 Jan 2017 08:52:32 -0800 Subject: XPath: Track allocation errors more explicitly Any time an allocation fails xpath_allocator can set an externally provided bool. The plan is to keep this bool up until evaluation ends, so that we can use it to discard the potentially malformed result. --- src/pugixml.cpp | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 6b9fc89..396061a 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -7396,13 +7396,14 @@ PUGI__NS_BEGIN { xpath_memory_block* _root; size_t _root_size; + bool* _error; public: #ifdef PUGIXML_NO_EXCEPTIONS jmp_buf* error_handler; #endif - xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size) + xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error) { #ifdef PUGIXML_NO_EXCEPTIONS error_handler = 0; @@ -7430,7 +7431,11 @@ PUGI__NS_BEGIN size_t block_size = block_capacity + offsetof(xpath_memory_block, data); xpath_memory_block* block = static_cast(xml_memory::allocate(block_size)); - if (!block) return 0; + if (!block) + { + if (_error) *_error = true; + return 0; + } block->next = _root; block->capacity = block_capacity; @@ -7579,6 +7584,7 @@ PUGI__NS_BEGIN struct xpath_stack_data { + bool error; xpath_memory_block blocks[2]; xpath_allocator result; xpath_allocator temp; @@ -7588,7 +7594,7 @@ PUGI__NS_BEGIN jmp_buf error_handler; #endif - xpath_stack_data(): result(blocks + 0), temp(blocks + 1) + xpath_stack_data(): error(false), result(blocks + 0, &error), temp(blocks + 1, &error) { blocks[0].next = blocks[1].next = 0; blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data); @@ -11856,7 +11862,9 @@ PUGI__NS_BEGIN xpath_context c(n, 1, 1); - return impl->root->eval_string(c, sd.stack); + xpath_string r = impl->root->eval_string(c, sd.stack); + + return sd.error ? xpath_string() : r; } PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl) @@ -12494,7 +12502,9 @@ namespace pugi if (setjmp(sd.error_handler)) return false; #endif - return static_cast(_impl)->root->eval_boolean(c, sd.stack); + bool r = static_cast(_impl)->root->eval_boolean(c, sd.stack); + + return sd.error ? false : r; } PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const @@ -12508,7 +12518,9 @@ namespace pugi if (setjmp(sd.error_handler)) return impl::gen_nan(); #endif - return static_cast(_impl)->root->eval_number(c, sd.stack); + double r = static_cast(_impl)->root->eval_number(c, sd.stack); + + return sd.error ? impl::gen_nan() : r; } #ifndef PUGIXML_NO_STL @@ -12556,7 +12568,7 @@ namespace pugi impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); - return xpath_node_set(r.begin(), r.end(), r.type()); + return sd.error ? xpath_node_set() : xpath_node_set(r.begin(), r.end(), r.type()); } PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const @@ -12573,7 +12585,7 @@ namespace pugi impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); - return r.first(); + return sd.error ? xpath_node() : r.first(); } PUGI__FN const xpath_parse_result& xpath_query::result() const -- cgit v1.2.3 From 1ed6d2102bc0a08648026ece8cc069fda592a6a2 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 30 Jan 2017 08:54:09 -0800 Subject: XPath: Improve error message for expressions like .[1] W3C specification does not allow predicates after abbreviated steps. Currently this results in parsing terminating at the step, which leads to confusing error messages like "Invalid query" or "Unmatched braces". --- src/pugixml.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 396061a..94bd8ae 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -11398,12 +11398,18 @@ PUGI__NS_BEGIN { _lexer.next(); + if (_lexer.current() == lex_open_square_brace) + return error("Predicates are not allowed after an abbreviated step"); + return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0); } else if (_lexer.current() == lex_double_dot) { _lexer.next(); + if (_lexer.current() == lex_open_square_brace) + return error("Predicates are not allowed after an abbreviated step"); + return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0); } -- cgit v1.2.3 From 1b3e8614e7638fb7e87ec7e85bfbe8447432c53c Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 30 Jan 2017 08:57:07 -0800 Subject: XPath: Reword brace mismatch errors for clarity --- src/pugixml.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 94bd8ae..c6fee40 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -11272,7 +11272,7 @@ PUGI__NS_BEGIN if (!n) return 0; if (_lexer.current() != lex_close_brace) - return error("Unmatched braces"); + return error("Expected ')' to match an opening '('"); _lexer.next(); @@ -11366,7 +11366,7 @@ PUGI__NS_BEGIN if (!n) return 0; if (_lexer.current() != lex_close_square_brace) - return error("Unmatched square brace"); + return error("Expected ']' to match an opening '['"); _lexer.next(); } @@ -11535,7 +11535,7 @@ PUGI__NS_BEGIN if (!pred) return 0; if (_lexer.current() != lex_close_square_brace) - return error("Unmatched square brace"); + return error("Expected ']' to match an opening '['"); _lexer.next(); if (last) last->set_next(pred); -- cgit v1.2.3 From ac150d504e0e1fa54eb042325c87f08740f1d4f6 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 30 Jan 2017 21:45:01 -0800 Subject: XPath: Throw std::bad_alloc if we got an out-of-memory error This allows us to gradually convert exception handling of out-of-memory during evaluation to a non-throwing approach without changing the observable behavior. --- src/pugixml.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index c6fee40..2e25551 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -11870,6 +11870,10 @@ PUGI__NS_BEGIN xpath_string r = impl->root->eval_string(c, sd.stack); + #ifndef PUGIXML_NO_EXCEPTIONS + if (sd.error) throw std::bad_alloc(); + #endif + return sd.error ? xpath_string() : r; } @@ -12510,6 +12514,10 @@ namespace pugi bool r = static_cast(_impl)->root->eval_boolean(c, sd.stack); + #ifndef PUGIXML_NO_EXCEPTIONS + if (sd.error) throw std::bad_alloc(); + #endif + return sd.error ? false : r; } @@ -12526,6 +12534,10 @@ namespace pugi double r = static_cast(_impl)->root->eval_number(c, sd.stack); + #ifndef PUGIXML_NO_EXCEPTIONS + if (sd.error) throw std::bad_alloc(); + #endif + return sd.error ? impl::gen_nan() : r; } @@ -12574,6 +12586,10 @@ namespace pugi impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); + #ifndef PUGIXML_NO_EXCEPTIONS + if (sd.error) throw std::bad_alloc(); + #endif + return sd.error ? xpath_node_set() : xpath_node_set(r.begin(), r.end(), r.type()); } @@ -12591,6 +12607,10 @@ namespace pugi impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); + #ifndef PUGIXML_NO_EXCEPTIONS + if (sd.error) throw std::bad_alloc(); + #endif + return sd.error ? xpath_node() : r.first(); } -- cgit v1.2.3 From 1a2e4b88ee091613d2978fb2d23ce146416d3413 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 30 Jan 2017 21:46:03 -0800 Subject: XPath: Use nonthrowing allocations in duplicate_string This requires explicit error handling for xpath_string::data calls. --- src/pugixml.cpp | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 2e25551..56cd941 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -7625,8 +7625,8 @@ PUGI__NS_BEGIN static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) { - char_t* result = static_cast(alloc->allocate_throw((length + 1) * sizeof(char_t))); - assert(result); + char_t* result = static_cast(alloc->allocate_nothrow((length + 1) * sizeof(char_t))); + if (!result) return 0; memcpy(result, string, length * sizeof(char_t)); result[length] = 0; @@ -7655,9 +7655,13 @@ PUGI__NS_BEGIN { assert(begin <= end); + if (begin == end) + return xpath_string(); + size_t length = static_cast(end - begin); + const char_t* data = duplicate_string(begin, length, alloc); - return length == 0 ? xpath_string() : xpath_string(duplicate_string(begin, length, alloc), true, length); + return data ? xpath_string(data, true, length) : xpath_string(); } xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0) @@ -7715,8 +7719,11 @@ PUGI__NS_BEGIN if (!_uses_heap) { size_t length_ = strlength(_buffer); + const char_t* data_ = duplicate_string(_buffer, length_, alloc); + + if (!data_) return 0; - _buffer = duplicate_string(_buffer, length_, alloc); + _buffer = data_; _uses_heap = true; _length_heap = length_; } @@ -10595,6 +10602,8 @@ PUGI__NS_BEGIN xpath_string s = string_value(c.n, stack.result); char_t* begin = s.data(stack.result); + if (!begin) return xpath_string(); + char_t* end = normalize_space(begin); return xpath_string::from_heap_preallocated(begin, end); @@ -10605,6 +10614,8 @@ PUGI__NS_BEGIN xpath_string s = _left->eval_string(c, stack); char_t* begin = s.data(stack.result); + if (!begin) return xpath_string(); + char_t* end = normalize_space(begin); return xpath_string::from_heap_preallocated(begin, end); @@ -10621,6 +10632,8 @@ PUGI__NS_BEGIN xpath_string to = _right->_next->eval_string(c, swapped_stack); char_t* begin = s.data(stack.result); + if (!begin) return xpath_string(); + char_t* end = translate(begin, from.c_str(), to.c_str(), to.length()); return xpath_string::from_heap_preallocated(begin, end); @@ -10631,6 +10644,8 @@ PUGI__NS_BEGIN xpath_string s = _left->eval_string(c, stack); char_t* begin = s.data(stack.result); + if (!begin) return xpath_string(); + char_t* end = translate_table(begin, _data.table); return xpath_string::from_heap_preallocated(begin, end); -- cgit v1.2.3 From c370d1190d3c1ac5ea59f0824db500f32a8594bd Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 30 Jan 2017 22:10:13 -0800 Subject: XPath: Fix reallocate_nothrow to preserve existing state Instead of rolling back the allocation and trying to allocate again, explicitly handle inplace reallocate if possible, and allocate a new block otherwise. This is going to be important once we use reallocate_nothrow from a non-throwing context. --- src/pugixml.cpp | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 56cd941..7a44105 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -7456,33 +7456,35 @@ PUGI__NS_BEGIN // we can only reallocate the last object assert(ptr == 0 || static_cast(ptr) + old_size == &_root->data[0] + _root_size); - // adjust root size so that we have not allocated the object at all - bool only_object = (_root_size == old_size); - - if (ptr) _root_size -= old_size; + // try to reallocate the object inplace + if (ptr && _root_size - old_size + new_size <= _root->capacity) + { + _root_size = _root_size - old_size + new_size; + return ptr; + } - // allocate a new version (this will obviously reuse the memory if possible) + // allocate a new block void* result = allocate_nothrow(new_size); if (!result) return 0; // we have a new block - if (result != ptr && ptr) + if (ptr) { - // copy old data + // copy old data (we only support growing) assert(new_size >= old_size); memcpy(result, ptr, old_size); // free the previous page if it had no other objects - if (only_object) - { - assert(_root->data == result); - assert(_root->next); + assert(_root->data == result); + assert(_root->next); + if (_root->next->data == ptr) + { + // deallocate the whole page, unless it was the first one xpath_memory_block* next = _root->next->next; if (next) { - // deallocate the whole page, unless it was the first one xml_memory::deallocate(_root->next); _root->next = next; } -- cgit v1.2.3 From 9e40c5853276c53458438bb4da37de6a2adbbdca Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 30 Jan 2017 22:28:57 -0800 Subject: XPath: Replace all (re)allocate_throw with (re)allocate_nothrow This generates some out-of-memory code paths that are not covered by existing tests, which will need to be resolved later. --- src/pugixml.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 7a44105..86c14cb 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -7688,8 +7688,8 @@ PUGI__NS_BEGIN size_t result_length = target_length + source_length; // allocate new buffer - char_t* result = static_cast(alloc->reallocate_throw(_uses_heap ? const_cast(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t))); - assert(result); + char_t* result = static_cast(alloc->reallocate_nothrow(_uses_heap ? const_cast(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t))); + if (!result) return; // append first string to the new buffer in case there was no reallocation if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t)); @@ -8148,8 +8148,8 @@ PUGI__NS_BEGIN // allocate a buffer of suitable length for the number size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4; - char_t* result = static_cast(alloc->allocate_throw(sizeof(char_t) * result_size)); - assert(result); + char_t* result = static_cast(alloc->allocate_nothrow(sizeof(char_t) * result_size)); + if (!result) return xpath_string(); // make the number! char_t* s = result; @@ -8780,8 +8780,8 @@ PUGI__NS_BEGIN if (size_ + count > capacity) { // reallocate the old array or allocate a new one - xpath_node* data = static_cast(alloc->reallocate_throw(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node))); - assert(data); + xpath_node* data = static_cast(alloc->reallocate_nothrow(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node))); + if (!data) return; // finalize _begin = data; @@ -8832,8 +8832,8 @@ PUGI__NS_BEGIN size_t new_capacity = capacity + capacity / 2 + 1; // reallocate the old array or allocate a new one - xpath_node* data = static_cast(alloc->reallocate_throw(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); - assert(data); + xpath_node* data = static_cast(alloc->reallocate_nothrow(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); + if (!data) return; // finalize _begin = data; @@ -10418,8 +10418,8 @@ PUGI__NS_BEGIN // allocate on-heap for large concats if (count > sizeof(static_buffer) / sizeof(static_buffer[0])) { - buffer = static_cast(stack.temp->allocate_throw(count * sizeof(xpath_string))); - assert(buffer); + buffer = static_cast(stack.temp->allocate_nothrow(count * sizeof(xpath_string))); + if (!buffer) return xpath_string(); } // evaluate all strings to temporary stack @@ -10436,8 +10436,8 @@ PUGI__NS_BEGIN for (size_t i = 0; i < count; ++i) length += buffer[i].length(); // create final string - char_t* result = static_cast(stack.result->allocate_throw((length + 1) * sizeof(char_t))); - assert(result); + char_t* result = static_cast(stack.result->allocate_nothrow((length + 1) * sizeof(char_t))); + if (!result) return xpath_string(); char_t* ri = result; -- cgit v1.2.3 From f500435cb49c3b532cec562bb63470af0c9e6a31 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 30 Jan 2017 22:31:57 -0800 Subject: XPath: Remove (re)allocate_throw and setjmp Now error handling in XPath implementation relies on explicit error propagation and is converted to an appropriate result at the end. --- src/pugixml.cpp | 106 ++++++++------------------------------------------------ 1 file changed, 15 insertions(+), 91 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 86c14cb..79614cb 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -29,9 +29,6 @@ #ifndef PUGIXML_NO_XPATH # include # include -# ifdef PUGIXML_NO_EXCEPTIONS -# include -# endif #endif #ifndef PUGIXML_NO_STL @@ -47,10 +44,8 @@ # pragma warning(push) # pragma warning(disable: 4127) // conditional expression is constant # pragma warning(disable: 4324) // structure was padded due to __declspec(align()) -# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable # pragma warning(disable: 4702) // unreachable code # pragma warning(disable: 4996) // this function or variable may be unsafe -# pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged #endif #ifdef __INTEL_COMPILER @@ -7399,18 +7394,11 @@ PUGI__NS_BEGIN bool* _error; public: - #ifdef PUGIXML_NO_EXCEPTIONS - jmp_buf* error_handler; - #endif - xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error) { - #ifdef PUGIXML_NO_EXCEPTIONS - error_handler = 0; - #endif } - void* allocate_nothrow(size_t size) + void* allocate(size_t size) { // round size up to block alignment boundary size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); @@ -7447,7 +7435,7 @@ PUGI__NS_BEGIN } } - void* reallocate_nothrow(void* ptr, size_t old_size, size_t new_size) + void* reallocate(void* ptr, size_t old_size, size_t new_size) { // round size up to block alignment boundary old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); @@ -7464,7 +7452,7 @@ PUGI__NS_BEGIN } // allocate a new block - void* result = allocate_nothrow(new_size); + void* result = allocate(new_size); if (!result) return 0; // we have a new block @@ -7494,40 +7482,6 @@ PUGI__NS_BEGIN return result; } - void* allocate_throw(size_t size) - { - void* result = allocate_nothrow(size); - - if (!result) - { - #ifdef PUGIXML_NO_EXCEPTIONS - assert(error_handler); - longjmp(*error_handler, 1); - #else - throw std::bad_alloc(); - #endif - } - - return result; - } - - void* reallocate_throw(void* ptr, size_t old_size, size_t new_size) - { - void* result = reallocate_nothrow(ptr, old_size, new_size); - - if (!result) - { - #ifdef PUGIXML_NO_EXCEPTIONS - assert(error_handler); - longjmp(*error_handler, 1); - #else - throw std::bad_alloc(); - #endif - } - - return result; - } - void revert(const xpath_allocator& state) { // free all new pages @@ -7592,10 +7546,6 @@ PUGI__NS_BEGIN xpath_allocator temp; xpath_stack stack; - #ifdef PUGIXML_NO_EXCEPTIONS - jmp_buf error_handler; - #endif - xpath_stack_data(): error(false), result(blocks + 0, &error), temp(blocks + 1, &error) { blocks[0].next = blocks[1].next = 0; @@ -7603,10 +7553,6 @@ PUGI__NS_BEGIN stack.result = &result; stack.temp = &temp; - - #ifdef PUGIXML_NO_EXCEPTIONS - result.error_handler = temp.error_handler = &error_handler; - #endif } ~xpath_stack_data() @@ -7627,7 +7573,7 @@ PUGI__NS_BEGIN static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) { - char_t* result = static_cast(alloc->allocate_nothrow((length + 1) * sizeof(char_t))); + char_t* result = static_cast(alloc->allocate((length + 1) * sizeof(char_t))); if (!result) return 0; memcpy(result, string, length * sizeof(char_t)); @@ -7688,7 +7634,7 @@ PUGI__NS_BEGIN size_t result_length = target_length + source_length; // allocate new buffer - char_t* result = static_cast(alloc->reallocate_nothrow(_uses_heap ? const_cast(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t))); + char_t* result = static_cast(alloc->reallocate(_uses_heap ? const_cast(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t))); if (!result) return; // append first string to the new buffer in case there was no reallocation @@ -8148,7 +8094,7 @@ PUGI__NS_BEGIN // allocate a buffer of suitable length for the number size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4; - char_t* result = static_cast(alloc->allocate_nothrow(sizeof(char_t) * result_size)); + char_t* result = static_cast(alloc->allocate(sizeof(char_t) * result_size)); if (!result) return xpath_string(); // make the number! @@ -8433,12 +8379,10 @@ PUGI__NS_BEGIN if (!table[i]) table[i] = static_cast(i); - void* result = alloc->allocate_nothrow(sizeof(table)); + void* result = alloc->allocate(sizeof(table)); + if (!result) return 0; - if (result) - { - memcpy(result, table, sizeof(table)); - } + memcpy(result, table, sizeof(table)); return static_cast(result); } @@ -8780,7 +8724,7 @@ PUGI__NS_BEGIN if (size_ + count > capacity) { // reallocate the old array or allocate a new one - xpath_node* data = static_cast(alloc->reallocate_nothrow(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node))); + xpath_node* data = static_cast(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node))); if (!data) return; // finalize @@ -8832,7 +8776,7 @@ PUGI__NS_BEGIN size_t new_capacity = capacity + capacity / 2 + 1; // reallocate the old array or allocate a new one - xpath_node* data = static_cast(alloc->reallocate_nothrow(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); + xpath_node* data = static_cast(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); if (!data) return; // finalize @@ -10418,7 +10362,7 @@ PUGI__NS_BEGIN // allocate on-heap for large concats if (count > sizeof(static_buffer) / sizeof(static_buffer[0])) { - buffer = static_cast(stack.temp->allocate_nothrow(count * sizeof(xpath_string))); + buffer = static_cast(stack.temp->allocate(count * sizeof(xpath_string))); if (!buffer) return xpath_string(); } @@ -10436,7 +10380,7 @@ PUGI__NS_BEGIN for (size_t i = 0; i < count; ++i) length += buffer[i].length(); // create final string - char_t* result = static_cast(stack.result->allocate_nothrow((length + 1) * sizeof(char_t))); + char_t* result = static_cast(stack.result->allocate((length + 1) * sizeof(char_t))); if (!result) return xpath_string(); char_t* ri = result; @@ -10972,7 +10916,7 @@ PUGI__NS_BEGIN void* alloc_node() { - void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node)); + void* result = _alloc->allocate(sizeof(xpath_ast_node)); if (!result) return error_oom(); return result; @@ -11021,7 +10965,7 @@ PUGI__NS_BEGIN size_t length = static_cast(value.end - value.begin); - char_t* c = static_cast(_alloc->allocate_nothrow((length + 1) * sizeof(char_t))); + char_t* c = static_cast(_alloc->allocate((length + 1) * sizeof(char_t))); if (!c) { error_oom(); @@ -11879,10 +11823,6 @@ PUGI__NS_BEGIN { if (!impl) return xpath_string(); - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return xpath_string(); - #endif - xpath_context c(n, 1, 1); xpath_string r = impl->root->eval_string(c, sd.stack); @@ -12525,10 +12465,6 @@ namespace pugi impl::xpath_context c(n, 1, 1); impl::xpath_stack_data sd; - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return false; - #endif - bool r = static_cast(_impl)->root->eval_boolean(c, sd.stack); #ifndef PUGIXML_NO_EXCEPTIONS @@ -12545,10 +12481,6 @@ namespace pugi impl::xpath_context c(n, 1, 1); impl::xpath_stack_data sd; - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return impl::gen_nan(); - #endif - double r = static_cast(_impl)->root->eval_number(c, sd.stack); #ifndef PUGIXML_NO_EXCEPTIONS @@ -12597,10 +12529,6 @@ namespace pugi impl::xpath_context c(n, 1, 1); impl::xpath_stack_data sd; - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return xpath_node_set(); - #endif - impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); #ifndef PUGIXML_NO_EXCEPTIONS @@ -12618,10 +12546,6 @@ namespace pugi impl::xpath_context c(n, 1, 1); impl::xpath_stack_data sd; - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return xpath_node(); - #endif - impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); #ifndef PUGIXML_NO_EXCEPTIONS -- cgit v1.2.3 From 9c7897b8d231e6b34b0dd18c528a95e848d45f71 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 30 Jan 2017 23:55:31 -0800 Subject: Remove null pointer test from first_element_by_path All other functions treat null pointer inputs as invalid; now this function does as well. --- src/pugixml.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 79614cb..8f93819 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -6132,7 +6132,7 @@ namespace pugi { xml_node found = *this; // Current search context. - if (!_root || !path_ || !path_[0]) return found; + if (!_root || !path_[0]) return found; if (path_[0] == delimiter) { -- cgit v1.2.3 From 0e3ccc73965b10c078fba31967c5d5500dade767 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 1 Feb 2017 21:05:37 -0800 Subject: Remove redundant branch from xml_node::path() The code works fine regardless of the *j->name check, and omitting this makes the code more symmetric between the "count" and "write" stage; additionally this improves coverage - due to how strcpy_insitu works it's not really possible to get an empty non-NULL name in the node. --- src/pugixml.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 8f93819..67e0cfe 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -6113,7 +6113,7 @@ namespace pugi if (j != _root) result[--offset] = delimiter; - if (j->name && *j->name) + if (j->name) { size_t length = impl::strlength(j->name); -- cgit v1.2.3 From 33159924b121f705428b26d1a1649735e1abbf19 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 2 Feb 2017 18:40:20 -0800 Subject: XPath: Clean up out-of-memory parse error handling Instead of relying on a specific string in the parse result, use allocator error state to report the error and then convert it to a string if necessary. We currently have to manually trigger the OOM error in two places because we use global allocator in rare cases; we don't really need to do this so this will be cleaned up later. --- src/pugixml.cpp | 56 ++++++++++++++++++++++++++------------------------------ 1 file changed, 26 insertions(+), 30 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 67e0cfe..9d1be44 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -7387,13 +7387,12 @@ PUGI__NS_BEGIN }; }; - class xpath_allocator + struct xpath_allocator { xpath_memory_block* _root; size_t _root_size; bool* _error; - public: xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error) { } @@ -7540,13 +7539,13 @@ PUGI__NS_BEGIN struct xpath_stack_data { - bool error; xpath_memory_block blocks[2]; xpath_allocator result; xpath_allocator temp; xpath_stack stack; + bool oom; - xpath_stack_data(): error(false), result(blocks + 0, &error), temp(blocks + 1, &error) + xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false) { blocks[0].next = blocks[1].next = 0; blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data); @@ -10911,15 +10910,15 @@ PUGI__NS_BEGIN xpath_ast_node* error_oom() { - return error("Out of memory"); + assert(_alloc->_error); + *_alloc->_error = true; + + return 0; } void* alloc_node() { - void* result = _alloc->allocate(sizeof(xpath_ast_node)); - if (!result) return error_oom(); - - return result; + return _alloc->allocate(sizeof(xpath_ast_node)); } xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value) @@ -10966,11 +10965,7 @@ PUGI__NS_BEGIN size_t length = static_cast(value.end - value.begin); char_t* c = static_cast(_alloc->allocate((length + 1) * sizeof(char_t))); - if (!c) - { - error_oom(); - return c; - } + if (!c) return 0; memcpy(c, value.begin, length * sizeof(char_t)); c[length] = 0; @@ -11808,7 +11803,7 @@ PUGI__NS_BEGIN xml_memory::deallocate(impl); } - xpath_query_impl(): root(0), alloc(&block) + xpath_query_impl(): root(0), alloc(&block, &oom), oom(false) { block.next = 0; block.capacity = sizeof(block.data); @@ -11817,6 +11812,7 @@ PUGI__NS_BEGIN xpath_ast_node* root; xpath_allocator alloc; xpath_memory_block block; + bool oom; }; PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd) @@ -11828,10 +11824,10 @@ PUGI__NS_BEGIN xpath_string r = impl->root->eval_string(c, sd.stack); #ifndef PUGIXML_NO_EXCEPTIONS - if (sd.error) throw std::bad_alloc(); + if (sd.oom) throw std::bad_alloc(); #endif - return sd.error ? xpath_string() : r; + return sd.oom ? xpath_string() : r; } PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl) @@ -12406,11 +12402,11 @@ namespace pugi } else { - #ifndef PUGIXML_NO_EXCEPTIONS - if (strcmp(_result.error, "Out of memory")) - throw xpath_exception(_result); - else - throw std::bad_alloc(); + #ifdef PUGIXML_NO_EXCEPTIONS + if (qimpl->oom) _result.error = "Out of memory"; + #else + if (qimpl->oom) throw std::bad_alloc(); + throw xpath_exception(_result); #endif } } @@ -12468,10 +12464,10 @@ namespace pugi bool r = static_cast(_impl)->root->eval_boolean(c, sd.stack); #ifndef PUGIXML_NO_EXCEPTIONS - if (sd.error) throw std::bad_alloc(); + if (sd.oom) throw std::bad_alloc(); #endif - return sd.error ? false : r; + return sd.oom ? false : r; } PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const @@ -12484,10 +12480,10 @@ namespace pugi double r = static_cast(_impl)->root->eval_number(c, sd.stack); #ifndef PUGIXML_NO_EXCEPTIONS - if (sd.error) throw std::bad_alloc(); + if (sd.oom) throw std::bad_alloc(); #endif - return sd.error ? impl::gen_nan() : r; + return sd.oom ? impl::gen_nan() : r; } #ifndef PUGIXML_NO_STL @@ -12532,10 +12528,10 @@ namespace pugi impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); #ifndef PUGIXML_NO_EXCEPTIONS - if (sd.error) throw std::bad_alloc(); + if (sd.oom) throw std::bad_alloc(); #endif - return sd.error ? xpath_node_set() : xpath_node_set(r.begin(), r.end(), r.type()); + return sd.oom ? xpath_node_set() : xpath_node_set(r.begin(), r.end(), r.type()); } PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const @@ -12549,10 +12545,10 @@ namespace pugi impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); #ifndef PUGIXML_NO_EXCEPTIONS - if (sd.error) throw std::bad_alloc(); + if (sd.oom) throw std::bad_alloc(); #endif - return sd.error ? xpath_node() : r.first(); + return sd.oom ? xpath_node() : r.first(); } PUGI__FN const xpath_parse_result& xpath_query::result() const -- cgit v1.2.3 From bcc7ed57a2b6155c30ec871bb12ffae110fcf558 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Fri, 3 Feb 2017 20:33:40 -0800 Subject: XPath: Simplify evaluation error flow Instead of having two checks for out-of-memory when exceptions are enabled, do just one and decide what to do based on whether we can throw. --- src/pugixml.cpp | 65 +++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 9d1be44..7e6fe64 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -11823,11 +11823,16 @@ PUGI__NS_BEGIN xpath_string r = impl->root->eval_string(c, sd.stack); - #ifndef PUGIXML_NO_EXCEPTIONS - if (sd.oom) throw std::bad_alloc(); - #endif + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return xpath_string(); + #else + throw std::bad_alloc(); + #endif + } - return sd.oom ? xpath_string() : r; + return r; } PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl) @@ -12463,11 +12468,16 @@ namespace pugi bool r = static_cast(_impl)->root->eval_boolean(c, sd.stack); - #ifndef PUGIXML_NO_EXCEPTIONS - if (sd.oom) throw std::bad_alloc(); - #endif + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return false; + #else + throw std::bad_alloc(); + #endif + } - return sd.oom ? false : r; + return r; } PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const @@ -12479,11 +12489,16 @@ namespace pugi double r = static_cast(_impl)->root->eval_number(c, sd.stack); - #ifndef PUGIXML_NO_EXCEPTIONS - if (sd.oom) throw std::bad_alloc(); - #endif + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return impl::gen_nan(); + #else + throw std::bad_alloc(); + #endif + } - return sd.oom ? impl::gen_nan() : r; + return r; } #ifndef PUGIXML_NO_STL @@ -12527,11 +12542,16 @@ namespace pugi impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); - #ifndef PUGIXML_NO_EXCEPTIONS - if (sd.oom) throw std::bad_alloc(); - #endif + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return xpath_node_set(); + #else + throw std::bad_alloc(); + #endif + } - return sd.oom ? xpath_node_set() : xpath_node_set(r.begin(), r.end(), r.type()); + return xpath_node_set(r.begin(), r.end(), r.type()); } PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const @@ -12544,11 +12564,16 @@ namespace pugi impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); - #ifndef PUGIXML_NO_EXCEPTIONS - if (sd.oom) throw std::bad_alloc(); - #endif + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return xpath_node(); + #else + throw std::bad_alloc(); + #endif + } - return sd.oom ? xpath_node() : r.first(); + return r.first(); } PUGI__FN const xpath_parse_result& xpath_query::result() const -- cgit v1.2.3