From f3c29ef3d8252b42aa66c85d21ea622c17975037 Mon Sep 17 00:00:00 2001
From: "arseny.kapoulkine"
Date: Tue, 19 Jul 2011 15:20:20 +0000
Subject: Implemented support for non-seekable streams

git-svn-id: http://pugixml.googlecode.com/svn/trunk@809 99668b35-9821-0410-8761-19e4c4f06640
---
Review notes (this section is not part of the commit message):
- The line structure and the template argument lists (<...>) of this patch were
  lost in extraction and have been restored.
- Amended relative to the original commit: xml_stream_chunk::create() returns 0
  when global_allocate() fails instead of constructing into a null pointer;
  load_stream_data_noseek() requests at least one byte for the result buffer,
  as load_stream_data_seek() already does; explanatory comments were added.
  Hunk line counts and the diffstat below reflect the amended patch; the blob
  ids on the index line are those of the original commit.

 src/pugixml.cpp | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 117 insertions(+), 7 deletions(-)

diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index 0df89d2..df194b9 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -3175,7 +3175,100 @@ namespace
 	}
 
 #ifndef PUGIXML_NO_STL
-	template <typename T> xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding)
+	// fixed-size block of stream data; blocks are linked through 'next' so that a stream of unknown length can be read piecewise
+	struct xml_stream_chunk
+	{
+		static xml_stream_chunk* create()
+		{
+			void* memory = global_allocate(sizeof(xml_stream_chunk));
+			// callers test the result for null, so report allocation failure instead of constructing into a null pointer
+			if (!memory) return 0;
+
+			return new (memory) xml_stream_chunk();
+		}
+
+		// frees ptr and every chunk reachable from it through 'next'; a null ptr is a no-op
+		static void destroy(void* ptr)
+		{
+			xml_stream_chunk* chunk = static_cast<xml_stream_chunk*>(ptr);
+
+			// free chunk chain
+			while (chunk)
+			{
+				xml_stream_chunk* next = chunk->next;
+				global_deallocate(chunk);
+				chunk = next;
+			}
+		}
+
+		xml_stream_chunk(): next(0), size(0)
+		{
+		}
+
+		xml_stream_chunk* next;
+		size_t size;
+
+		// so that we can use this for both wchar_t and char data (char -> wchar_t casting is prohibited by strict aliasing)
+		wchar_t data[xml_memory_page_size / sizeof(wchar_t)];
+	};
+
+	// reads the rest of the stream without seeking: data is accumulated in a chunk list and then copied into one contiguous buffer
+	// on status_ok the caller owns *out_buffer (allocated with global_allocate); *out_size is its length in bytes
+	template <typename T> xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
+	{
+		buffer_holder chunks(0, xml_stream_chunk::destroy);
+
+		// read file to a chunk list
+		size_t total = 0;
+		xml_stream_chunk* last = 0;
+
+		while (!stream.eof())
+		{
+			// allocate new chunk
+			xml_stream_chunk* chunk = xml_stream_chunk::create();
+			if (!chunk) return status_out_of_memory;
+
+			// append chunk to list
+			if (last) last = last->next = chunk;
+			else chunks.data = last = chunk;
+
+			// read data to chunk
+			stream.read(reinterpret_cast<T*>(chunk->data), static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
+			chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
+
+			// read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
+			if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
+
+			// guard against huge files (chunk size is small enough to make this overflow check work)
+			if (total + chunk->size < total) return status_out_of_memory;
+			total += chunk->size;
+		}
+
+		// copy chunk list to a contiguous buffer (request at least one byte, like the seek-based loader, so that an empty stream does not depend on how the allocator treats zero-size requests)
+		char* buffer = static_cast<char*>(global_allocate(total > 0 ? total : 1));
+		if (!buffer) return status_out_of_memory;
+
+		char* write = buffer;
+
+		for (xml_stream_chunk* chunk = static_cast<xml_stream_chunk*>(chunks.data); chunk; chunk = chunk->next)
+		{
+			assert(write + chunk->size <= buffer + total);
+			memcpy(write, chunk->data, chunk->size);
+			write += chunk->size;
+		}
+
+		assert(write == buffer + total);
+
+		// return buffer
+		*out_buffer = buffer;
+		*out_size = total;
+
+		return status_ok;
+	}
+
+	// reads the rest of the stream in a single read, using tellg/seekg to measure the remaining length first
+	// on status_ok the caller owns *out_buffer (allocated with global_allocate); *out_size is the number of bytes actually read
+	template <typename T> xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
 	{
 		// get length of remaining data in stream
 		typename std::basic_istream<T>::pos_type pos = stream.tellg();
@@ -3183,28 +3276,45 @@ namespace
 		std::streamoff length = stream.tellg() - pos;
 		stream.seekg(pos);
 
-		if (stream.fail() || pos < 0) return make_parse_result(status_io_error);
+		if (stream.fail() || pos < 0) return status_io_error;
 
 		// guard against huge files
 		size_t read_length = static_cast<size_t>(length);
 
-		if (static_cast<std::streamsize>(read_length) != length || length < 0) return make_parse_result(status_out_of_memory);
+		if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
 
 		// read stream data into memory (guard against stream exceptions with buffer holder)
 		buffer_holder buffer(global_allocate((read_length > 0 ? read_length : 1) * sizeof(T)), global_deallocate);
-		if (!buffer.data) return make_parse_result(status_out_of_memory);
+		if (!buffer.data) return status_out_of_memory;
 
 		stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
 
 		// read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
-		if (stream.bad()) return make_parse_result(status_io_error);
+		if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
 
-		// load data from buffer
+		// return buffer
 		size_t actual_length = static_cast<size_t>(stream.gcount());
 		assert(actual_length <= read_length);
 
-		return doc.load_buffer_inplace_own(buffer.release(), actual_length * sizeof(T), options, encoding);
+		*out_buffer = buffer.release();
+		*out_size = actual_length * sizeof(T);
+
+		return status_ok;
 	}
+
+	// loads the whole stream into memory and parses it into doc; the buffer is handed over to load_buffer_inplace_own
+	// returns the parse result, or an error result made from the load status if reading the stream failed
+	template <typename T> xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding)
+	{
+		void* buffer = 0;
+		size_t size = 0;
+
+		// load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
+		xml_parse_status status = (stream.tellg() < 0) ? load_stream_data_noseek(stream, &buffer, &size) : load_stream_data_seek(stream, &buffer, &size);
+		if (status != status_ok) return make_parse_result(status);
+
+		return doc.load_buffer_inplace_own(buffer, size, options, encoding);
+	}
 #endif
 
 #if defined(MSVC_CRT_VERSION) || defined(__BORLANDC__) || defined(__MINGW32__)
-- 
cgit v1.2.3