| [d2596b] | 1 | /** | 
|---|
|  | 2 | * pugixml parser - version 1.0 | 
|---|
|  | 3 | * -------------------------------------------------------- | 
|---|
|  | 4 | * Copyright (C) 2006-2010, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) | 
|---|
|  | 5 | * Report bugs and download new versions at http://pugixml.org/ | 
|---|
|  | 6 | * | 
|---|
|  | 7 | * This library is distributed under the MIT License. See notice at the end | 
|---|
|  | 8 | * of this file. | 
|---|
|  | 9 | * | 
|---|
|  | 10 | * This work is based on the pugxml parser, which is: | 
|---|
|  | 11 | * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) | 
|---|
|  | 12 | */ | 
|---|
|  | 13 |  | 
|---|
|  | 14 | #include "pugixml.hpp" | 
|---|
|  | 15 |  | 
|---|
|  | 16 | #include <stdlib.h> | 
|---|
|  | 17 | #include <stdio.h> | 
|---|
|  | 18 | #include <string.h> | 
|---|
|  | 19 | #include <assert.h> | 
|---|
|  | 20 | #include <setjmp.h> | 
|---|
|  | 21 | #include <wchar.h> | 
|---|
|  | 22 |  | 
|---|
|  | 23 | #ifndef PUGIXML_NO_XPATH | 
|---|
|  | 24 | #       include <math.h> | 
|---|
|  | 25 | #       include <float.h> | 
|---|
|  | 26 | #endif | 
|---|
|  | 27 |  | 
|---|
|  | 28 | #ifndef PUGIXML_NO_STL | 
|---|
|  | 29 | #       include <istream> | 
|---|
|  | 30 | #       include <ostream> | 
|---|
|  | 31 | #       include <string> | 
|---|
|  | 32 | #endif | 
|---|
|  | 33 |  | 
|---|
|  | 34 | // For placement new | 
|---|
|  | 35 | #include <new> | 
|---|
|  | 36 |  | 
|---|
|  | 37 | #ifdef _MSC_VER | 
|---|
|  | 38 | #       pragma warning(disable: 4127) // conditional expression is constant | 
|---|
|  | 39 | #       pragma warning(disable: 4324) // structure was padded due to __declspec(align()) | 
|---|
|  | 40 | #       pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable | 
|---|
|  | 41 | #       pragma warning(disable: 4702) // unreachable code | 
|---|
|  | 42 | #       pragma warning(disable: 4996) // this function or variable may be unsafe | 
|---|
|  | 43 | #endif | 
|---|
|  | 44 |  | 
|---|
|  | 45 | #ifdef __INTEL_COMPILER | 
|---|
|  | 46 | #       pragma warning(disable: 177) // function was declared but never referenced | 
|---|
|  | 47 | #       pragma warning(disable: 279) // controlling expression is constant | 
|---|
|  | 48 | #       pragma warning(disable: 1478 1786) // function was declared "deprecated" | 
|---|
|  | 49 | #endif | 
|---|
|  | 50 |  | 
|---|
|  | 51 | #ifdef __BORLANDC__ | 
|---|
|  | 52 | #       pragma warn -8008 // condition is always false | 
|---|
|  | 53 | #       pragma warn -8066 // unreachable code | 
|---|
|  | 54 | #endif | 
|---|
|  | 55 |  | 
|---|
|  | 56 | #ifdef __SNC__ | 
|---|
|  | 57 | #       pragma diag_suppress=178 // function was declared but never referenced | 
|---|
|  | 58 | #       pragma diag_suppress=237 // controlling expression is constant | 
|---|
|  | 59 | #endif | 
|---|
|  | 60 |  | 
|---|
|  | 61 | // uintptr_t | 
|---|
|  | 62 | #if !defined(_MSC_VER) || _MSC_VER >= 1600 | 
|---|
|  | 63 | #       include <stdint.h> | 
|---|
|  | 64 | #else | 
|---|
|  | 65 | #       if _MSC_VER < 1300 | 
|---|
|  | 66 | // No native uintptr_t in MSVC6 | 
|---|
|  | 67 | typedef size_t uintptr_t; | 
|---|
|  | 68 | #       endif | 
|---|
|  | 69 | typedef unsigned __int8 uint8_t; | 
|---|
|  | 70 | typedef unsigned __int16 uint16_t; | 
|---|
|  | 71 | typedef unsigned __int32 uint32_t; | 
|---|
|  | 72 | typedef __int32 int32_t; | 
|---|
|  | 73 | #endif | 
|---|
|  | 74 |  | 
|---|
|  | 75 | // Inlining controls | 
|---|
|  | 76 | #if defined(_MSC_VER) && _MSC_VER >= 1300 | 
|---|
|  | 77 | #       define PUGIXML_NO_INLINE __declspec(noinline) | 
|---|
|  | 78 | #elif defined(__GNUC__) | 
|---|
|  | 79 | #       define PUGIXML_NO_INLINE __attribute__((noinline)) | 
|---|
|  | 80 | #else | 
|---|
|  | 81 | #       define PUGIXML_NO_INLINE | 
|---|
|  | 82 | #endif | 
|---|
|  | 83 |  | 
|---|
|  | 84 | // Simple static assertion | 
|---|
|  | 85 | #define STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; } | 
|---|
|  | 86 |  | 
|---|
|  | 87 | // Digital Mars C++ bug workaround for passing char loaded from memory via stack | 
|---|
|  | 88 | #ifdef __DMC__ | 
|---|
|  | 89 | #       define DMC_VOLATILE volatile | 
|---|
|  | 90 | #else | 
|---|
|  | 91 | #       define DMC_VOLATILE | 
|---|
|  | 92 | #endif | 
|---|
|  | 93 |  | 
|---|
|  | 94 | using namespace pugi; | 
|---|
|  | 95 |  | 
|---|
|  | 96 | // Memory allocation | 
|---|
|  | 97 | namespace | 
|---|
|  | 98 | { | 
|---|
// Default allocation hook: forwards the request to the C runtime heap.
// Returns whatever malloc returns for the given size.
void* default_allocate(size_t size)
{
    void* block = malloc(size);

    return block;
}
|---|
|  | 103 |  | 
|---|
// Default deallocation hook: hands the block back to the C runtime heap via free.
void default_deallocate(void* ptr)
{
    free(ptr);
}
|---|
|  | 108 |  | 
|---|
|  | 109 | allocation_function global_allocate = default_allocate; | 
|---|
|  | 110 | deallocation_function global_deallocate = default_deallocate; | 
|---|
|  | 111 | } | 
|---|
|  | 112 |  | 
|---|
|  | 113 | // String utilities | 
|---|
|  | 114 | namespace | 
|---|
|  | 115 | { | 
|---|
|  | 116 | // Get string length | 
|---|
|  | 117 | size_t strlength(const char_t* s) | 
|---|
|  | 118 | { | 
|---|
|  | 119 | assert(s); | 
|---|
|  | 120 |  | 
|---|
|  | 121 | #ifdef PUGIXML_WCHAR_MODE | 
|---|
|  | 122 | return wcslen(s); | 
|---|
|  | 123 | #else | 
|---|
|  | 124 | return strlen(s); | 
|---|
|  | 125 | #endif | 
|---|
|  | 126 | } | 
|---|
|  | 127 |  | 
|---|
|  | 128 | // Compare two strings | 
|---|
|  | 129 | bool strequal(const char_t* src, const char_t* dst) | 
|---|
|  | 130 | { | 
|---|
|  | 131 | assert(src && dst); | 
|---|
|  | 132 |  | 
|---|
|  | 133 | #ifdef PUGIXML_WCHAR_MODE | 
|---|
|  | 134 | return wcscmp(src, dst) == 0; | 
|---|
|  | 135 | #else | 
|---|
|  | 136 | return strcmp(src, dst) == 0; | 
|---|
|  | 137 | #endif | 
|---|
|  | 138 | } | 
|---|
|  | 139 |  | 
|---|
|  | 140 | // Compare lhs with [rhs_begin, rhs_end) | 
|---|
|  | 141 | bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count) | 
|---|
|  | 142 | { | 
|---|
|  | 143 | for (size_t i = 0; i < count; ++i) | 
|---|
|  | 144 | if (lhs[i] != rhs[i]) | 
|---|
|  | 145 | return false; | 
|---|
|  | 146 |  | 
|---|
|  | 147 | return lhs[count] == 0; | 
|---|
|  | 148 | } | 
|---|
|  | 149 |  | 
|---|
|  | 150 | #ifdef PUGIXML_WCHAR_MODE | 
|---|
// Copies the null-terminated narrow string source into dest one character at a
// time, converting each char to wchar_t, and terminates dest with 0.
// The conversion is only meaningful for ASCII input; dest must have room for
// the source length plus the terminator.
void widen_ascii(wchar_t* dest, const char* source)
{
    while (*source)
    {
        *dest = *source;

        ++dest;
        ++source;
    }

    *dest = 0;
}
|---|
|  | 157 | #endif | 
|---|
|  | 158 | } | 
|---|
|  | 159 |  | 
|---|
|  | 160 | #if !defined(PUGIXML_NO_STL) || !defined(PUGIXML_NO_XPATH) | 
|---|
|  | 161 | // auto_ptr-like buffer holder for exception recovery | 
|---|
|  | 162 | namespace | 
|---|
|  | 163 | { | 
|---|
// Scope guard that owns a raw buffer together with the function that frees it.
// When the holder is destroyed the deleter is called on the buffer, unless
// ownership was handed back to the caller through release().
struct buffer_holder
{
    void* data;             // owned buffer; null after release()
    void (*deleter)(void*); // function invoked on data by the destructor

    // Takes ownership of data; deleter is applied to it on destruction.
    buffer_holder(void* data, void (*deleter)(void*)): data(data), deleter(deleter)
    {
    }

    // Frees the buffer if it is still owned.
    ~buffer_holder()
    {
        if (data) deleter(data);
    }

    // Gives up ownership: returns the buffer and makes the destructor a no-op.
    void* release()
    {
        void* result = data;
        data = 0;
        return result;
    }

private:
    // Copying would leave two holders freeing the same buffer, so it is disabled
    // (declared private and intentionally left undefined).
    buffer_holder(const buffer_holder&);
    buffer_holder& operator=(const buffer_holder&);
};
|---|
|  | 185 | } | 
|---|
|  | 186 | #endif | 
|---|
|  | 187 |  | 
|---|
|  | 188 | namespace | 
|---|
|  | 189 | { | 
|---|
// Number of payload bytes in a regular memory page.
static const size_t xml_memory_page_size = 32 * 1024;

// Page headers are aligned to this many bytes, which keeps the low bits of a
// page address zero so that they can carry flags inside node/attribute headers.
static const uintptr_t xml_memory_page_alignment = 0x20;

// Header bits that hold the page address.
static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);

// Header flag: the name string was obtained from the allocator and must be freed.
static const uintptr_t xml_memory_page_name_allocated_mask = 0x10;

// Header flag: the value string was obtained from the allocator and must be freed.
static const uintptr_t xml_memory_page_value_allocated_mask = 0x08;

// Header bits that hold the node type (stored as type - 1).
static const uintptr_t xml_memory_page_type_mask = 0x07;
|---|
|  | 197 |  | 
|---|
|  | 198 | struct xml_allocator; | 
|---|
|  | 199 |  | 
|---|
|  | 200 | struct xml_memory_page | 
|---|
|  | 201 | { | 
|---|
|  | 202 | static xml_memory_page* construct(void* memory) | 
|---|
|  | 203 | { | 
|---|
|  | 204 | if (!memory) return 0; //$ redundant, left for performance | 
|---|
|  | 205 |  | 
|---|
|  | 206 | xml_memory_page* result = static_cast<xml_memory_page*>(memory); | 
|---|
|  | 207 |  | 
|---|
|  | 208 | result->allocator = 0; | 
|---|
|  | 209 | result->memory = 0; | 
|---|
|  | 210 | result->prev = 0; | 
|---|
|  | 211 | result->next = 0; | 
|---|
|  | 212 | result->busy_size = 0; | 
|---|
|  | 213 | result->freed_size = 0; | 
|---|
|  | 214 |  | 
|---|
|  | 215 | return result; | 
|---|
|  | 216 | } | 
|---|
|  | 217 |  | 
|---|
|  | 218 | xml_allocator* allocator; | 
|---|
|  | 219 |  | 
|---|
|  | 220 | void* memory; | 
|---|
|  | 221 |  | 
|---|
|  | 222 | xml_memory_page* prev; | 
|---|
|  | 223 | xml_memory_page* next; | 
|---|
|  | 224 |  | 
|---|
|  | 225 | size_t busy_size; | 
|---|
|  | 226 | size_t freed_size; | 
|---|
|  | 227 |  | 
|---|
|  | 228 | char data[1]; | 
|---|
|  | 229 | }; | 
|---|
|  | 230 |  | 
|---|
// Bookkeeping record stored immediately in front of every allocator-owned string.
struct xml_memory_string_header
{
    // distance of this header from the start of the owning page's data area
    uint16_t page_offset;

    // size of header plus string storage; 0 means the string fills its whole page
    uint16_t full_size;
};
|---|
|  | 236 |  | 
|---|
|  | 237 | struct xml_allocator | 
|---|
|  | 238 | { | 
|---|
|  | 239 | xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) | 
|---|
|  | 240 | { | 
|---|
|  | 241 | } | 
|---|
|  | 242 |  | 
|---|
|  | 243 | xml_memory_page* allocate_page(size_t data_size) | 
|---|
|  | 244 | { | 
|---|
|  | 245 | size_t size = offsetof(xml_memory_page, data) + data_size; | 
|---|
|  | 246 |  | 
|---|
|  | 247 | // allocate block with some alignment, leaving memory for worst-case padding | 
|---|
|  | 248 | void* memory = global_allocate(size + xml_memory_page_alignment); | 
|---|
|  | 249 | if (!memory) return 0; | 
|---|
|  | 250 |  | 
|---|
|  | 251 | // align upwards to page boundary | 
|---|
|  | 252 | void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1)); | 
|---|
|  | 253 |  | 
|---|
|  | 254 | // prepare page structure | 
|---|
|  | 255 | xml_memory_page* page = xml_memory_page::construct(page_memory); | 
|---|
|  | 256 |  | 
|---|
|  | 257 | page->memory = memory; | 
|---|
|  | 258 | page->allocator = _root->allocator; | 
|---|
|  | 259 |  | 
|---|
|  | 260 | return page; | 
|---|
|  | 261 | } | 
|---|
|  | 262 |  | 
|---|
|  | 263 | static void deallocate_page(xml_memory_page* page) | 
|---|
|  | 264 | { | 
|---|
|  | 265 | global_deallocate(page->memory); | 
|---|
|  | 266 | } | 
|---|
|  | 267 |  | 
|---|
|  | 268 | void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); | 
|---|
|  | 269 |  | 
|---|
|  | 270 | void* allocate_memory(size_t size, xml_memory_page*& out_page) | 
|---|
|  | 271 | { | 
|---|
|  | 272 | if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page); | 
|---|
|  | 273 |  | 
|---|
|  | 274 | void* buf = _root->data + _busy_size; | 
|---|
|  | 275 |  | 
|---|
|  | 276 | _busy_size += size; | 
|---|
|  | 277 |  | 
|---|
|  | 278 | out_page = _root; | 
|---|
|  | 279 |  | 
|---|
|  | 280 | return buf; | 
|---|
|  | 281 | } | 
|---|
|  | 282 |  | 
|---|
|  | 283 | void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) | 
|---|
|  | 284 | { | 
|---|
|  | 285 | if (page == _root) page->busy_size = _busy_size; | 
|---|
|  | 286 |  | 
|---|
|  | 287 | assert(ptr >= page->data && ptr < page->data + page->busy_size); | 
|---|
|  | 288 | (void)!ptr; | 
|---|
|  | 289 |  | 
|---|
|  | 290 | page->freed_size += size; | 
|---|
|  | 291 | assert(page->freed_size <= page->busy_size); | 
|---|
|  | 292 |  | 
|---|
|  | 293 | if (page->freed_size == page->busy_size) | 
|---|
|  | 294 | { | 
|---|
|  | 295 | if (page->next == 0) | 
|---|
|  | 296 | { | 
|---|
|  | 297 | assert(_root == page); | 
|---|
|  | 298 |  | 
|---|
|  | 299 | // top page freed, just reset sizes | 
|---|
|  | 300 | page->busy_size = page->freed_size = 0; | 
|---|
|  | 301 | _busy_size = 0; | 
|---|
|  | 302 | } | 
|---|
|  | 303 | else | 
|---|
|  | 304 | { | 
|---|
|  | 305 | assert(_root != page); | 
|---|
|  | 306 | assert(page->prev); | 
|---|
|  | 307 |  | 
|---|
|  | 308 | // remove from the list | 
|---|
|  | 309 | page->prev->next = page->next; | 
|---|
|  | 310 | page->next->prev = page->prev; | 
|---|
|  | 311 |  | 
|---|
|  | 312 | // deallocate | 
|---|
|  | 313 | deallocate_page(page); | 
|---|
|  | 314 | } | 
|---|
|  | 315 | } | 
|---|
|  | 316 | } | 
|---|
|  | 317 |  | 
|---|
|  | 318 | char_t* allocate_string(size_t length) | 
|---|
|  | 319 | { | 
|---|
|  | 320 | // allocate memory for string and header block | 
|---|
|  | 321 | size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t); | 
|---|
|  | 322 |  | 
|---|
|  | 323 | // round size up to pointer alignment boundary | 
|---|
|  | 324 | size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1); | 
|---|
|  | 325 |  | 
|---|
|  | 326 | xml_memory_page* page; | 
|---|
|  | 327 | xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page)); | 
|---|
|  | 328 |  | 
|---|
|  | 329 | if (!header) return 0; | 
|---|
|  | 330 |  | 
|---|
|  | 331 | // setup header | 
|---|
|  | 332 | ptrdiff_t page_offset = reinterpret_cast<char*>(header) - page->data; | 
|---|
|  | 333 |  | 
|---|
|  | 334 | assert(page_offset >= 0 && page_offset < (1 << 16)); | 
|---|
|  | 335 | header->page_offset = static_cast<uint16_t>(page_offset); | 
|---|
|  | 336 |  | 
|---|
|  | 337 | // full_size == 0 for large strings that occupy the whole page | 
|---|
|  | 338 | assert(full_size < (1 << 16) || (page->busy_size == full_size && page_offset == 0)); | 
|---|
|  | 339 | header->full_size = static_cast<uint16_t>(full_size < (1 << 16) ? full_size : 0); | 
|---|
|  | 340 |  | 
|---|
|  | 341 | return reinterpret_cast<char_t*>(header + 1); | 
|---|
|  | 342 | } | 
|---|
|  | 343 |  | 
|---|
|  | 344 | void deallocate_string(char_t* string) | 
|---|
|  | 345 | { | 
|---|
|  | 346 | // get header | 
|---|
|  | 347 | xml_memory_string_header* header = reinterpret_cast<xml_memory_string_header*>(string) - 1; | 
|---|
|  | 348 |  | 
|---|
|  | 349 | // deallocate | 
|---|
|  | 350 | size_t page_offset = offsetof(xml_memory_page, data) + header->page_offset; | 
|---|
|  | 351 | xml_memory_page* page = reinterpret_cast<xml_memory_page*>(reinterpret_cast<char*>(header) - page_offset); | 
|---|
|  | 352 |  | 
|---|
|  | 353 | // if full_size == 0 then this string occupies the whole page | 
|---|
|  | 354 | size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size; | 
|---|
|  | 355 |  | 
|---|
|  | 356 | deallocate_memory(header, full_size, page); | 
|---|
|  | 357 | } | 
|---|
|  | 358 |  | 
|---|
|  | 359 | xml_memory_page* _root; | 
|---|
|  | 360 | size_t _busy_size; | 
|---|
|  | 361 | }; | 
|---|
|  | 362 |  | 
|---|
|  | 363 | PUGIXML_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) | 
|---|
|  | 364 | { | 
|---|
|  | 365 | const size_t large_allocation_threshold = xml_memory_page_size / 4; | 
|---|
|  | 366 |  | 
|---|
|  | 367 | xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size); | 
|---|
|  | 368 | if (!page) return 0; | 
|---|
|  | 369 |  | 
|---|
|  | 370 | if (size <= large_allocation_threshold) | 
|---|
|  | 371 | { | 
|---|
|  | 372 | _root->busy_size = _busy_size; | 
|---|
|  | 373 |  | 
|---|
|  | 374 | // insert page at the end of linked list | 
|---|
|  | 375 | page->prev = _root; | 
|---|
|  | 376 | _root->next = page; | 
|---|
|  | 377 | _root = page; | 
|---|
|  | 378 |  | 
|---|
|  | 379 | _busy_size = size; | 
|---|
|  | 380 | } | 
|---|
|  | 381 | else | 
|---|
|  | 382 | { | 
|---|
|  | 383 | // insert page before the end of linked list, so that it is deleted as soon as possible | 
|---|
|  | 384 | // the last page is not deleted even if it's empty (see deallocate_memory) | 
|---|
|  | 385 | assert(_root->prev); | 
|---|
|  | 386 |  | 
|---|
|  | 387 | page->prev = _root->prev; | 
|---|
|  | 388 | page->next = _root; | 
|---|
|  | 389 |  | 
|---|
|  | 390 | _root->prev->next = page; | 
|---|
|  | 391 | _root->prev = page; | 
|---|
|  | 392 | } | 
|---|
|  | 393 |  | 
|---|
|  | 394 | // allocate inside page | 
|---|
|  | 395 | page->busy_size = size; | 
|---|
|  | 396 |  | 
|---|
|  | 397 | out_page = page; | 
|---|
|  | 398 | return page->data; | 
|---|
|  | 399 | } | 
|---|
|  | 400 | } | 
|---|
|  | 401 |  | 
|---|
|  | 402 | namespace pugi | 
|---|
|  | 403 | { | 
|---|
|  | 404 | /// A 'name=value' XML attribute structure. | 
|---|
|  | 405 | struct xml_attribute_struct | 
|---|
|  | 406 | { | 
|---|
|  | 407 | /// Default ctor | 
|---|
|  | 408 | xml_attribute_struct(xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0) | 
|---|
|  | 409 | { | 
|---|
|  | 410 | } | 
|---|
|  | 411 |  | 
|---|
|  | 412 | uintptr_t header; | 
|---|
|  | 413 |  | 
|---|
|  | 414 | char_t* name;   ///< Pointer to attribute name. | 
|---|
|  | 415 | char_t* value;  ///< Pointer to attribute value. | 
|---|
|  | 416 |  | 
|---|
|  | 417 | xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list) | 
|---|
|  | 418 | xml_attribute_struct* next_attribute;   ///< Next attribute | 
|---|
|  | 419 | }; | 
|---|
|  | 420 |  | 
|---|
|  | 421 | /// An XML document tree node. | 
|---|
|  | 422 | struct xml_node_struct | 
|---|
|  | 423 | { | 
|---|
|  | 424 | /// Default ctor | 
|---|
|  | 425 | /// \param type - node type | 
|---|
|  | 426 | xml_node_struct(xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) | 
|---|
|  | 427 | { | 
|---|
|  | 428 | } | 
|---|
|  | 429 |  | 
|---|
|  | 430 | uintptr_t header; | 
|---|
|  | 431 |  | 
|---|
|  | 432 | xml_node_struct*                parent;                                 ///< Pointer to parent | 
|---|
|  | 433 |  | 
|---|
|  | 434 | char_t*                                 name;                                   ///< Pointer to element name. | 
|---|
|  | 435 | char_t*                                 value;                                  ///< Pointer to any associated string data. | 
|---|
|  | 436 |  | 
|---|
|  | 437 | xml_node_struct*                first_child;                    ///< First child | 
|---|
|  | 438 |  | 
|---|
|  | 439 | xml_node_struct*                prev_sibling_c;                 ///< Left brother (cyclic list) | 
|---|
|  | 440 | xml_node_struct*                next_sibling;                   ///< Right brother | 
|---|
|  | 441 |  | 
|---|
|  | 442 | xml_attribute_struct*   first_attribute;                ///< First attribute | 
|---|
|  | 443 | }; | 
|---|
|  | 444 | } | 
|---|
|  | 445 |  | 
|---|
|  | 446 | namespace | 
|---|
|  | 447 | { | 
|---|
|  | 448 | struct xml_document_struct: public xml_node_struct, public xml_allocator | 
|---|
|  | 449 | { | 
|---|
|  | 450 | xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0) | 
|---|
|  | 451 | { | 
|---|
|  | 452 | } | 
|---|
|  | 453 |  | 
|---|
|  | 454 | const char_t* buffer; | 
|---|
|  | 455 | }; | 
|---|
|  | 456 |  | 
|---|
|  | 457 | static inline xml_allocator& get_allocator(const xml_node_struct* node) | 
|---|
|  | 458 | { | 
|---|
|  | 459 | assert(node); | 
|---|
|  | 460 |  | 
|---|
|  | 461 | return *reinterpret_cast<xml_memory_page*>(node->header & xml_memory_page_pointer_mask)->allocator; | 
|---|
|  | 462 | } | 
|---|
|  | 463 | } | 
|---|
|  | 464 |  | 
|---|
|  | 465 | // Low-level DOM operations | 
|---|
|  | 466 | namespace | 
|---|
|  | 467 | { | 
|---|
|  | 468 | inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) | 
|---|
|  | 469 | { | 
|---|
|  | 470 | xml_memory_page* page; | 
|---|
|  | 471 | void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page); | 
|---|
|  | 472 |  | 
|---|
|  | 473 | return new (memory) xml_attribute_struct(page); | 
|---|
|  | 474 | } | 
|---|
|  | 475 |  | 
|---|
|  | 476 | inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type) | 
|---|
|  | 477 | { | 
|---|
|  | 478 | xml_memory_page* page; | 
|---|
|  | 479 | void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page); | 
|---|
|  | 480 |  | 
|---|
|  | 481 | return new (memory) xml_node_struct(page, type); | 
|---|
|  | 482 | } | 
|---|
|  | 483 |  | 
|---|
|  | 484 | inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) | 
|---|
|  | 485 | { | 
|---|
|  | 486 | uintptr_t header = a->header; | 
|---|
|  | 487 |  | 
|---|
|  | 488 | if (header & xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name); | 
|---|
|  | 489 | if (header & xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value); | 
|---|
|  | 490 |  | 
|---|
|  | 491 | alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)); | 
|---|
|  | 492 | } | 
|---|
|  | 493 |  | 
|---|
|  | 494 | inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) | 
|---|
|  | 495 | { | 
|---|
|  | 496 | uintptr_t header = n->header; | 
|---|
|  | 497 |  | 
|---|
|  | 498 | if (header & xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name); | 
|---|
|  | 499 | if (header & xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value); | 
|---|
|  | 500 |  | 
|---|
|  | 501 | for (xml_attribute_struct* attr = n->first_attribute; attr; ) | 
|---|
|  | 502 | { | 
|---|
|  | 503 | xml_attribute_struct* next = attr->next_attribute; | 
|---|
|  | 504 |  | 
|---|
|  | 505 | destroy_attribute(attr, alloc); | 
|---|
|  | 506 |  | 
|---|
|  | 507 | attr = next; | 
|---|
|  | 508 | } | 
|---|
|  | 509 |  | 
|---|
|  | 510 | for (xml_node_struct* child = n->first_child; child; ) | 
|---|
|  | 511 | { | 
|---|
|  | 512 | xml_node_struct* next = child->next_sibling; | 
|---|
|  | 513 |  | 
|---|
|  | 514 | destroy_node(child, alloc); | 
|---|
|  | 515 |  | 
|---|
|  | 516 | child = next; | 
|---|
|  | 517 | } | 
|---|
|  | 518 |  | 
|---|
|  | 519 | alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)); | 
|---|
|  | 520 | } | 
|---|
|  | 521 |  | 
|---|
|  | 522 | PUGIXML_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) | 
|---|
|  | 523 | { | 
|---|
|  | 524 | xml_node_struct* child = allocate_node(alloc, type); | 
|---|
|  | 525 | if (!child) return 0; | 
|---|
|  | 526 |  | 
|---|
|  | 527 | child->parent = node; | 
|---|
|  | 528 |  | 
|---|
|  | 529 | xml_node_struct* first_child = node->first_child; | 
|---|
|  | 530 |  | 
|---|
|  | 531 | if (first_child) | 
|---|
|  | 532 | { | 
|---|
|  | 533 | xml_node_struct* last_child = first_child->prev_sibling_c; | 
|---|
|  | 534 |  | 
|---|
|  | 535 | last_child->next_sibling = child; | 
|---|
|  | 536 | child->prev_sibling_c = last_child; | 
|---|
|  | 537 | first_child->prev_sibling_c = child; | 
|---|
|  | 538 | } | 
|---|
|  | 539 | else | 
|---|
|  | 540 | { | 
|---|
|  | 541 | node->first_child = child; | 
|---|
|  | 542 | child->prev_sibling_c = child; | 
|---|
|  | 543 | } | 
|---|
|  | 544 |  | 
|---|
|  | 545 | return child; | 
|---|
|  | 546 | } | 
|---|
|  | 547 |  | 
|---|
|  | 548 | PUGIXML_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc) | 
|---|
|  | 549 | { | 
|---|
|  | 550 | xml_attribute_struct* a = allocate_attribute(alloc); | 
|---|
|  | 551 | if (!a) return 0; | 
|---|
|  | 552 |  | 
|---|
|  | 553 | xml_attribute_struct* first_attribute = node->first_attribute; | 
|---|
|  | 554 |  | 
|---|
|  | 555 | if (first_attribute) | 
|---|
|  | 556 | { | 
|---|
|  | 557 | xml_attribute_struct* last_attribute = first_attribute->prev_attribute_c; | 
|---|
|  | 558 |  | 
|---|
|  | 559 | last_attribute->next_attribute = a; | 
|---|
|  | 560 | a->prev_attribute_c = last_attribute; | 
|---|
|  | 561 | first_attribute->prev_attribute_c = a; | 
|---|
|  | 562 | } | 
|---|
|  | 563 | else | 
|---|
|  | 564 | { | 
|---|
|  | 565 | node->first_attribute = a; | 
|---|
|  | 566 | a->prev_attribute_c = a; | 
|---|
|  | 567 | } | 
|---|
|  | 568 |  | 
|---|
|  | 569 | return a; | 
|---|
|  | 570 | } | 
|---|
|  | 571 | } | 
|---|
|  | 572 |  | 
|---|
|  | 573 | // Helper classes for code generation | 
|---|
|  | 574 | namespace | 
|---|
|  | 575 | { | 
|---|
// Compile-time "false" tag: exposes value == 0 for use as a template argument.
struct opt_false
{
    enum
    {
        value = 0
    };
};
|---|
|  | 580 |  | 
|---|
// Compile-time "true" tag: exposes value == 1 for use as a template argument.
struct opt_true
{
    enum
    {
        value = 1
    };
};
|---|
|  | 585 | } | 
|---|
|  | 586 |  | 
|---|
|  | 587 | // Unicode utilities | 
|---|
|  | 588 | namespace | 
|---|
|  | 589 | { | 
|---|
// Returns value with its two bytes exchanged.
inline uint16_t endian_swap(uint16_t value)
{
    const unsigned int low_byte = value & 0xffu;
    const unsigned int high_byte = value >> 8;

    return static_cast<uint16_t>((low_byte << 8) | high_byte);
}
|---|
|  | 594 |  | 
|---|
// Returns value with the order of its four bytes reversed.
inline uint32_t endian_swap(uint32_t value)
{
    const uint32_t byte0_to_3 = (value & 0xff) << 24;
    const uint32_t byte1_to_2 = (value & 0xff00) << 8;
    const uint32_t byte2_to_1 = (value >> 8) & 0xff00;
    const uint32_t byte3_to_0 = value >> 24;

    return byte0_to_3 | byte1_to_2 | byte2_to_1 | byte3_to_0;
}
|---|
|  | 599 |  | 
|---|
// Output policy that counts how many UTF-8 bytes a sequence of code points needs.
// The running count is threaded through as the result value.
struct utf8_counter
{
    typedef size_t value_type;

    // Adds the encoded length of a code point below U+10000 to result.
    static value_type low(value_type result, uint32_t ch)
    {
        size_t length = 3;               // U+0800..U+FFFF

        if (ch < 0x80) length = 1;       // U+0000..U+007F
        else if (ch < 0x800) length = 2; // U+0080..U+07FF

        return result + length;
    }

    // Adds the encoded length of a code point in U+10000..U+10FFFF to result.
    static value_type high(value_type result, uint32_t)
    {
        return result + 4;
    }
};
|---|
|  | 620 |  | 
|---|
// Output policy that encodes code points as UTF-8 into a byte buffer.
// Each function writes at result and returns the position after the written bytes.
struct utf8_writer
{
    typedef uint8_t* value_type;

    // Encodes a code point below U+10000 using one to three bytes.
    static value_type low(value_type result, uint32_t ch)
    {
        // U+0000..U+007F: single byte
        if (ch < 0x80)
        {
            result[0] = static_cast<uint8_t>(ch);
            return result + 1;
        }

        // U+0080..U+07FF: two bytes
        if (ch < 0x800)
        {
            result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
            result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
            return result + 2;
        }

        // U+0800..U+FFFF: three bytes
        result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
        result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
        result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
        return result + 3;
    }

    // Encodes a code point in U+10000..U+10FFFF using four bytes.
    static value_type high(value_type result, uint32_t ch)
    {
        result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
        result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
        result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
        result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
        return result + 4;
    }

    // Encodes any code point, choosing between low and high by its value.
    static value_type any(value_type result, uint32_t ch)
    {
        if (ch < 0x10000) return low(result, ch);

        return high(result, ch);
    }
};
|---|
|  | 665 |  | 
|---|
// Output policy that counts how many UTF-16 code units a sequence of code points needs.
struct utf16_counter
{
    typedef size_t value_type;

    // A code point below U+10000 takes one code unit.
    static value_type low(value_type result, uint32_t)
    {
        const size_t units = 1;

        return result + units;
    }

    // A code point from U+10000 upwards takes two code units (a surrogate pair).
    static value_type high(value_type result, uint32_t)
    {
        const size_t units = 2;

        return result + units;
    }
};
|---|
|  | 680 |  | 
|---|
// Output policy that encodes code points as UTF-16 into a buffer of 16-bit units.
// Each function writes at result and returns the position after the written units.
struct utf16_writer
{
    typedef uint16_t* value_type;

    // Writes a code point below U+10000 as a single code unit.
    static value_type low(value_type result, uint32_t ch)
    {
        result[0] = static_cast<uint16_t>(ch);

        return result + 1;
    }

    // Writes a code point from U+10000 upwards as a surrogate pair.
    static value_type high(value_type result, uint32_t ch)
    {
        // 20-bit offset: upper 10 bits go to the lead surrogate, lower 10 bits to the trail
        const uint32_t offset = static_cast<uint32_t>(ch - 0x10000);

        result[0] = static_cast<uint16_t>(0xD800 + (offset >> 10));
        result[1] = static_cast<uint16_t>(0xDC00 + (offset & 0x3ff));

        return result + 2;
    }

    // Writes any code point, choosing between low and high by its value.
    static value_type any(value_type result, uint32_t ch)
    {
        if (ch < 0x10000) return low(result, ch);

        return high(result, ch);
    }
};
|---|
|  | 708 |  | 
|---|
// Output policy that counts UTF-32 code units: every code point takes exactly one.
struct utf32_counter
{
    typedef size_t value_type;

    // Code point below U+10000: one unit.
    static value_type low(value_type result, uint32_t)
    {
        const size_t units = 1;

        return result + units;
    }

    // Code point from U+10000 upwards: also one unit.
    static value_type high(value_type result, uint32_t)
    {
        const size_t units = 1;

        return result + units;
    }
};
|---|
|  | 723 |  | 
|---|
// Output policy that stores code points unchanged into a buffer of 32-bit units.
// All three entry points write one unit at result and return the next position.
struct utf32_writer
{
    typedef uint32_t* value_type;

    // Stores a code point below U+10000.
    static value_type low(value_type result, uint32_t ch)
    {
        *result++ = ch;

        return result;
    }

    // Stores a code point from U+10000 upwards.
    static value_type high(value_type result, uint32_t ch)
    {
        *result++ = ch;

        return result;
    }

    // Stores any code point.
    static value_type any(value_type result, uint32_t ch)
    {
        *result++ = ch;

        return result;
    }
};
|---|
|  | 749 |  | 
|---|
|  | 750 | template <size_t size> struct wchar_selector; | 
|---|
|  | 751 |  | 
|---|
|  | 752 | template <> struct wchar_selector<2> | 
|---|
|  | 753 | { | 
|---|
|  | 754 | typedef uint16_t type; | 
|---|
|  | 755 | typedef utf16_counter counter; | 
|---|
|  | 756 | typedef utf16_writer writer; | 
|---|
|  | 757 | }; | 
|---|
|  | 758 |  | 
|---|
|  | 759 | template <> struct wchar_selector<4> | 
|---|
|  | 760 | { | 
|---|
|  | 761 | typedef uint32_t type; | 
|---|
|  | 762 | typedef utf32_counter counter; | 
|---|
|  | 763 | typedef utf32_writer writer; | 
|---|
|  | 764 | }; | 
|---|
|  | 765 |  | 
|---|
|  | 766 | typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter; | 
|---|
|  | 767 | typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer; | 
|---|
|  | 768 |  | 
|---|
|  | 769 | template <typename Traits, typename opt_swap = opt_false> struct utf_decoder | 
|---|
|  | 770 | { | 
|---|
|  | 771 | static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result) | 
|---|
|  | 772 | { | 
|---|
|  | 773 | const uint8_t utf8_byte_mask = 0x3f; | 
|---|
|  | 774 |  | 
|---|
|  | 775 | while (size) | 
|---|
|  | 776 | { | 
|---|
|  | 777 | uint8_t lead = *data; | 
|---|
|  | 778 |  | 
|---|
|  | 779 | // 0xxxxxxx -> U+0000..U+007F | 
|---|
|  | 780 | if (lead < 0x80) | 
|---|
|  | 781 | { | 
|---|
|  | 782 | result = Traits::low(result, lead); | 
|---|
|  | 783 | data += 1; | 
|---|
|  | 784 | size -= 1; | 
|---|
|  | 785 |  | 
|---|
|  | 786 | // process aligned single-byte (ascii) blocks | 
|---|
|  | 787 | if ((reinterpret_cast<uintptr_t>(data) & 3) == 0) | 
|---|
|  | 788 | { | 
|---|
|  | 789 | while (size >= 4 && (*reinterpret_cast<const uint32_t*>(data) & 0x80808080) == 0) | 
|---|
|  | 790 | { | 
|---|
|  | 791 | result = Traits::low(result, data[0]); | 
|---|
|  | 792 | result = Traits::low(result, data[1]); | 
|---|
|  | 793 | result = Traits::low(result, data[2]); | 
|---|
|  | 794 | result = Traits::low(result, data[3]); | 
|---|
|  | 795 | data += 4; | 
|---|
|  | 796 | size -= 4; | 
|---|
|  | 797 | } | 
|---|
|  | 798 | } | 
|---|
|  | 799 | } | 
|---|
|  | 800 | // 110xxxxx -> U+0080..U+07FF | 
|---|
|  | 801 | else if ((unsigned)(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80) | 
|---|
|  | 802 | { | 
|---|
|  | 803 | result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask)); | 
|---|
|  | 804 | data += 2; | 
|---|
|  | 805 | size -= 2; | 
|---|
|  | 806 | } | 
|---|
|  | 807 | // 1110xxxx -> U+0800-U+FFFF | 
|---|
|  | 808 | else if ((unsigned)(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80) | 
|---|
|  | 809 | { | 
|---|
|  | 810 | result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask)); | 
|---|
|  | 811 | data += 3; | 
|---|
|  | 812 | size -= 3; | 
|---|
|  | 813 | } | 
|---|
|  | 814 | // 11110xxx -> U+10000..U+10FFFF | 
|---|
|  | 815 | else if ((unsigned)(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80) | 
|---|
|  | 816 | { | 
|---|
|  | 817 | result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask)); | 
|---|
|  | 818 | data += 4; | 
|---|
|  | 819 | size -= 4; | 
|---|
|  | 820 | } | 
|---|
|  | 821 | // 10xxxxxx or 11111xxx -> invalid | 
|---|
|  | 822 | else | 
|---|
|  | 823 | { | 
|---|
|  | 824 | data += 1; | 
|---|
|  | 825 | size -= 1; | 
|---|
|  | 826 | } | 
|---|
|  | 827 | } | 
|---|
|  | 828 |  | 
|---|
|  | 829 | return result; | 
|---|
|  | 830 | } | 
|---|
|  | 831 |  | 
|---|
|  | 832 | static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result) | 
|---|
|  | 833 | { | 
|---|
|  | 834 | const uint16_t* end = data + size; | 
|---|
|  | 835 |  | 
|---|
|  | 836 | while (data < end) | 
|---|
|  | 837 | { | 
|---|
|  | 838 | uint16_t lead = opt_swap::value ? endian_swap(*data) : *data; | 
|---|
|  | 839 |  | 
|---|
|  | 840 | // U+0000..U+D7FF | 
|---|
|  | 841 | if (lead < 0xD800) | 
|---|
|  | 842 | { | 
|---|
|  | 843 | result = Traits::low(result, lead); | 
|---|
|  | 844 | data += 1; | 
|---|
|  | 845 | } | 
|---|
|  | 846 | // U+E000..U+FFFF | 
|---|
|  | 847 | else if ((unsigned)(lead - 0xE000) < 0x2000) | 
|---|
|  | 848 | { | 
|---|
|  | 849 | result = Traits::low(result, lead); | 
|---|
|  | 850 | data += 1; | 
|---|
|  | 851 | } | 
|---|
|  | 852 | // surrogate pair lead | 
|---|
|  | 853 | else if ((unsigned)(lead - 0xD800) < 0x400 && data + 1 < end) | 
|---|
|  | 854 | { | 
|---|
|  | 855 | uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1]; | 
|---|
|  | 856 |  | 
|---|
|  | 857 | if ((unsigned)(next - 0xDC00) < 0x400) | 
|---|
|  | 858 | { | 
|---|
|  | 859 | result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff)); | 
|---|
|  | 860 | data += 2; | 
|---|
|  | 861 | } | 
|---|
|  | 862 | else | 
|---|
|  | 863 | { | 
|---|
|  | 864 | data += 1; | 
|---|
|  | 865 | } | 
|---|
|  | 866 | } | 
|---|
|  | 867 | else | 
|---|
|  | 868 | { | 
|---|
|  | 869 | data += 1; | 
|---|
|  | 870 | } | 
|---|
|  | 871 | } | 
|---|
|  | 872 |  | 
|---|
|  | 873 | return result; | 
|---|
|  | 874 | } | 
|---|
|  | 875 |  | 
|---|
|  | 876 | static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result) | 
|---|
|  | 877 | { | 
|---|
|  | 878 | const uint32_t* end = data + size; | 
|---|
|  | 879 |  | 
|---|
|  | 880 | while (data < end) | 
|---|
|  | 881 | { | 
|---|
|  | 882 | uint32_t lead = opt_swap::value ? endian_swap(*data) : *data; | 
|---|
|  | 883 |  | 
|---|
|  | 884 | // U+0000..U+FFFF | 
|---|
|  | 885 | if (lead < 0x10000) | 
|---|
|  | 886 | { | 
|---|
|  | 887 | result = Traits::low(result, lead); | 
|---|
|  | 888 | data += 1; | 
|---|
|  | 889 | } | 
|---|
|  | 890 | // U+10000..U+10FFFF | 
|---|
|  | 891 | else | 
|---|
|  | 892 | { | 
|---|
|  | 893 | result = Traits::high(result, lead); | 
|---|
|  | 894 | data += 1; | 
|---|
|  | 895 | } | 
|---|
|  | 896 | } | 
|---|
|  | 897 |  | 
|---|
|  | 898 | return result; | 
|---|
|  | 899 | } | 
|---|
|  | 900 | }; | 
|---|
|  | 901 |  | 
|---|
// Copies length units from data to result, byte-swapping each one with
// endian_swap(). Each unit is read before the same index is written.
template <typename T> inline void convert_utf_endian_swap(T* result, const T* data, size_t length)
{
    for (size_t remaining = length; remaining > 0; --remaining)
    {
        *result++ = endian_swap(*data++);
    }
}
|---|
|  | 906 |  | 
|---|
|  | 907 | inline void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) | 
|---|
|  | 908 | { | 
|---|
|  | 909 | for (size_t i = 0; i < length; ++i) result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i]))); | 
|---|
|  | 910 | } | 
|---|
|  | 911 | } | 
|---|
|  | 912 |  | 
|---|
|  | 913 | namespace | 
|---|
|  | 914 | { | 
|---|
// Bit flags stored in chartype_table; each flag marks the characters that are
// significant to one scanning loop or belong to one character class.
enum chartype_t
{
    ct_parse_pcdata  = 1 << 0, // \0, &, \r, <
    ct_parse_attr    = 1 << 1, // \0, &, \r, ', "
    ct_parse_attr_ws = 1 << 2, // \0, &, \r, ', ", \n, tab
    ct_space         = 1 << 3, // \r, \n, space, tab
    ct_parse_cdata   = 1 << 4, // \0, ], >, \r
    ct_parse_comment = 1 << 5, // \0, -, >, \r
    ct_symbol        = 1 << 6, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
    ct_start_symbol  = 1 << 7  // Any symbol > 127, a-z, A-Z, _, :
};
|---|
|  | 926 |  | 
|---|
// Per-byte combination of chartype_t flags, indexed by character code.
// All entries from 128 upwards carry ct_symbol | ct_start_symbol (192).
const unsigned char chartype_table[256] =
{
    // 0-15: \0, tab (9), \n (10), \r (13)
    55,  0,   0,   0,   0,   0,   0,   0,   0,   12,  12,  0,   0,   63,  0,   0,
    // 16-31
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    // 32-47: space, ", &, ', -, .
    8,   0,   6,   0,   0,   0,   7,   6,   0,   0,   0,   0,   0,   96,  64,  0,
    // 48-63: 0-9, :, <, >
    64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  192, 0,   1,   0,   48,  0,
    // 64-79: A-O
    0,   192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    // 80-95: P-Z, ], _
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0,   0,   16,  0,   192,
    // 96-111: a-o
    0,   192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    // 112-127: p-z
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0,   0,   0,   0,   0,

    // 128-255
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
};
|---|
|  | 947 |  | 
|---|
// Bit flags stored in chartypex_table, the second character classification table.
enum chartypex_t
{
    ctx_special_pcdata = 1 << 0, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
    ctx_special_attr   = 1 << 1, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
    ctx_start_symbol   = 1 << 2, // Any symbol > 127, a-z, A-Z, _
    ctx_digit          = 1 << 3, // 0-9
    ctx_symbol         = 1 << 4  // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
};
|---|
|  | 956 |  | 
|---|
// Per-byte combination of chartypex_t flags, indexed by character code.
// All entries from 128 upwards carry ctx_symbol | ctx_start_symbol (20).
const unsigned char chartypex_table[256] =
{
    // 0-15: tab (9), \n (10), \r (13) are the exceptions among control codes
    3,  3,  3,  3,  3,  3,  3,  3,  3,  0,  2,  3,  3,  2,  3,  3,
    // 16-31
    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
    // 32-47: ", &, -, .
    0,  0,  2,  0,  0,  0,  3,  0,  0,  0,  0,  0,  0,  16, 16, 0,
    // 48-63: 0-9, <, >
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0,  0,  3,  0,  3,  0,

    // 64-79: A-O
    0,  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    // 80-95: P-Z, _
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0,  0,  0,  0,  20,
    // 96-111: a-o
    0,  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    // 112-127: p-z
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0,  0,  0,  0,  0,

    // 128-255
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
};
|---|
|  | 978 |  | 
|---|
// Character class test: yields a non-zero value when the table entry for c has
// any of the bits in ct set. Note that c is evaluated more than once in wide mode.
#ifdef PUGIXML_WCHAR_MODE
// wide mode: every code unit at or above 128 shares the table entry at index 128
#define IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned int>(c) < 128 ? static_cast<unsigned int>(c) : 128] & (ct))
#else
// char mode: the character is reinterpreted as an unsigned byte index
#define IS_CHARTYPE_IMPL(c, ct, table) ((ct) & table[static_cast<unsigned char>(c)])
#endif

// tests against chartype_table (chartype_t flags) and chartypex_table (chartypex_t flags)
#define IS_CHARTYPEX(c, ct) IS_CHARTYPE_IMPL(c, ct, chartypex_table)
#define IS_CHARTYPE(c, ct) IS_CHARTYPE_IMPL(c, ct, chartype_table)
|---|
|  | 987 |  | 
|---|
// Returns true when the first byte in memory of the unsigned int value 1 is 1,
// i.e. the least significant byte is stored first.
bool is_little_endian()
{
    unsigned int probe = 1;
    unsigned char* first_byte = reinterpret_cast<unsigned char*>(&probe);

    return first_byte[0] == 1;
}
|---|
|  | 994 |  | 
|---|
|  | 995 | xml_encoding get_wchar_encoding() | 
|---|
|  | 996 | { | 
|---|
|  | 997 | STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); | 
|---|
|  | 998 |  | 
|---|
|  | 999 | if (sizeof(wchar_t) == 2) | 
|---|
|  | 1000 | return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | 
|---|
|  | 1001 | else | 
|---|
|  | 1002 | return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | 
|---|
|  | 1003 | } | 
|---|
|  | 1004 |  | 
|---|
|  | 1005 | xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3) | 
|---|
|  | 1006 | { | 
|---|
|  | 1007 | // look for BOM in first few bytes | 
|---|
|  | 1008 | if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be; | 
|---|
|  | 1009 | if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le; | 
|---|
|  | 1010 | if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be; | 
|---|
|  | 1011 | if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le; | 
|---|
|  | 1012 | if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8; | 
|---|
|  | 1013 |  | 
|---|
|  | 1014 | // look for <, <? or <?xm in various encodings | 
|---|
|  | 1015 | if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be; | 
|---|
|  | 1016 | if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le; | 
|---|
|  | 1017 | if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be; | 
|---|
|  | 1018 | if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le; | 
|---|
|  | 1019 | if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8; | 
|---|
|  | 1020 |  | 
|---|
|  | 1021 | // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early) | 
|---|
|  | 1022 | if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be; | 
|---|
|  | 1023 | if (d0 == 0x3c && d1 == 0) return encoding_utf16_le; | 
|---|
|  | 1024 |  | 
|---|
|  | 1025 | // no known BOM detected, assume utf8 | 
|---|
|  | 1026 | return encoding_utf8; | 
|---|
|  | 1027 | } | 
|---|
|  | 1028 |  | 
|---|
|  | 1029 | xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size) | 
|---|
|  | 1030 | { | 
|---|
|  | 1031 | // replace wchar encoding with utf implementation | 
|---|
|  | 1032 | if (encoding == encoding_wchar) return get_wchar_encoding(); | 
|---|
|  | 1033 |  | 
|---|
|  | 1034 | // replace utf16 encoding with utf16 with specific endianness | 
|---|
|  | 1035 | if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | 
|---|
|  | 1036 |  | 
|---|
|  | 1037 | // replace utf32 encoding with utf32 with specific endianness | 
|---|
|  | 1038 | if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | 
|---|
|  | 1039 |  | 
|---|
|  | 1040 | // only do autodetection if no explicit encoding is requested | 
|---|
|  | 1041 | if (encoding != encoding_auto) return encoding; | 
|---|
|  | 1042 |  | 
|---|
|  | 1043 | // skip encoding autodetection if input buffer is too small | 
|---|
|  | 1044 | if (size < 4) return encoding_utf8; | 
|---|
|  | 1045 |  | 
|---|
|  | 1046 | // try to guess encoding (based on XML specification, Appendix F.1) | 
|---|
|  | 1047 | const uint8_t* data = static_cast<const uint8_t*>(contents); | 
|---|
|  | 1048 |  | 
|---|
|  | 1049 | DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3]; | 
|---|
|  | 1050 |  | 
|---|
|  | 1051 | return guess_buffer_encoding(d0, d1, d2, d3); | 
|---|
|  | 1052 | } | 
|---|
|  | 1053 |  | 
|---|
|  | 1054 | bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) | 
|---|
|  | 1055 | { | 
|---|
|  | 1056 | if (is_mutable) | 
|---|
|  | 1057 | { | 
|---|
|  | 1058 | out_buffer = static_cast<char_t*>(const_cast<void*>(contents)); | 
|---|
|  | 1059 | } | 
|---|
|  | 1060 | else | 
|---|
|  | 1061 | { | 
|---|
|  | 1062 | void* buffer = global_allocate(size > 0 ? size : 1); | 
|---|
|  | 1063 | if (!buffer) return false; | 
|---|
|  | 1064 |  | 
|---|
|  | 1065 | memcpy(buffer, contents, size); | 
|---|
|  | 1066 |  | 
|---|
|  | 1067 | out_buffer = static_cast<char_t*>(buffer); | 
|---|
|  | 1068 | } | 
|---|
|  | 1069 |  | 
|---|
|  | 1070 | out_length = size / sizeof(char_t); | 
|---|
|  | 1071 |  | 
|---|
|  | 1072 | return true; | 
|---|
|  | 1073 | } | 
|---|
|  | 1074 |  | 
|---|
|  | 1075 | #ifdef PUGIXML_WCHAR_MODE | 
|---|
|  | 1076 | inline bool need_endian_swap_utf(xml_encoding le, xml_encoding re) | 
|---|
|  | 1077 | { | 
|---|
|  | 1078 | return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) || | 
|---|
|  | 1079 | (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be); | 
|---|
|  | 1080 | } | 
|---|
|  | 1081 |  | 
|---|
|  | 1082 | bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) | 
|---|
|  | 1083 | { | 
|---|
|  | 1084 | const char_t* data = static_cast<const char_t*>(contents); | 
|---|
|  | 1085 |  | 
|---|
|  | 1086 | if (is_mutable) | 
|---|
|  | 1087 | { | 
|---|
|  | 1088 | out_buffer = const_cast<char_t*>(data); | 
|---|
|  | 1089 | } | 
|---|
|  | 1090 | else | 
|---|
|  | 1091 | { | 
|---|
|  | 1092 | out_buffer = static_cast<char_t*>(global_allocate(size > 0 ? size : 1)); | 
|---|
|  | 1093 | if (!out_buffer) return false; | 
|---|
|  | 1094 | } | 
|---|
|  | 1095 |  | 
|---|
|  | 1096 | out_length = size / sizeof(char_t); | 
|---|
|  | 1097 |  | 
|---|
|  | 1098 | convert_wchar_endian_swap(out_buffer, data, out_length); | 
|---|
|  | 1099 |  | 
|---|
|  | 1100 | return true; | 
|---|
|  | 1101 | } | 
|---|
|  | 1102 |  | 
|---|
|  | 1103 | bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size) | 
|---|
|  | 1104 | { | 
|---|
|  | 1105 | const uint8_t* data = static_cast<const uint8_t*>(contents); | 
|---|
|  | 1106 |  | 
|---|
|  | 1107 | // first pass: get length in wchar_t units | 
|---|
|  | 1108 | out_length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0); | 
|---|
|  | 1109 |  | 
|---|
|  | 1110 | // allocate buffer of suitable length | 
|---|
|  | 1111 | out_buffer = static_cast<char_t*>(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); | 
|---|
|  | 1112 | if (!out_buffer) return false; | 
|---|
|  | 1113 |  | 
|---|
|  | 1114 | // second pass: convert utf8 input to wchar_t | 
|---|
|  | 1115 | wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer); | 
|---|
|  | 1116 | wchar_writer::value_type out_end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, out_begin); | 
|---|
|  | 1117 |  | 
|---|
|  | 1118 | assert(out_end == out_begin + out_length); | 
|---|
|  | 1119 | (void)!out_end; | 
|---|
|  | 1120 |  | 
|---|
|  | 1121 | return true; | 
|---|
|  | 1122 | } | 
|---|
|  | 1123 |  | 
|---|
|  | 1124 | template <typename opt_swap> bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) | 
|---|
|  | 1125 | { | 
|---|
|  | 1126 | const uint16_t* data = static_cast<const uint16_t*>(contents); | 
|---|
|  | 1127 | size_t length = size / sizeof(uint16_t); | 
|---|
|  | 1128 |  | 
|---|
|  | 1129 | // first pass: get length in wchar_t units | 
|---|
|  | 1130 | out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf16_block(data, length, 0); | 
|---|
|  | 1131 |  | 
|---|
|  | 1132 | // allocate buffer of suitable length | 
|---|
|  | 1133 | out_buffer = static_cast<char_t*>(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); | 
|---|
|  | 1134 | if (!out_buffer) return false; | 
|---|
|  | 1135 |  | 
|---|
|  | 1136 | // second pass: convert utf16 input to wchar_t | 
|---|
|  | 1137 | wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer); | 
|---|
|  | 1138 | wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf16_block(data, length, out_begin); | 
|---|
|  | 1139 |  | 
|---|
|  | 1140 | assert(out_end == out_begin + out_length); | 
|---|
|  | 1141 | (void)!out_end; | 
|---|
|  | 1142 |  | 
|---|
|  | 1143 | return true; | 
|---|
|  | 1144 | } | 
|---|
|  | 1145 |  | 
|---|
|  | 1146 | template <typename opt_swap> bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) | 
|---|
|  | 1147 | { | 
|---|
|  | 1148 | const uint32_t* data = static_cast<const uint32_t*>(contents); | 
|---|
|  | 1149 | size_t length = size / sizeof(uint32_t); | 
|---|
|  | 1150 |  | 
|---|
|  | 1151 | // first pass: get length in wchar_t units | 
|---|
|  | 1152 | out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf32_block(data, length, 0); | 
|---|
|  | 1153 |  | 
|---|
|  | 1154 | // allocate buffer of suitable length | 
|---|
|  | 1155 | out_buffer = static_cast<char_t*>(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); | 
|---|
|  | 1156 | if (!out_buffer) return false; | 
|---|
|  | 1157 |  | 
|---|
|  | 1158 | // second pass: convert utf32 input to wchar_t | 
|---|
|  | 1159 | wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer); | 
|---|
|  | 1160 | wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf32_block(data, length, out_begin); | 
|---|
|  | 1161 |  | 
|---|
|  | 1162 | assert(out_end == out_begin + out_length); | 
|---|
|  | 1163 | (void)!out_end; | 
|---|
|  | 1164 |  | 
|---|
|  | 1165 | return true; | 
|---|
|  | 1166 | } | 
|---|
|  | 1167 |  | 
|---|
|  | 1168 | bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) | 
|---|
|  | 1169 | { | 
|---|
|  | 1170 | // get native encoding | 
|---|
|  | 1171 | xml_encoding wchar_encoding = get_wchar_encoding(); | 
|---|
|  | 1172 |  | 
|---|
|  | 1173 | // fast path: no conversion required | 
|---|
|  | 1174 | if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); | 
|---|
|  | 1175 |  | 
|---|
|  | 1176 | // only endian-swapping is required | 
|---|
|  | 1177 | if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable); | 
|---|
|  | 1178 |  | 
|---|
|  | 1179 | // source encoding is utf8 | 
|---|
|  | 1180 | if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size); | 
|---|
|  | 1181 |  | 
|---|
|  | 1182 | // source encoding is utf16 | 
|---|
|  | 1183 | if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) | 
|---|
|  | 1184 | { | 
|---|
|  | 1185 | xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | 
|---|
|  | 1186 |  | 
|---|
|  | 1187 | return (native_encoding == encoding) ? | 
|---|
|  | 1188 | convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) : | 
|---|
|  | 1189 | convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true()); | 
|---|
|  | 1190 | } | 
|---|
|  | 1191 |  | 
|---|
|  | 1192 | // source encoding is utf32 | 
|---|
|  | 1193 | if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) | 
|---|
|  | 1194 | { | 
|---|
|  | 1195 | xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | 
|---|
|  | 1196 |  | 
|---|
|  | 1197 | return (native_encoding == encoding) ? | 
|---|
|  | 1198 | convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) : | 
|---|
|  | 1199 | convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true()); | 
|---|
|  | 1200 | } | 
|---|
|  | 1201 |  | 
|---|
|  | 1202 | assert(!"Invalid encoding"); | 
|---|
|  | 1203 | return false; | 
|---|
|  | 1204 | } | 
|---|
|  | 1205 | #else | 
|---|
|  | 1206 | template <typename opt_swap> bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) | 
|---|
|  | 1207 | { | 
|---|
|  | 1208 | const uint16_t* data = static_cast<const uint16_t*>(contents); | 
|---|
|  | 1209 | size_t length = size / sizeof(uint16_t); | 
|---|
|  | 1210 |  | 
|---|
|  | 1211 | // first pass: get length in utf8 units | 
|---|
|  | 1212 | out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(data, length, 0); | 
|---|
|  | 1213 |  | 
|---|
|  | 1214 | // allocate buffer of suitable length | 
|---|
|  | 1215 | out_buffer = static_cast<char_t*>(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); | 
|---|
|  | 1216 | if (!out_buffer) return false; | 
|---|
|  | 1217 |  | 
|---|
|  | 1218 | // second pass: convert utf16 input to utf8 | 
|---|
|  | 1219 | uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer); | 
|---|
|  | 1220 | uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf16_block(data, length, out_begin); | 
|---|
|  | 1221 |  | 
|---|
|  | 1222 | assert(out_end == out_begin + out_length); | 
|---|
|  | 1223 | (void)!out_end; | 
|---|
|  | 1224 |  | 
|---|
|  | 1225 | return true; | 
|---|
|  | 1226 | } | 
|---|
|  | 1227 |  | 
|---|
|  | 1228 | template <typename opt_swap> bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) | 
|---|
|  | 1229 | { | 
|---|
|  | 1230 | const uint32_t* data = static_cast<const uint32_t*>(contents); | 
|---|
|  | 1231 | size_t length = size / sizeof(uint32_t); | 
|---|
|  | 1232 |  | 
|---|
|  | 1233 | // first pass: get length in utf8 units | 
|---|
|  | 1234 | out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(data, length, 0); | 
|---|
|  | 1235 |  | 
|---|
|  | 1236 | // allocate buffer of suitable length | 
|---|
|  | 1237 | out_buffer = static_cast<char_t*>(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); | 
|---|
|  | 1238 | if (!out_buffer) return false; | 
|---|
|  | 1239 |  | 
|---|
|  | 1240 | // second pass: convert utf32 input to utf8 | 
|---|
|  | 1241 | uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer); | 
|---|
|  | 1242 | uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf32_block(data, length, out_begin); | 
|---|
|  | 1243 |  | 
|---|
|  | 1244 | assert(out_end == out_begin + out_length); | 
|---|
|  | 1245 | (void)!out_end; | 
|---|
|  | 1246 |  | 
|---|
|  | 1247 | return true; | 
|---|
|  | 1248 | } | 
|---|
|  | 1249 |  | 
|---|
|  | 1250 | bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) | 
|---|
|  | 1251 | { | 
|---|
|  | 1252 | // fast path: no conversion required | 
|---|
|  | 1253 | if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); | 
|---|
|  | 1254 |  | 
|---|
|  | 1255 | // source encoding is utf16 | 
|---|
|  | 1256 | if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) | 
|---|
|  | 1257 | { | 
|---|
|  | 1258 | xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | 
|---|
|  | 1259 |  | 
|---|
|  | 1260 | return (native_encoding == encoding) ? | 
|---|
|  | 1261 | convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) : | 
|---|
|  | 1262 | convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true()); | 
|---|
|  | 1263 | } | 
|---|
|  | 1264 |  | 
|---|
|  | 1265 | // source encoding is utf32 | 
|---|
|  | 1266 | if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) | 
|---|
|  | 1267 | { | 
|---|
|  | 1268 | xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | 
|---|
|  | 1269 |  | 
|---|
|  | 1270 | return (native_encoding == encoding) ? | 
|---|
|  | 1271 | convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) : | 
|---|
|  | 1272 | convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true()); | 
|---|
|  | 1273 | } | 
|---|
|  | 1274 |  | 
|---|
|  | 1275 | assert(!"Invalid encoding"); | 
|---|
|  | 1276 | return false; | 
|---|
|  | 1277 | } | 
|---|
|  | 1278 | #endif | 
|---|
|  | 1279 |  | 
|---|
|  | 1280 | size_t as_utf8_begin(const wchar_t* str, size_t length) | 
|---|
|  | 1281 | { | 
|---|
|  | 1282 | STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); | 
|---|
|  | 1283 |  | 
|---|
|  | 1284 | // get length in utf8 characters | 
|---|
|  | 1285 | return sizeof(wchar_t) == 2 ? | 
|---|
|  | 1286 | utf_decoder<utf8_counter>::decode_utf16_block(reinterpret_cast<const uint16_t*>(str), length, 0) : | 
|---|
|  | 1287 | utf_decoder<utf8_counter>::decode_utf32_block(reinterpret_cast<const uint32_t*>(str), length, 0); | 
|---|
|  | 1288 | } | 
|---|
|  | 1289 |  | 
|---|
|  | 1290 | void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length) | 
|---|
|  | 1291 | { | 
|---|
|  | 1292 | STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); | 
|---|
|  | 1293 |  | 
|---|
|  | 1294 | // convert to utf8 | 
|---|
|  | 1295 | uint8_t* begin = reinterpret_cast<uint8_t*>(buffer); | 
|---|
|  | 1296 | uint8_t* end = sizeof(wchar_t) == 2 ? | 
|---|
|  | 1297 | utf_decoder<utf8_writer>::decode_utf16_block(reinterpret_cast<const uint16_t*>(str), length, begin) : | 
|---|
|  | 1298 | utf_decoder<utf8_writer>::decode_utf32_block(reinterpret_cast<const uint32_t*>(str), length, begin); | 
|---|
|  | 1299 |  | 
|---|
|  | 1300 | assert(begin + size == end); | 
|---|
|  | 1301 | (void)!end; | 
|---|
|  | 1302 |  | 
|---|
|  | 1303 | // zero-terminate | 
|---|
|  | 1304 | buffer[size] = 0; | 
|---|
|  | 1305 | } | 
|---|
|  | 1306 |  | 
|---|
|  | 1307 | #ifndef PUGIXML_NO_STL | 
|---|
|  | 1308 | std::string as_utf8_impl(const wchar_t* str, size_t length) | 
|---|
|  | 1309 | { | 
|---|
|  | 1310 | // first pass: get length in utf8 characters | 
|---|
|  | 1311 | size_t size = as_utf8_begin(str, length); | 
|---|
|  | 1312 |  | 
|---|
|  | 1313 | // allocate resulting string | 
|---|
|  | 1314 | std::string result; | 
|---|
|  | 1315 | result.resize(size); | 
|---|
|  | 1316 |  | 
|---|
|  | 1317 | // second pass: convert to utf8 | 
|---|
|  | 1318 | if (size > 0) as_utf8_end(&result[0], size, str, length); | 
|---|
|  | 1319 |  | 
|---|
|  | 1320 | return result; | 
|---|
|  | 1321 | } | 
|---|
|  | 1322 |  | 
|---|
|  | 1323 | std::wstring as_wide_impl(const char* str, size_t size) | 
|---|
|  | 1324 | { | 
|---|
|  | 1325 | const uint8_t* data = reinterpret_cast<const uint8_t*>(str); | 
|---|
|  | 1326 |  | 
|---|
|  | 1327 | // first pass: get length in wchar_t units | 
|---|
|  | 1328 | size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0); | 
|---|
|  | 1329 |  | 
|---|
|  | 1330 | // allocate resulting string | 
|---|
|  | 1331 | std::wstring result; | 
|---|
|  | 1332 | result.resize(length); | 
|---|
|  | 1333 |  | 
|---|
|  | 1334 | // second pass: convert to wchar_t | 
|---|
|  | 1335 | if (length > 0) | 
|---|
|  | 1336 | { | 
|---|
|  | 1337 | wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]); | 
|---|
|  | 1338 | wchar_writer::value_type end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, begin); | 
|---|
|  | 1339 |  | 
|---|
|  | 1340 | assert(begin + length == end); | 
|---|
|  | 1341 | (void)!end; | 
|---|
|  | 1342 | } | 
|---|
|  | 1343 |  | 
|---|
|  | 1344 | return result; | 
|---|
|  | 1345 | } | 
|---|
|  | 1346 | #endif | 
|---|
|  | 1347 |  | 
|---|
|  | 1348 | inline bool strcpy_insitu_allow(size_t length, uintptr_t allocated, char_t* target) | 
|---|
|  | 1349 | { | 
|---|
|  | 1350 | assert(target); | 
|---|
|  | 1351 | size_t target_length = strlength(target); | 
|---|
|  | 1352 |  | 
|---|
|  | 1353 | // always reuse document buffer memory if possible | 
|---|
|  | 1354 | if (!allocated) return target_length >= length; | 
|---|
|  | 1355 |  | 
|---|
|  | 1356 | // reuse heap memory if waste is not too great | 
|---|
|  | 1357 | const size_t reuse_threshold = 32; | 
|---|
|  | 1358 |  | 
|---|
|  | 1359 | return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2); | 
|---|
|  | 1360 | } | 
|---|
|  | 1361 |  | 
|---|
|  | 1362 | bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source) | 
|---|
|  | 1363 | { | 
|---|
|  | 1364 | size_t source_length = strlength(source); | 
|---|
|  | 1365 |  | 
|---|
|  | 1366 | if (source_length == 0) | 
|---|
|  | 1367 | { | 
|---|
|  | 1368 | // empty string and null pointer are equivalent, so just deallocate old memory | 
|---|
|  | 1369 | xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator; | 
|---|
|  | 1370 |  | 
|---|
|  | 1371 | if (header & header_mask) alloc->deallocate_string(dest); | 
|---|
|  | 1372 |  | 
|---|
|  | 1373 | // mark the string as not allocated | 
|---|
|  | 1374 | dest = 0; | 
|---|
|  | 1375 | header &= ~header_mask; | 
|---|
|  | 1376 |  | 
|---|
|  | 1377 | return true; | 
|---|
|  | 1378 | } | 
|---|
|  | 1379 | else if (dest && strcpy_insitu_allow(source_length, header & header_mask, dest)) | 
|---|
|  | 1380 | { | 
|---|
|  | 1381 | // we can reuse old buffer, so just copy the new data (including zero terminator) | 
|---|
|  | 1382 | memcpy(dest, source, (source_length + 1) * sizeof(char_t)); | 
|---|
|  | 1383 |  | 
|---|
|  | 1384 | return true; | 
|---|
|  | 1385 | } | 
|---|
|  | 1386 | else | 
|---|
|  | 1387 | { | 
|---|
|  | 1388 | xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator; | 
|---|
|  | 1389 |  | 
|---|
|  | 1390 | // allocate new buffer | 
|---|
|  | 1391 | char_t* buf = alloc->allocate_string(source_length + 1); | 
|---|
|  | 1392 | if (!buf) return false; | 
|---|
|  | 1393 |  | 
|---|
|  | 1394 | // copy the string (including zero terminator) | 
|---|
|  | 1395 | memcpy(buf, source, (source_length + 1) * sizeof(char_t)); | 
|---|
|  | 1396 |  | 
|---|
|  | 1397 | // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures) | 
|---|
|  | 1398 | if (header & header_mask) alloc->deallocate_string(dest); | 
|---|
|  | 1399 |  | 
|---|
|  | 1400 | // the string is now allocated, so set the flag | 
|---|
|  | 1401 | dest = buf; | 
|---|
|  | 1402 | header |= header_mask; | 
|---|
|  | 1403 |  | 
|---|
|  | 1404 | return true; | 
|---|
|  | 1405 | } | 
|---|
|  | 1406 | } | 
|---|
|  | 1407 |  | 
|---|
|  | 1408 | struct gap | 
|---|
|  | 1409 | { | 
|---|
|  | 1410 | char_t* end; | 
|---|
|  | 1411 | size_t size; | 
|---|
|  | 1412 |  | 
|---|
|  | 1413 | gap(): end(0), size(0) | 
|---|
|  | 1414 | { | 
|---|
|  | 1415 | } | 
|---|
|  | 1416 |  | 
|---|
|  | 1417 | // Push new gap, move s count bytes further (skipping the gap). | 
|---|
|  | 1418 | // Collapse previous gap. | 
|---|
|  | 1419 | void push(char_t*& s, size_t count) | 
|---|
|  | 1420 | { | 
|---|
|  | 1421 | if (end) // there was a gap already; collapse it | 
|---|
|  | 1422 | { | 
|---|
|  | 1423 | // Move [old_gap_end, new_gap_start) to [old_gap_start, ...) | 
|---|
|  | 1424 | assert(s >= end); | 
|---|
|  | 1425 | memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end)); | 
|---|
|  | 1426 | } | 
|---|
|  | 1427 |  | 
|---|
|  | 1428 | s += count; // end of current gap | 
|---|
|  | 1429 |  | 
|---|
|  | 1430 | // "merge" two gaps | 
|---|
|  | 1431 | end = s; | 
|---|
|  | 1432 | size += count; | 
|---|
|  | 1433 | } | 
|---|
|  | 1434 |  | 
|---|
|  | 1435 | // Collapse all gaps, return past-the-end pointer | 
|---|
|  | 1436 | char_t* flush(char_t* s) | 
|---|
|  | 1437 | { | 
|---|
|  | 1438 | if (end) | 
|---|
|  | 1439 | { | 
|---|
|  | 1440 | // Move [old_gap_end, current_pos) to [old_gap_start, ...) | 
|---|
|  | 1441 | assert(s >= end); | 
|---|
|  | 1442 | memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end)); | 
|---|
|  | 1443 |  | 
|---|
|  | 1444 | return s - size; | 
|---|
|  | 1445 | } | 
|---|
|  | 1446 | else return s; | 
|---|
|  | 1447 | } | 
|---|
|  | 1448 | }; | 
|---|
|  | 1449 |  | 
|---|
|  | 1450 | char_t* strconv_escape(char_t* s, gap& g) | 
|---|
|  | 1451 | { | 
|---|
|  | 1452 | char_t* stre = s + 1; | 
|---|
|  | 1453 |  | 
|---|
|  | 1454 | switch (*stre) | 
|---|
|  | 1455 | { | 
|---|
|  | 1456 | case '#':       // &#... | 
|---|
|  | 1457 | { | 
|---|
|  | 1458 | unsigned int ucsc = 0; | 
|---|
|  | 1459 |  | 
|---|
|  | 1460 | if (stre[1] == 'x') // &#x... (hex code) | 
|---|
|  | 1461 | { | 
|---|
|  | 1462 | stre += 2; | 
|---|
|  | 1463 |  | 
|---|
|  | 1464 | char_t ch = *stre; | 
|---|
|  | 1465 |  | 
|---|
|  | 1466 | if (ch == ';') return stre; | 
|---|
|  | 1467 |  | 
|---|
|  | 1468 | for (;;) | 
|---|
|  | 1469 | { | 
|---|
|  | 1470 | if (static_cast<unsigned int>(ch - '0') <= 9) | 
|---|
|  | 1471 | ucsc = 16 * ucsc + (ch - '0'); | 
|---|
|  | 1472 | else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5) | 
|---|
|  | 1473 | ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10); | 
|---|
|  | 1474 | else if (ch == ';') | 
|---|
|  | 1475 | break; | 
|---|
|  | 1476 | else // cancel | 
|---|
|  | 1477 | return stre; | 
|---|
|  | 1478 |  | 
|---|
|  | 1479 | ch = *++stre; | 
|---|
|  | 1480 | } | 
|---|
|  | 1481 |  | 
|---|
|  | 1482 | ++stre; | 
|---|
|  | 1483 | } | 
|---|
|  | 1484 | else    // &#... (dec code) | 
|---|
|  | 1485 | { | 
|---|
|  | 1486 | char_t ch = *++stre; | 
|---|
|  | 1487 |  | 
|---|
|  | 1488 | if (ch == ';') return stre; | 
|---|
|  | 1489 |  | 
|---|
|  | 1490 | for (;;) | 
|---|
|  | 1491 | { | 
|---|
|  | 1492 | if (static_cast<unsigned int>(ch - '0') <= 9) | 
|---|
|  | 1493 | ucsc = 10 * ucsc + (ch - '0'); | 
|---|
|  | 1494 | else if (ch == ';') | 
|---|
|  | 1495 | break; | 
|---|
|  | 1496 | else // cancel | 
|---|
|  | 1497 | return stre; | 
|---|
|  | 1498 |  | 
|---|
|  | 1499 | ch = *++stre; | 
|---|
|  | 1500 | } | 
|---|
|  | 1501 |  | 
|---|
|  | 1502 | ++stre; | 
|---|
|  | 1503 | } | 
|---|
|  | 1504 |  | 
|---|
|  | 1505 | #ifdef PUGIXML_WCHAR_MODE | 
|---|
|  | 1506 | s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc)); | 
|---|
|  | 1507 | #else | 
|---|
|  | 1508 | s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc)); | 
|---|
|  | 1509 | #endif | 
|---|
|  | 1510 |  | 
|---|
|  | 1511 | g.push(s, stre - s); | 
|---|
|  | 1512 | return stre; | 
|---|
|  | 1513 | } | 
|---|
|  | 1514 | case 'a':       // &a | 
|---|
|  | 1515 | { | 
|---|
|  | 1516 | ++stre; | 
|---|
|  | 1517 |  | 
|---|
|  | 1518 | if (*stre == 'm') // &am | 
|---|
|  | 1519 | { | 
|---|
|  | 1520 | if (*++stre == 'p' && *++stre == ';') // & | 
|---|
|  | 1521 | { | 
|---|
|  | 1522 | *s++ = '&'; | 
|---|
|  | 1523 | ++stre; | 
|---|
|  | 1524 |  | 
|---|
|  | 1525 | g.push(s, stre - s); | 
|---|
|  | 1526 | return stre; | 
|---|
|  | 1527 | } | 
|---|
|  | 1528 | } | 
|---|
|  | 1529 | else if (*stre == 'p') // &ap | 
|---|
|  | 1530 | { | 
|---|
|  | 1531 | if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // ' | 
|---|
|  | 1532 | { | 
|---|
|  | 1533 | *s++ = '\''; | 
|---|
|  | 1534 | ++stre; | 
|---|
|  | 1535 |  | 
|---|
|  | 1536 | g.push(s, stre - s); | 
|---|
|  | 1537 | return stre; | 
|---|
|  | 1538 | } | 
|---|
|  | 1539 | } | 
|---|
|  | 1540 | break; | 
|---|
|  | 1541 | } | 
|---|
|  | 1542 | case 'g': // &g | 
|---|
|  | 1543 | { | 
|---|
|  | 1544 | if (*++stre == 't' && *++stre == ';') // > | 
|---|
|  | 1545 | { | 
|---|
|  | 1546 | *s++ = '>'; | 
|---|
|  | 1547 | ++stre; | 
|---|
|  | 1548 |  | 
|---|
|  | 1549 | g.push(s, stre - s); | 
|---|
|  | 1550 | return stre; | 
|---|
|  | 1551 | } | 
|---|
|  | 1552 | break; | 
|---|
|  | 1553 | } | 
|---|
|  | 1554 | case 'l': // &l | 
|---|
|  | 1555 | { | 
|---|
|  | 1556 | if (*++stre == 't' && *++stre == ';') // < | 
|---|
|  | 1557 | { | 
|---|
|  | 1558 | *s++ = '<'; | 
|---|
|  | 1559 | ++stre; | 
|---|
|  | 1560 |  | 
|---|
|  | 1561 | g.push(s, stre - s); | 
|---|
|  | 1562 | return stre; | 
|---|
|  | 1563 | } | 
|---|
|  | 1564 | break; | 
|---|
|  | 1565 | } | 
|---|
|  | 1566 | case 'q': // &q | 
|---|
|  | 1567 | { | 
|---|
|  | 1568 | if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // " | 
|---|
|  | 1569 | { | 
|---|
|  | 1570 | *s++ = '"'; | 
|---|
|  | 1571 | ++stre; | 
|---|
|  | 1572 |  | 
|---|
|  | 1573 | g.push(s, stre - s); | 
|---|
|  | 1574 | return stre; | 
|---|
|  | 1575 | } | 
|---|
|  | 1576 | break; | 
|---|
|  | 1577 | } | 
|---|
|  | 1578 | } | 
|---|
|  | 1579 |  | 
|---|
|  | 1580 | return stre; | 
|---|
|  | 1581 | } | 
|---|
|  | 1582 |  | 
|---|
|  | 1583 | // Utility macro for last character handling | 
|---|
|  | 1584 | #define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e))) | 
|---|
|  | 1585 |  | 
|---|
|  | 1586 | char_t* strconv_comment(char_t* s, char_t endch) | 
|---|
|  | 1587 | { | 
|---|
|  | 1588 | gap g; | 
|---|
|  | 1589 |  | 
|---|
|  | 1590 | while (true) | 
|---|
|  | 1591 | { | 
|---|
|  | 1592 | while (!IS_CHARTYPE(*s, ct_parse_comment)) ++s; | 
|---|
|  | 1593 |  | 
|---|
|  | 1594 | if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair | 
|---|
|  | 1595 | { | 
|---|
|  | 1596 | *s++ = '\n'; // replace first one with 0x0a | 
|---|
|  | 1597 |  | 
|---|
|  | 1598 | if (*s == '\n') g.push(s, 1); | 
|---|
|  | 1599 | } | 
|---|
|  | 1600 | else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) // comment ends here | 
|---|
|  | 1601 | { | 
|---|
|  | 1602 | *g.flush(s) = 0; | 
|---|
|  | 1603 |  | 
|---|
|  | 1604 | return s + (s[2] == '>' ? 3 : 2); | 
|---|
|  | 1605 | } | 
|---|
|  | 1606 | else if (*s == 0) | 
|---|
|  | 1607 | { | 
|---|
|  | 1608 | return 0; | 
|---|
|  | 1609 | } | 
|---|
|  | 1610 | else ++s; | 
|---|
|  | 1611 | } | 
|---|
|  | 1612 | } | 
|---|
|  | 1613 |  | 
|---|
|  | 1614 | char_t* strconv_cdata(char_t* s, char_t endch) | 
|---|
|  | 1615 | { | 
|---|
|  | 1616 | gap g; | 
|---|
|  | 1617 |  | 
|---|
|  | 1618 | while (true) | 
|---|
|  | 1619 | { | 
|---|
|  | 1620 | while (!IS_CHARTYPE(*s, ct_parse_cdata)) ++s; | 
|---|
|  | 1621 |  | 
|---|
|  | 1622 | if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair | 
|---|
|  | 1623 | { | 
|---|
|  | 1624 | *s++ = '\n'; // replace first one with 0x0a | 
|---|
|  | 1625 |  | 
|---|
|  | 1626 | if (*s == '\n') g.push(s, 1); | 
|---|
|  | 1627 | } | 
|---|
|  | 1628 | else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) // CDATA ends here | 
|---|
|  | 1629 | { | 
|---|
|  | 1630 | *g.flush(s) = 0; | 
|---|
|  | 1631 |  | 
|---|
|  | 1632 | return s + 1; | 
|---|
|  | 1633 | } | 
|---|
|  | 1634 | else if (*s == 0) | 
|---|
|  | 1635 | { | 
|---|
|  | 1636 | return 0; | 
|---|
|  | 1637 | } | 
|---|
|  | 1638 | else ++s; | 
|---|
|  | 1639 | } | 
|---|
|  | 1640 | } | 
|---|
|  | 1641 |  | 
|---|
|  | 1642 | typedef char_t* (*strconv_pcdata_t)(char_t*); | 
|---|
|  | 1643 |  | 
|---|
|  | 1644 | template <typename opt_eol, typename opt_escape> struct strconv_pcdata_impl | 
|---|
|  | 1645 | { | 
|---|
|  | 1646 | static char_t* parse(char_t* s) | 
|---|
|  | 1647 | { | 
|---|
|  | 1648 | gap g; | 
|---|
|  | 1649 |  | 
|---|
|  | 1650 | while (true) | 
|---|
|  | 1651 | { | 
|---|
|  | 1652 | while (!IS_CHARTYPE(*s, ct_parse_pcdata)) ++s; | 
|---|
|  | 1653 |  | 
|---|
|  | 1654 | if (*s == '<') // PCDATA ends here | 
|---|
|  | 1655 | { | 
|---|
|  | 1656 | *g.flush(s) = 0; | 
|---|
|  | 1657 |  | 
|---|
|  | 1658 | return s + 1; | 
|---|
|  | 1659 | } | 
|---|
|  | 1660 | else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair | 
|---|
|  | 1661 | { | 
|---|
|  | 1662 | *s++ = '\n'; // replace first one with 0x0a | 
|---|
|  | 1663 |  | 
|---|
|  | 1664 | if (*s == '\n') g.push(s, 1); | 
|---|
|  | 1665 | } | 
|---|
|  | 1666 | else if (opt_escape::value && *s == '&') | 
|---|
|  | 1667 | { | 
|---|
|  | 1668 | s = strconv_escape(s, g); | 
|---|
|  | 1669 | } | 
|---|
|  | 1670 | else if (*s == 0) | 
|---|
|  | 1671 | { | 
|---|
|  | 1672 | return s; | 
|---|
|  | 1673 | } | 
|---|
|  | 1674 | else ++s; | 
|---|
|  | 1675 | } | 
|---|
|  | 1676 | } | 
|---|
|  | 1677 | }; | 
|---|
|  | 1678 |  | 
|---|
|  | 1679 | strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) | 
|---|
|  | 1680 | { | 
|---|
|  | 1681 | STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20); | 
|---|
|  | 1682 |  | 
|---|
|  | 1683 | switch ((optmask >> 4) & 3) // get bitmask for flags (eol escapes) | 
|---|
|  | 1684 | { | 
|---|
|  | 1685 | case 0: return strconv_pcdata_impl<opt_false, opt_false>::parse; | 
|---|
|  | 1686 | case 1: return strconv_pcdata_impl<opt_false, opt_true>::parse; | 
|---|
|  | 1687 | case 2: return strconv_pcdata_impl<opt_true, opt_false>::parse; | 
|---|
|  | 1688 | case 3: return strconv_pcdata_impl<opt_true, opt_true>::parse; | 
|---|
|  | 1689 | default: return 0; // should not get here | 
|---|
|  | 1690 | } | 
|---|
|  | 1691 | } | 
|---|
|  | 1692 |  | 
|---|
|  | 1693 | typedef char_t* (*strconv_attribute_t)(char_t*, char_t); | 
|---|
|  | 1694 |  | 
|---|
|  | 1695 | template <typename opt_escape> struct strconv_attribute_impl | 
|---|
|  | 1696 | { | 
|---|
|  | 1697 | static char_t* parse_wnorm(char_t* s, char_t end_quote) | 
|---|
|  | 1698 | { | 
|---|
|  | 1699 | gap g; | 
|---|
|  | 1700 |  | 
|---|
|  | 1701 | // trim leading whitespaces | 
|---|
|  | 1702 | if (IS_CHARTYPE(*s, ct_space)) | 
|---|
|  | 1703 | { | 
|---|
|  | 1704 | char_t* str = s; | 
|---|
|  | 1705 |  | 
|---|
|  | 1706 | do ++str; | 
|---|
|  | 1707 | while (IS_CHARTYPE(*str, ct_space)); | 
|---|
|  | 1708 |  | 
|---|
|  | 1709 | g.push(s, str - s); | 
|---|
|  | 1710 | } | 
|---|
|  | 1711 |  | 
|---|
|  | 1712 | while (true) | 
|---|
|  | 1713 | { | 
|---|
|  | 1714 | while (!IS_CHARTYPE(*s, ct_parse_attr_ws | ct_space)) ++s; | 
|---|
|  | 1715 |  | 
|---|
|  | 1716 | if (*s == end_quote) | 
|---|
|  | 1717 | { | 
|---|
|  | 1718 | char_t* str = g.flush(s); | 
|---|
|  | 1719 |  | 
|---|
|  | 1720 | do *str-- = 0; | 
|---|
|  | 1721 | while (IS_CHARTYPE(*str, ct_space)); | 
|---|
|  | 1722 |  | 
|---|
|  | 1723 | return s + 1; | 
|---|
|  | 1724 | } | 
|---|
|  | 1725 | else if (IS_CHARTYPE(*s, ct_space)) | 
|---|
|  | 1726 | { | 
|---|
|  | 1727 | *s++ = ' '; | 
|---|
|  | 1728 |  | 
|---|
|  | 1729 | if (IS_CHARTYPE(*s, ct_space)) | 
|---|
|  | 1730 | { | 
|---|
|  | 1731 | char_t* str = s + 1; | 
|---|
|  | 1732 | while (IS_CHARTYPE(*str, ct_space)) ++str; | 
|---|
|  | 1733 |  | 
|---|
|  | 1734 | g.push(s, str - s); | 
|---|
|  | 1735 | } | 
|---|
|  | 1736 | } | 
|---|
|  | 1737 | else if (opt_escape::value && *s == '&') | 
|---|
|  | 1738 | { | 
|---|
|  | 1739 | s = strconv_escape(s, g); | 
|---|
|  | 1740 | } | 
|---|
|  | 1741 | else if (!*s) | 
|---|
|  | 1742 | { | 
|---|
|  | 1743 | return 0; | 
|---|
|  | 1744 | } | 
|---|
|  | 1745 | else ++s; | 
|---|
|  | 1746 | } | 
|---|
|  | 1747 | } | 
|---|
|  | 1748 |  | 
|---|
|  | 1749 | static char_t* parse_wconv(char_t* s, char_t end_quote) | 
|---|
|  | 1750 | { | 
|---|
|  | 1751 | gap g; | 
|---|
|  | 1752 |  | 
|---|
|  | 1753 | while (true) | 
|---|
|  | 1754 | { | 
|---|
|  | 1755 | while (!IS_CHARTYPE(*s, ct_parse_attr_ws)) ++s; | 
|---|
|  | 1756 |  | 
|---|
|  | 1757 | if (*s == end_quote) | 
|---|
|  | 1758 | { | 
|---|
|  | 1759 | *g.flush(s) = 0; | 
|---|
|  | 1760 |  | 
|---|
|  | 1761 | return s + 1; | 
|---|
|  | 1762 | } | 
|---|
|  | 1763 | else if (IS_CHARTYPE(*s, ct_space)) | 
|---|
|  | 1764 | { | 
|---|
|  | 1765 | if (*s == '\r') | 
|---|
|  | 1766 | { | 
|---|
|  | 1767 | *s++ = ' '; | 
|---|
|  | 1768 |  | 
|---|
|  | 1769 | if (*s == '\n') g.push(s, 1); | 
|---|
|  | 1770 | } | 
|---|
|  | 1771 | else *s++ = ' '; | 
|---|
|  | 1772 | } | 
|---|
|  | 1773 | else if (opt_escape::value && *s == '&') | 
|---|
|  | 1774 | { | 
|---|
|  | 1775 | s = strconv_escape(s, g); | 
|---|
|  | 1776 | } | 
|---|
|  | 1777 | else if (!*s) | 
|---|
|  | 1778 | { | 
|---|
|  | 1779 | return 0; | 
|---|
|  | 1780 | } | 
|---|
|  | 1781 | else ++s; | 
|---|
|  | 1782 | } | 
|---|
|  | 1783 | } | 
|---|
|  | 1784 |  | 
|---|
|  | 1785 | static char_t* parse_eol(char_t* s, char_t end_quote) | 
|---|
|  | 1786 | { | 
|---|
|  | 1787 | gap g; | 
|---|
|  | 1788 |  | 
|---|
|  | 1789 | while (true) | 
|---|
|  | 1790 | { | 
|---|
|  | 1791 | while (!IS_CHARTYPE(*s, ct_parse_attr)) ++s; | 
|---|
|  | 1792 |  | 
|---|
|  | 1793 | if (*s == end_quote) | 
|---|
|  | 1794 | { | 
|---|
|  | 1795 | *g.flush(s) = 0; | 
|---|
|  | 1796 |  | 
|---|
|  | 1797 | return s + 1; | 
|---|
|  | 1798 | } | 
|---|
|  | 1799 | else if (*s == '\r') | 
|---|
|  | 1800 | { | 
|---|
|  | 1801 | *s++ = '\n'; | 
|---|
|  | 1802 |  | 
|---|
|  | 1803 | if (*s == '\n') g.push(s, 1); | 
|---|
|  | 1804 | } | 
|---|
|  | 1805 | else if (opt_escape::value && *s == '&') | 
|---|
|  | 1806 | { | 
|---|
|  | 1807 | s = strconv_escape(s, g); | 
|---|
|  | 1808 | } | 
|---|
|  | 1809 | else if (!*s) | 
|---|
|  | 1810 | { | 
|---|
|  | 1811 | return 0; | 
|---|
|  | 1812 | } | 
|---|
|  | 1813 | else ++s; | 
|---|
|  | 1814 | } | 
|---|
|  | 1815 | } | 
|---|
|  | 1816 |  | 
|---|
|  | 1817 | static char_t* parse_simple(char_t* s, char_t end_quote) | 
|---|
|  | 1818 | { | 
|---|
|  | 1819 | gap g; | 
|---|
|  | 1820 |  | 
|---|
|  | 1821 | while (true) | 
|---|
|  | 1822 | { | 
|---|
|  | 1823 | while (!IS_CHARTYPE(*s, ct_parse_attr)) ++s; | 
|---|
|  | 1824 |  | 
|---|
|  | 1825 | if (*s == end_quote) | 
|---|
|  | 1826 | { | 
|---|
|  | 1827 | *g.flush(s) = 0; | 
|---|
|  | 1828 |  | 
|---|
|  | 1829 | return s + 1; | 
|---|
|  | 1830 | } | 
|---|
|  | 1831 | else if (opt_escape::value && *s == '&') | 
|---|
|  | 1832 | { | 
|---|
|  | 1833 | s = strconv_escape(s, g); | 
|---|
|  | 1834 | } | 
|---|
|  | 1835 | else if (!*s) | 
|---|
|  | 1836 | { | 
|---|
|  | 1837 | return 0; | 
|---|
|  | 1838 | } | 
|---|
|  | 1839 | else ++s; | 
|---|
|  | 1840 | } | 
|---|
|  | 1841 | } | 
|---|
|  | 1842 | }; | 
|---|
|  | 1843 |  | 
|---|
|  | 1844 | strconv_attribute_t get_strconv_attribute(unsigned int optmask) | 
|---|
|  | 1845 | { | 
|---|
|  | 1846 | STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80); | 
|---|
|  | 1847 |  | 
|---|
|  | 1848 | switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes) | 
|---|
|  | 1849 | { | 
|---|
|  | 1850 | case 0:  return strconv_attribute_impl<opt_false>::parse_simple; | 
|---|
|  | 1851 | case 1:  return strconv_attribute_impl<opt_true>::parse_simple; | 
|---|
|  | 1852 | case 2:  return strconv_attribute_impl<opt_false>::parse_eol; | 
|---|
|  | 1853 | case 3:  return strconv_attribute_impl<opt_true>::parse_eol; | 
|---|
|  | 1854 | case 4:  return strconv_attribute_impl<opt_false>::parse_wconv; | 
|---|
|  | 1855 | case 5:  return strconv_attribute_impl<opt_true>::parse_wconv; | 
|---|
|  | 1856 | case 6:  return strconv_attribute_impl<opt_false>::parse_wconv; | 
|---|
|  | 1857 | case 7:  return strconv_attribute_impl<opt_true>::parse_wconv; | 
|---|
|  | 1858 | case 8:  return strconv_attribute_impl<opt_false>::parse_wnorm; | 
|---|
|  | 1859 | case 9:  return strconv_attribute_impl<opt_true>::parse_wnorm; | 
|---|
|  | 1860 | case 10: return strconv_attribute_impl<opt_false>::parse_wnorm; | 
|---|
|  | 1861 | case 11: return strconv_attribute_impl<opt_true>::parse_wnorm; | 
|---|
|  | 1862 | case 12: return strconv_attribute_impl<opt_false>::parse_wnorm; | 
|---|
|  | 1863 | case 13: return strconv_attribute_impl<opt_true>::parse_wnorm; | 
|---|
|  | 1864 | case 14: return strconv_attribute_impl<opt_false>::parse_wnorm; | 
|---|
|  | 1865 | case 15: return strconv_attribute_impl<opt_true>::parse_wnorm; | 
|---|
|  | 1866 | default: return 0; // should not get here | 
|---|
|  | 1867 | } | 
|---|
|  | 1868 | } | 
|---|
|  | 1869 |  | 
|---|
|  | 1870 | inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0) | 
|---|
|  | 1871 | { | 
|---|
|  | 1872 | xml_parse_result result; | 
|---|
|  | 1873 | result.status = status; | 
|---|
|  | 1874 | result.offset = offset; | 
|---|
|  | 1875 |  | 
|---|
|  | 1876 | return result; | 
|---|
|  | 1877 | } | 
|---|
|  | 1878 |  | 
|---|
|  | 1879 | struct xml_parser | 
|---|
|  | 1880 | { | 
|---|
|  | 1881 | xml_allocator alloc; | 
|---|
|  | 1882 | char_t* error_offset; | 
|---|
|  | 1883 | jmp_buf error_handler; | 
|---|
|  | 1884 |  | 
|---|
|  | 1885 | // Parser utilities. These macros expand inside the parse_* member functions and use their locals by name: s (current position in the text), cursor (current node), optmsk (option flags) and ch (character saved by ENDSEG). SKIPWS/SCANFOR/SCANWHILE only advance s (SCANFOR also stops at the terminating zero); PUSHNODE appends a child via append_node and raises status_out_of_memory when that fails; THROW_ERROR stores the failing position in error_offset and leaves through longjmp(error_handler, err), so it does not return; CHECK_ERROR raises the error when s is at the terminating zero. | 
|---|
|  | 1886 | #define SKIPWS()                        { while (IS_CHARTYPE(*s, ct_space)) ++s; } | 
|---|
|  | 1887 | #define OPTSET(OPT)                     ( optmsk & OPT ) | 
|---|
|  | 1888 | #define PUSHNODE(TYPE)          { cursor = append_node(cursor, alloc, TYPE); if (!cursor) THROW_ERROR(status_out_of_memory, s); } | 
|---|
|  | 1889 | #define POPNODE()                       { cursor = cursor->parent; } | 
|---|
|  | 1890 | #define SCANFOR(X)                      { while (*s != 0 && !(X)) ++s; } | 
|---|
|  | 1891 | #define SCANWHILE(X)            { while ((X)) ++s; } | 
|---|
|  | 1892 | #define ENDSEG()                        { ch = *s; *s = 0; ++s; } | 
|---|
|  | 1893 | #define THROW_ERROR(err, m)     error_offset = m, longjmp(error_handler, err) | 
|---|
|  | 1894 | #define CHECK_ERROR(err, m)     { if (*s == 0) THROW_ERROR(err, m); } | 
|---|
|  | 1895 |  | 
|---|
|  | 1896 | xml_parser(const xml_allocator& alloc): alloc(alloc), error_offset(0) | 
|---|
|  | 1897 | { | 
|---|
|  | 1898 | } | 
|---|
|  | 1899 |  | 
|---|
|  | 1900 | // DOCTYPE consists of nested sections of the following possible types: | 
|---|
|  | 1901 | // <!-- ... -->, <? ... ?>, "...", '...' | 
|---|
|  | 1902 | // <![...]]> | 
|---|
|  | 1903 | // <!...> | 
|---|
|  | 1904 | // First group can not contain nested groups | 
|---|
|  | 1905 | // Second group can contain nested groups of the same type | 
|---|
|  | 1906 | // Third group can contain all other groups | 
|---|
|  | 1907 | char_t* parse_doctype_primitive(char_t* s) | 
|---|
|  | 1908 | { | 
|---|
|  | 1909 | if (*s == '"' || *s == '\'') | 
|---|
|  | 1910 | { | 
|---|
|  | 1911 | // quoted string | 
|---|
|  | 1912 | char_t ch = *s++; | 
|---|
|  | 1913 | SCANFOR(*s == ch); | 
|---|
|  | 1914 | if (!*s) THROW_ERROR(status_bad_doctype, s); | 
|---|
|  | 1915 |  | 
|---|
|  | 1916 | s++; | 
|---|
|  | 1917 | } | 
|---|
|  | 1918 | else if (s[0] == '<' && s[1] == '?') | 
|---|
|  | 1919 | { | 
|---|
|  | 1920 | // <? ... ?> | 
|---|
|  | 1921 | s += 2; | 
|---|
|  | 1922 | SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype | 
|---|
|  | 1923 | if (!*s) THROW_ERROR(status_bad_doctype, s); | 
|---|
|  | 1924 |  | 
|---|
|  | 1925 | s += 2; | 
|---|
|  | 1926 | } | 
|---|
|  | 1927 | else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') | 
|---|
|  | 1928 | { | 
|---|
|  | 1929 | s += 4; | 
|---|
|  | 1930 | SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype | 
|---|
|  | 1931 | if (!*s) THROW_ERROR(status_bad_doctype, s); | 
|---|
|  | 1932 |  | 
|---|
|  | 1933 | s += 4; | 
|---|
|  | 1934 | } | 
|---|
|  | 1935 | else THROW_ERROR(status_bad_doctype, s); | 
|---|
|  | 1936 |  | 
|---|
|  | 1937 | return s; | 
|---|
|  | 1938 | } | 
|---|
|  | 1939 |  | 
|---|
|  | 1940 | char_t* parse_doctype_ignore(char_t* s) | 
|---|
|  | 1941 | { | 
|---|
|  | 1942 | assert(s[0] == '<' && s[1] == '!' && s[2] == '['); | 
|---|
|  | 1943 | s++; | 
|---|
|  | 1944 |  | 
|---|
|  | 1945 | while (*s) | 
|---|
|  | 1946 | { | 
|---|
|  | 1947 | if (s[0] == '<' && s[1] == '!' && s[2] == '[') | 
|---|
|  | 1948 | { | 
|---|
|  | 1949 | // nested ignore section | 
|---|
|  | 1950 | s = parse_doctype_ignore(s); | 
|---|
|  | 1951 | } | 
|---|
|  | 1952 | else if (s[0] == ']' && s[1] == ']' && s[2] == '>') | 
|---|
|  | 1953 | { | 
|---|
|  | 1954 | // ignore section end | 
|---|
|  | 1955 | s += 3; | 
|---|
|  | 1956 |  | 
|---|
|  | 1957 | return s; | 
|---|
|  | 1958 | } | 
|---|
|  | 1959 | else s++; | 
|---|
|  | 1960 | } | 
|---|
|  | 1961 |  | 
|---|
|  | 1962 | THROW_ERROR(status_bad_doctype, s); | 
|---|
|  | 1963 |  | 
|---|
|  | 1964 | return s; | 
|---|
|  | 1965 | } | 
|---|
|  | 1966 |  | 
|---|
|  | 1967 | char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel) | 
|---|
|  | 1968 | { | 
|---|
|  | 1969 | assert(s[0] == '<' && s[1] == '!'); | 
|---|
|  | 1970 | s++; | 
|---|
|  | 1971 |  | 
|---|
|  | 1972 | while (*s) | 
|---|
|  | 1973 | { | 
|---|
|  | 1974 | if (s[0] == '<' && s[1] == '!' && s[2] != '-') | 
|---|
|  | 1975 | { | 
|---|
|  | 1976 | if (s[2] == '[') | 
|---|
|  | 1977 | { | 
|---|
|  | 1978 | // ignore | 
|---|
|  | 1979 | s = parse_doctype_ignore(s); | 
|---|
|  | 1980 | } | 
|---|
|  | 1981 | else | 
|---|
|  | 1982 | { | 
|---|
|  | 1983 | // some control group | 
|---|
|  | 1984 | s = parse_doctype_group(s, endch, false); | 
|---|
|  | 1985 | } | 
|---|
|  | 1986 | } | 
|---|
|  | 1987 | else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') | 
|---|
|  | 1988 | { | 
|---|
|  | 1989 | // unknown tag (forbidden), or some primitive group | 
|---|
|  | 1990 | s = parse_doctype_primitive(s); | 
|---|
|  | 1991 | } | 
|---|
|  | 1992 | else if (*s == '>') | 
|---|
|  | 1993 | { | 
|---|
|  | 1994 | s++; | 
|---|
|  | 1995 |  | 
|---|
|  | 1996 | return s; | 
|---|
|  | 1997 | } | 
|---|
|  | 1998 | else s++; | 
|---|
|  | 1999 | } | 
|---|
|  | 2000 |  | 
|---|
|  | 2001 | if (!toplevel || endch != '>') THROW_ERROR(status_bad_doctype, s); | 
|---|
|  | 2002 |  | 
|---|
|  | 2003 | return s; | 
|---|
|  | 2004 | } | 
|---|
|  | 2005 |  | 
|---|
|  | 2006 | char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) | 
|---|
|  | 2007 | { | 
|---|
|  | 2008 | // parse node contents, starting with exclamation mark | 
|---|
|  | 2009 | ++s; | 
|---|
|  | 2010 |  | 
|---|
|  | 2011 | if (*s == '-') // '<!-...' | 
|---|
|  | 2012 | { | 
|---|
|  | 2013 | ++s; | 
|---|
|  | 2014 |  | 
|---|
|  | 2015 | if (*s == '-') // '<!--...' | 
|---|
|  | 2016 | { | 
|---|
|  | 2017 | ++s; | 
|---|
|  | 2018 |  | 
|---|
|  | 2019 | if (OPTSET(parse_comments)) | 
|---|
|  | 2020 | { | 
|---|
|  | 2021 | PUSHNODE(node_comment); // Append a new node on the tree. | 
|---|
|  | 2022 | cursor->value = s; // Save the offset. | 
|---|
|  | 2023 | } | 
|---|
|  | 2024 |  | 
|---|
|  | 2025 | if (OPTSET(parse_eol) && OPTSET(parse_comments)) | 
|---|
|  | 2026 | { | 
|---|
|  | 2027 | s = strconv_comment(s, endch); | 
|---|
|  | 2028 |  | 
|---|
|  | 2029 | if (!s) THROW_ERROR(status_bad_comment, cursor->value); | 
|---|
|  | 2030 | } | 
|---|
|  | 2031 | else | 
|---|
|  | 2032 | { | 
|---|
|  | 2033 | // Scan for terminating '-->'. | 
|---|
|  | 2034 | SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')); | 
|---|
|  | 2035 | CHECK_ERROR(status_bad_comment, s); | 
|---|
|  | 2036 |  | 
|---|
|  | 2037 | if (OPTSET(parse_comments)) | 
|---|
|  | 2038 | *s = 0; // Zero-terminate this segment at the first terminating '-'. | 
|---|
|  | 2039 |  | 
|---|
|  | 2040 | s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'. | 
|---|
|  | 2041 | } | 
|---|
|  | 2042 | } | 
|---|
|  | 2043 | else THROW_ERROR(status_bad_comment, s); | 
|---|
|  | 2044 | } | 
|---|
|  | 2045 | else if (*s == '[') | 
|---|
|  | 2046 | { | 
|---|
|  | 2047 | // '<![CDATA[...' | 
|---|
|  | 2048 | if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[') | 
|---|
|  | 2049 | { | 
|---|
|  | 2050 | ++s; | 
|---|
|  | 2051 |  | 
|---|
|  | 2052 | if (OPTSET(parse_cdata)) | 
|---|
|  | 2053 | { | 
|---|
|  | 2054 | PUSHNODE(node_cdata); // Append a new node on the tree. | 
|---|
|  | 2055 | cursor->value = s; // Save the offset. | 
|---|
|  | 2056 |  | 
|---|
|  | 2057 | if (OPTSET(parse_eol)) | 
|---|
|  | 2058 | { | 
|---|
|  | 2059 | s = strconv_cdata(s, endch); | 
|---|
|  | 2060 |  | 
|---|
|  | 2061 | if (!s) THROW_ERROR(status_bad_cdata, cursor->value); | 
|---|
|  | 2062 | } | 
|---|
|  | 2063 | else | 
|---|
|  | 2064 | { | 
|---|
|  | 2065 | // Scan for terminating ']]>'. | 
|---|
|  | 2066 | SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')); | 
|---|
|  | 2067 | CHECK_ERROR(status_bad_cdata, s); | 
|---|
|  | 2068 |  | 
|---|
|  | 2069 | *s++ = 0; // Zero-terminate this segment. | 
|---|
|  | 2070 | } | 
|---|
|  | 2071 | } | 
|---|
|  | 2072 | else // Flagged for discard, but we still have to scan for the terminator. | 
|---|
|  | 2073 | { | 
|---|
|  | 2074 | // Scan for terminating ']]>'. | 
|---|
|  | 2075 | SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')); | 
|---|
|  | 2076 | CHECK_ERROR(status_bad_cdata, s); | 
|---|
|  | 2077 |  | 
|---|
|  | 2078 | ++s; | 
|---|
|  | 2079 | } | 
|---|
|  | 2080 |  | 
|---|
|  | 2081 | s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'. | 
|---|
|  | 2082 | } | 
|---|
|  | 2083 | else THROW_ERROR(status_bad_cdata, s); | 
|---|
|  | 2084 | } | 
|---|
|  | 2085 | else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E')) | 
|---|
|  | 2086 | { | 
|---|
|  | 2087 | s -= 2; | 
|---|
|  | 2088 |  | 
|---|
|  | 2089 | if (cursor->parent) THROW_ERROR(status_bad_doctype, s); | 
|---|
|  | 2090 |  | 
|---|
|  | 2091 | char_t* mark = s + 9; | 
|---|
|  | 2092 |  | 
|---|
|  | 2093 | s = parse_doctype_group(s, endch, true); | 
|---|
|  | 2094 |  | 
|---|
|  | 2095 | if (OPTSET(parse_doctype)) | 
|---|
|  | 2096 | { | 
|---|
|  | 2097 | while (IS_CHARTYPE(*mark, ct_space)) ++mark; | 
|---|
|  | 2098 |  | 
|---|
|  | 2099 | PUSHNODE(node_doctype); | 
|---|
|  | 2100 |  | 
|---|
|  | 2101 | cursor->value = mark; | 
|---|
|  | 2102 |  | 
|---|
|  | 2103 | assert((s[0] == 0 && endch == '>') || s[-1] == '>'); | 
|---|
|  | 2104 | s[*s == 0 ? 0 : -1] = 0; | 
|---|
|  | 2105 |  | 
|---|
|  | 2106 | POPNODE(); | 
|---|
|  | 2107 | } | 
|---|
|  | 2108 | } | 
|---|
|  | 2109 | else if (*s == 0 && endch == '-') THROW_ERROR(status_bad_comment, s); | 
|---|
|  | 2110 | else if (*s == 0 && endch == '[') THROW_ERROR(status_bad_cdata, s); | 
|---|
|  | 2111 | else THROW_ERROR(status_unrecognized_tag, s); | 
|---|
|  | 2112 |  | 
|---|
|  | 2113 | return s; | 
|---|
|  | 2114 | } | 
|---|
|  | 2115 |  | 
|---|
|  | 2116 | char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) | 
|---|
|  | 2117 | { | 
|---|
|  | 2118 | // load into registers | 
|---|
|  | 2119 | xml_node_struct* cursor = ref_cursor; | 
|---|
|  | 2120 | char_t ch = 0; | 
|---|
|  | 2121 |  | 
|---|
|  | 2122 | // parse node contents, starting with question mark | 
|---|
|  | 2123 | ++s; | 
|---|
|  | 2124 |  | 
|---|
|  | 2125 | // read PI target | 
|---|
|  | 2126 | char_t* target = s; | 
|---|
|  | 2127 |  | 
|---|
|  | 2128 | if (!IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_pi, s); | 
|---|
|  | 2129 |  | 
|---|
|  | 2130 | SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); | 
|---|
|  | 2131 | CHECK_ERROR(status_bad_pi, s); | 
|---|
|  | 2132 |  | 
|---|
|  | 2133 | // determine node type; stricmp / strcasecmp is not portable | 
|---|
|  | 2134 | bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s; | 
|---|
|  | 2135 |  | 
|---|
|  | 2136 | if (declaration ? OPTSET(parse_declaration) : OPTSET(parse_pi)) | 
|---|
|  | 2137 | { | 
|---|
|  | 2138 | if (declaration) | 
|---|
|  | 2139 | { | 
|---|
|  | 2140 | // disallow non top-level declarations | 
|---|
|  | 2141 | if (cursor->parent) THROW_ERROR(status_bad_pi, s); | 
|---|
|  | 2142 |  | 
|---|
|  | 2143 | PUSHNODE(node_declaration); | 
|---|
|  | 2144 | } | 
|---|
|  | 2145 | else | 
|---|
|  | 2146 | { | 
|---|
|  | 2147 | PUSHNODE(node_pi); | 
|---|
|  | 2148 | } | 
|---|
|  | 2149 |  | 
|---|
|  | 2150 | cursor->name = target; | 
|---|
|  | 2151 |  | 
|---|
|  | 2152 | ENDSEG(); | 
|---|
|  | 2153 |  | 
|---|
|  | 2154 | // parse value/attributes | 
|---|
|  | 2155 | if (ch == '?') | 
|---|
|  | 2156 | { | 
|---|
|  | 2157 | // empty node | 
|---|
|  | 2158 | if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_pi, s); | 
|---|
|  | 2159 | s += (*s == '>'); | 
|---|
|  | 2160 |  | 
|---|
|  | 2161 | POPNODE(); | 
|---|
|  | 2162 | } | 
|---|
|  | 2163 | else if (IS_CHARTYPE(ch, ct_space)) | 
|---|
|  | 2164 | { | 
|---|
|  | 2165 | SKIPWS(); | 
|---|
|  | 2166 |  | 
|---|
|  | 2167 | // scan for tag end | 
|---|
|  | 2168 | char_t* value = s; | 
|---|
|  | 2169 |  | 
|---|
|  | 2170 | SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); | 
|---|
|  | 2171 | CHECK_ERROR(status_bad_pi, s); | 
|---|
|  | 2172 |  | 
|---|
|  | 2173 | if (declaration) | 
|---|
|  | 2174 | { | 
|---|
|  | 2175 | // replace ending ? with / so that 'element' terminates properly | 
|---|
|  | 2176 | *s = '/'; | 
|---|
|  | 2177 |  | 
|---|
|  | 2178 | // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES | 
|---|
|  | 2179 | s = value; | 
|---|
|  | 2180 | } | 
|---|
|  | 2181 | else | 
|---|
|  | 2182 | { | 
|---|
|  | 2183 | // store value and step over > | 
|---|
|  | 2184 | cursor->value = value; | 
|---|
|  | 2185 | POPNODE(); | 
|---|
|  | 2186 |  | 
|---|
|  | 2187 | ENDSEG(); | 
|---|
|  | 2188 |  | 
|---|
|  | 2189 | s += (*s == '>'); | 
|---|
|  | 2190 | } | 
|---|
|  | 2191 | } | 
|---|
|  | 2192 | else THROW_ERROR(status_bad_pi, s); | 
|---|
|  | 2193 | } | 
|---|
|  | 2194 | else | 
|---|
|  | 2195 | { | 
|---|
|  | 2196 | // scan for tag end | 
|---|
|  | 2197 | SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); | 
|---|
|  | 2198 | CHECK_ERROR(status_bad_pi, s); | 
|---|
|  | 2199 |  | 
|---|
|  | 2200 | s += (s[1] == '>' ? 2 : 1); | 
|---|
|  | 2201 | } | 
|---|
|  | 2202 |  | 
|---|
|  | 2203 | // store from registers | 
|---|
|  | 2204 | ref_cursor = cursor; | 
|---|
|  | 2205 |  | 
|---|
|  | 2206 | return s; | 
|---|
|  | 2207 | } | 
|---|
|  | 2208 |  | 
|---|
|  | 2209 | void parse(char_t* s, xml_node_struct* xmldoc, unsigned int optmsk, char_t endch) | 
|---|
|  | 2210 | { | 
|---|
|  | 2211 | strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk); | 
|---|
|  | 2212 | strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk); | 
|---|
|  | 2213 |  | 
|---|
|  | 2214 | char_t ch = 0; | 
|---|
|  | 2215 | xml_node_struct* cursor = xmldoc; | 
|---|
|  | 2216 | char_t* mark = s; | 
|---|
|  | 2217 |  | 
|---|
|  | 2218 | while (*s != 0) | 
|---|
|  | 2219 | { | 
|---|
|  | 2220 | if (*s == '<') | 
|---|
|  | 2221 | { | 
|---|
|  | 2222 | ++s; | 
|---|
|  | 2223 |  | 
|---|
|  | 2224 | LOC_TAG: | 
|---|
|  | 2225 | if (IS_CHARTYPE(*s, ct_start_symbol)) // '<#...' | 
|---|
|  | 2226 | { | 
|---|
|  | 2227 | PUSHNODE(node_element); // Append a new node to the tree. | 
|---|
|  | 2228 |  | 
|---|
|  | 2229 | cursor->name = s; | 
|---|
|  | 2230 |  | 
|---|
|  | 2231 | SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator. | 
|---|
|  | 2232 | ENDSEG(); // Save char in 'ch', terminate & step over. | 
|---|
|  | 2233 |  | 
|---|
|  | 2234 | if (ch == '>') | 
|---|
|  | 2235 | { | 
|---|
|  | 2236 | // end of tag | 
|---|
|  | 2237 | } | 
|---|
|  | 2238 | else if (IS_CHARTYPE(ch, ct_space)) | 
|---|
|  | 2239 | { | 
|---|
|  | 2240 | LOC_ATTRIBUTES: | 
|---|
|  | 2241 | while (true) | 
|---|
|  | 2242 | { | 
|---|
|  | 2243 | SKIPWS(); // Eat any whitespace. | 
|---|
|  | 2244 |  | 
|---|
|  | 2245 | if (IS_CHARTYPE(*s, ct_start_symbol)) // <... #... | 
|---|
|  | 2246 | { | 
|---|
|  | 2247 | xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute. | 
|---|
|  | 2248 | if (!a) THROW_ERROR(status_out_of_memory, s); | 
|---|
|  | 2249 |  | 
|---|
|  | 2250 | a->name = s; // Save the offset. | 
|---|
|  | 2251 |  | 
|---|
|  | 2252 | SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator. | 
|---|
|  | 2253 | CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance | 
|---|
|  | 2254 |  | 
|---|
|  | 2255 | ENDSEG(); // Save char in 'ch', terminate & step over. | 
|---|
|  | 2256 | CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance | 
|---|
|  | 2257 |  | 
|---|
|  | 2258 | if (IS_CHARTYPE(ch, ct_space)) | 
|---|
|  | 2259 | { | 
|---|
|  | 2260 | SKIPWS(); // Eat any whitespace. | 
|---|
|  | 2261 | CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance | 
|---|
|  | 2262 |  | 
|---|
|  | 2263 | ch = *s; | 
|---|
|  | 2264 | ++s; | 
|---|
|  | 2265 | } | 
|---|
|  | 2266 |  | 
|---|
|  | 2267 | if (ch == '=') // '<... #=...' | 
|---|
|  | 2268 | { | 
|---|
|  | 2269 | SKIPWS(); // Eat any whitespace. | 
|---|
|  | 2270 |  | 
|---|
|  | 2271 | if (*s == '"' || *s == '\'') // '<... #="...' | 
|---|
|  | 2272 | { | 
|---|
|  | 2273 | ch = *s; // Save quote char to avoid breaking on "''" -or- '""'. | 
|---|
|  | 2274 | ++s; // Step over the quote. | 
|---|
|  | 2275 | a->value = s; // Save the offset. | 
|---|
|  | 2276 |  | 
|---|
|  | 2277 | s = strconv_attribute(s, ch); | 
|---|
|  | 2278 |  | 
|---|
|  | 2279 | if (!s) THROW_ERROR(status_bad_attribute, a->value); | 
|---|
|  | 2280 |  | 
|---|
|  | 2281 | // After this line the loop continues from the start; | 
|---|
|  | 2282 | // Whitespaces, / and > are ok, symbols and EOF are wrong, | 
|---|
|  | 2283 | // everything else will be detected | 
|---|
|  | 2284 | if (IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_attribute, s); | 
|---|
|  | 2285 | } | 
|---|
|  | 2286 | else THROW_ERROR(status_bad_attribute, s); | 
|---|
|  | 2287 | } | 
|---|
|  | 2288 | else THROW_ERROR(status_bad_attribute, s); | 
|---|
|  | 2289 | } | 
|---|
|  | 2290 | else if (*s == '/') | 
|---|
|  | 2291 | { | 
|---|
|  | 2292 | ++s; | 
|---|
|  | 2293 |  | 
|---|
|  | 2294 | if (*s == '>') | 
|---|
|  | 2295 | { | 
|---|
|  | 2296 | POPNODE(); | 
|---|
|  | 2297 | s++; | 
|---|
|  | 2298 | break; | 
|---|
|  | 2299 | } | 
|---|
|  | 2300 | else if (*s == 0 && endch == '>') | 
|---|
|  | 2301 | { | 
|---|
|  | 2302 | POPNODE(); | 
|---|
|  | 2303 | break; | 
|---|
|  | 2304 | } | 
|---|
|  | 2305 | else THROW_ERROR(status_bad_start_element, s); | 
|---|
|  | 2306 | } | 
|---|
|  | 2307 | else if (*s == '>') | 
|---|
|  | 2308 | { | 
|---|
|  | 2309 | ++s; | 
|---|
|  | 2310 |  | 
|---|
|  | 2311 | break; | 
|---|
|  | 2312 | } | 
|---|
|  | 2313 | else if (*s == 0 && endch == '>') | 
|---|
|  | 2314 | { | 
|---|
|  | 2315 | break; | 
|---|
|  | 2316 | } | 
|---|
|  | 2317 | else THROW_ERROR(status_bad_start_element, s); | 
|---|
|  | 2318 | } | 
|---|
|  | 2319 |  | 
|---|
|  | 2320 | // !!! | 
|---|
|  | 2321 | } | 
|---|
|  | 2322 | else if (ch == '/') // '<#.../' | 
|---|
|  | 2323 | { | 
|---|
|  | 2324 | if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_start_element, s); | 
|---|
|  | 2325 |  | 
|---|
|  | 2326 | POPNODE(); // Pop. | 
|---|
|  | 2327 |  | 
|---|
|  | 2328 | s += (*s == '>'); | 
|---|
|  | 2329 | } | 
|---|
|  | 2330 | else if (ch == 0) | 
|---|
|  | 2331 | { | 
|---|
|  | 2332 | // we stepped over null terminator, backtrack & handle closing tag | 
|---|
|  | 2333 | --s; | 
|---|
|  | 2334 |  | 
|---|
|  | 2335 | if (endch != '>') THROW_ERROR(status_bad_start_element, s); | 
|---|
|  | 2336 | } | 
|---|
|  | 2337 | else THROW_ERROR(status_bad_start_element, s); | 
|---|
|  | 2338 | } | 
|---|
|  | 2339 | else if (*s == '/') | 
|---|
|  | 2340 | { | 
|---|
|  | 2341 | ++s; | 
|---|
|  | 2342 |  | 
|---|
|  | 2343 | char_t* name = cursor->name; | 
|---|
|  | 2344 | if (!name) THROW_ERROR(status_end_element_mismatch, s); | 
|---|
|  | 2345 |  | 
|---|
|  | 2346 | while (IS_CHARTYPE(*s, ct_symbol)) | 
|---|
|  | 2347 | { | 
|---|
|  | 2348 | if (*s++ != *name++) THROW_ERROR(status_end_element_mismatch, s); | 
|---|
|  | 2349 | } | 
|---|
|  | 2350 |  | 
|---|
|  | 2351 | if (*name) | 
|---|
|  | 2352 | { | 
|---|
|  | 2353 | if (*s == 0 && name[0] == endch && name[1] == 0) THROW_ERROR(status_bad_end_element, s); | 
|---|
|  | 2354 | else THROW_ERROR(status_end_element_mismatch, s); | 
|---|
|  | 2355 | } | 
|---|
|  | 2356 |  | 
|---|
|  | 2357 | POPNODE(); // Pop. | 
|---|
|  | 2358 |  | 
|---|
|  | 2359 | SKIPWS(); | 
|---|
|  | 2360 |  | 
|---|
|  | 2361 | if (*s == 0) | 
|---|
|  | 2362 | { | 
|---|
|  | 2363 | if (endch != '>') THROW_ERROR(status_bad_end_element, s); | 
|---|
|  | 2364 | } | 
|---|
|  | 2365 | else | 
|---|
|  | 2366 | { | 
|---|
|  | 2367 | if (*s != '>') THROW_ERROR(status_bad_end_element, s); | 
|---|
|  | 2368 | ++s; | 
|---|
|  | 2369 | } | 
|---|
|  | 2370 | } | 
|---|
|  | 2371 | else if (*s == '?') // '<?...' | 
|---|
|  | 2372 | { | 
|---|
|  | 2373 | s = parse_question(s, cursor, optmsk, endch); | 
|---|
|  | 2374 |  | 
|---|
|  | 2375 | assert(cursor); | 
|---|
|  | 2376 | if ((cursor->header & xml_memory_page_type_mask) + 1 == node_declaration) goto LOC_ATTRIBUTES; | 
|---|
|  | 2377 | } | 
|---|
|  | 2378 | else if (*s == '!') // '<!...' | 
|---|
|  | 2379 | { | 
|---|
|  | 2380 | s = parse_exclamation(s, cursor, optmsk, endch); | 
|---|
|  | 2381 | } | 
|---|
|  | 2382 | else if (*s == 0 && endch == '?') THROW_ERROR(status_bad_pi, s); | 
|---|
|  | 2383 | else THROW_ERROR(status_unrecognized_tag, s); | 
|---|
|  | 2384 | } | 
|---|
|  | 2385 | else | 
|---|
|  | 2386 | { | 
|---|
|  | 2387 | mark = s; // Save this offset while searching for a terminator. | 
|---|
|  | 2388 |  | 
|---|
|  | 2389 | SKIPWS(); // Eat whitespace if no genuine PCDATA here. | 
|---|
|  | 2390 |  | 
|---|
|  | 2391 | if ((!OPTSET(parse_ws_pcdata) || mark == s) && (*s == '<' || !*s)) | 
|---|
|  | 2392 | { | 
|---|
|  | 2393 | continue; | 
|---|
|  | 2394 | } | 
|---|
|  | 2395 |  | 
|---|
|  | 2396 | s = mark; | 
|---|
|  | 2397 |  | 
|---|
|  | 2398 | if (cursor->parent) | 
|---|
|  | 2399 | { | 
|---|
|  | 2400 | PUSHNODE(node_pcdata); // Append a new node on the tree. | 
|---|
|  | 2401 | cursor->value = s; // Save the offset. | 
|---|
|  | 2402 |  | 
|---|
|  | 2403 | s = strconv_pcdata(s); | 
|---|
|  | 2404 |  | 
|---|
|  | 2405 | POPNODE(); // Pop since this is a standalone. | 
|---|
|  | 2406 |  | 
|---|
|  | 2407 | if (!*s) break; | 
|---|
|  | 2408 | } | 
|---|
|  | 2409 | else | 
|---|
|  | 2410 | { | 
|---|
|  | 2411 | SCANFOR(*s == '<'); // '...<' | 
|---|
|  | 2412 | if (!*s) break; | 
|---|
|  | 2413 |  | 
|---|
|  | 2414 | ++s; | 
|---|
|  | 2415 | } | 
|---|
|  | 2416 |  | 
|---|
|  | 2417 | // We're after '<' | 
|---|
|  | 2418 | goto LOC_TAG; | 
|---|
|  | 2419 | } | 
|---|
|  | 2420 | } | 
|---|
|  | 2421 |  | 
|---|
|  | 2422 | // check that last tag is closed | 
|---|
|  | 2423 | if (cursor != xmldoc) THROW_ERROR(status_end_element_mismatch, s); | 
|---|
|  | 2424 | } | 
|---|
|  | 2425 |  | 
|---|
|  | 2426 | static xml_parse_result parse(char_t* buffer, size_t length, xml_node_struct* root, unsigned int optmsk) | 
|---|
|  | 2427 | { | 
|---|
|  | 2428 | xml_document_struct* xmldoc = static_cast<xml_document_struct*>(root); | 
|---|
|  | 2429 |  | 
|---|
|  | 2430 | // store buffer for offset_debug | 
|---|
|  | 2431 | xmldoc->buffer = buffer; | 
|---|
|  | 2432 |  | 
|---|
|  | 2433 | // early-out for empty documents | 
|---|
|  | 2434 | if (length == 0) return make_parse_result(status_ok); | 
|---|
|  | 2435 |  | 
|---|
|  | 2436 | // create parser on stack | 
|---|
|  | 2437 | xml_parser parser(*xmldoc); | 
|---|
|  | 2438 |  | 
|---|
|  | 2439 | // save last character and make buffer zero-terminated (speeds up parsing) | 
|---|
|  | 2440 | char_t endch = buffer[length - 1]; | 
|---|
|  | 2441 | buffer[length - 1] = 0; | 
|---|
|  | 2442 |  | 
|---|
|  | 2443 | // perform actual parsing | 
|---|
|  | 2444 | int error = setjmp(parser.error_handler); | 
|---|
|  | 2445 |  | 
|---|
|  | 2446 | if (error == 0) | 
|---|
|  | 2447 | { | 
|---|
|  | 2448 | parser.parse(buffer, xmldoc, optmsk, endch); | 
|---|
|  | 2449 | } | 
|---|
|  | 2450 |  | 
|---|
|  | 2451 | xml_parse_result result = make_parse_result(static_cast<xml_parse_status>(error), parser.error_offset ? parser.error_offset - buffer : 0); | 
|---|
|  | 2452 | assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length); | 
|---|
|  | 2453 |  | 
|---|
|  | 2454 | // update allocator state | 
|---|
|  | 2455 | *static_cast<xml_allocator*>(xmldoc) = parser.alloc; | 
|---|
|  | 2456 |  | 
|---|
|  | 2457 | // since we removed last character, we have to handle the only possible false positive | 
|---|
|  | 2458 | if (result && endch == '<') | 
|---|
|  | 2459 | { | 
|---|
|  | 2460 | // there's no possible well-formed document with < at the end | 
|---|
|  | 2461 | return make_parse_result(status_unrecognized_tag, length); | 
|---|
|  | 2462 | } | 
|---|
|  | 2463 |  | 
|---|
|  | 2464 | return result; | 
|---|
|  | 2465 | } | 
|---|
|  | 2466 | }; | 
|---|
|  | 2467 |  | 
|---|
|  | 2468 | // Output facilities | 
|---|
|  | 2469 | xml_encoding get_write_native_encoding() | 
|---|
|  | 2470 | { | 
|---|
|  | 2471 | #ifdef PUGIXML_WCHAR_MODE | 
|---|
|  | 2472 | return get_wchar_encoding(); | 
|---|
|  | 2473 | #else | 
|---|
|  | 2474 | return encoding_utf8; | 
|---|
|  | 2475 | #endif | 
|---|
|  | 2476 | } | 
|---|
|  | 2477 |  | 
|---|
|  | 2478 | xml_encoding get_write_encoding(xml_encoding encoding) | 
|---|
|  | 2479 | { | 
|---|
|  | 2480 | // replace wchar encoding with utf implementation | 
|---|
|  | 2481 | if (encoding == encoding_wchar) return get_wchar_encoding(); | 
|---|
|  | 2482 |  | 
|---|
|  | 2483 | // replace utf16 encoding with utf16 with specific endianness | 
|---|
|  | 2484 | if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | 
|---|
|  | 2485 |  | 
|---|
|  | 2486 | // replace utf32 encoding with utf32 with specific endianness | 
|---|
|  | 2487 | if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | 
|---|
|  | 2488 |  | 
|---|
|  | 2489 | // only do autodetection if no explicit encoding is requested | 
|---|
|  | 2490 | if (encoding != encoding_auto) return encoding; | 
|---|
|  | 2491 |  | 
|---|
|  | 2492 | // assume utf8 encoding | 
|---|
|  | 2493 | return encoding_utf8; | 
|---|
|  | 2494 | } | 
|---|
|  | 2495 |  | 
|---|
|  | 2496 | #ifdef PUGIXML_WCHAR_MODE | 
|---|
|  | 2497 | size_t get_valid_length(const char_t* data, size_t length) | 
|---|
|  | 2498 | { | 
|---|
|  | 2499 | assert(length > 0); | 
|---|
|  | 2500 |  | 
|---|
|  | 2501 | // discard last character if it's the lead of a surrogate pair | 
|---|
|  | 2502 | return (sizeof(wchar_t) == 2 && (unsigned)(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length; | 
|---|
|  | 2503 | } | 
|---|
|  | 2504 |  | 
|---|
|  | 2505 | size_t convert_buffer(char* result, const char_t* data, size_t length, xml_encoding encoding) | 
|---|
|  | 2506 | { | 
|---|
|  | 2507 | // only endian-swapping is required | 
|---|
|  | 2508 | if (need_endian_swap_utf(encoding, get_wchar_encoding())) | 
|---|
|  | 2509 | { | 
|---|
|  | 2510 | convert_wchar_endian_swap(reinterpret_cast<char_t*>(result), data, length); | 
|---|
|  | 2511 |  | 
|---|
|  | 2512 | return length * sizeof(char_t); | 
|---|
|  | 2513 | } | 
|---|
|  | 2514 |  | 
|---|
|  | 2515 | // convert to utf8 | 
|---|
|  | 2516 | if (encoding == encoding_utf8) | 
|---|
|  | 2517 | { | 
|---|
|  | 2518 | uint8_t* dest = reinterpret_cast<uint8_t*>(result); | 
|---|
|  | 2519 |  | 
|---|
|  | 2520 | uint8_t* end = sizeof(wchar_t) == 2 ? | 
|---|
|  | 2521 | utf_decoder<utf8_writer>::decode_utf16_block(reinterpret_cast<const uint16_t*>(data), length, dest) : | 
|---|
|  | 2522 | utf_decoder<utf8_writer>::decode_utf32_block(reinterpret_cast<const uint32_t*>(data), length, dest); | 
|---|
|  | 2523 |  | 
|---|
|  | 2524 | return static_cast<size_t>(end - dest); | 
|---|
|  | 2525 | } | 
|---|
|  | 2526 |  | 
|---|
|  | 2527 | // convert to utf16 | 
|---|
|  | 2528 | if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) | 
|---|
|  | 2529 | { | 
|---|
|  | 2530 | uint16_t* dest = reinterpret_cast<uint16_t*>(result); | 
|---|
|  | 2531 |  | 
|---|
|  | 2532 | // convert to native utf16 | 
|---|
|  | 2533 | uint16_t* end = utf_decoder<utf16_writer>::decode_utf32_block(reinterpret_cast<const uint32_t*>(data), length, dest); | 
|---|
|  | 2534 |  | 
|---|
|  | 2535 | // swap if necessary | 
|---|
|  | 2536 | xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | 
|---|
|  | 2537 |  | 
|---|
|  | 2538 | if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest)); | 
|---|
|  | 2539 |  | 
|---|
|  | 2540 | return static_cast<size_t>(end - dest) * sizeof(uint16_t); | 
|---|
|  | 2541 | } | 
|---|
|  | 2542 |  | 
|---|
|  | 2543 | // convert to utf32 | 
|---|
|  | 2544 | if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) | 
|---|
|  | 2545 | { | 
|---|
|  | 2546 | uint32_t* dest = reinterpret_cast<uint32_t*>(result); | 
|---|
|  | 2547 |  | 
|---|
|  | 2548 | // convert to native utf32 | 
|---|
|  | 2549 | uint32_t* end = utf_decoder<utf32_writer>::decode_utf16_block(reinterpret_cast<const uint16_t*>(data), length, dest); | 
|---|
|  | 2550 |  | 
|---|
|  | 2551 | // swap if necessary | 
|---|
|  | 2552 | xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | 
|---|
|  | 2553 |  | 
|---|
|  | 2554 | if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest)); | 
|---|
|  | 2555 |  | 
|---|
|  | 2556 | return static_cast<size_t>(end - dest) * sizeof(uint32_t); | 
|---|
|  | 2557 | } | 
|---|
|  | 2558 |  | 
|---|
|  | 2559 | assert(!"Invalid encoding"); | 
|---|
|  | 2560 | return 0; | 
|---|
|  | 2561 | } | 
|---|
|  | 2562 | #else | 
|---|
|  | 2563 | size_t get_valid_length(const char_t* data, size_t length) | 
|---|
|  | 2564 | { | 
|---|
|  | 2565 | assert(length > 4); | 
|---|
|  | 2566 |  | 
|---|
|  | 2567 | for (size_t i = 1; i <= 4; ++i) | 
|---|
|  | 2568 | { | 
|---|
|  | 2569 | uint8_t ch = static_cast<uint8_t>(data[length - i]); | 
|---|
|  | 2570 |  | 
|---|
|  | 2571 | // either a standalone character or a leading one | 
|---|
|  | 2572 | if ((ch & 0xc0) != 0x80) return length - i; | 
|---|
|  | 2573 | } | 
|---|
|  | 2574 |  | 
|---|
|  | 2575 | // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk | 
|---|
|  | 2576 | return length; | 
|---|
|  | 2577 | } | 
|---|
|  | 2578 |  | 
|---|
|  | 2579 | size_t convert_buffer(char* result, const char_t* data, size_t length, xml_encoding encoding) | 
|---|
|  | 2580 | { | 
|---|
|  | 2581 | if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) | 
|---|
|  | 2582 | { | 
|---|
|  | 2583 | uint16_t* dest = reinterpret_cast<uint16_t*>(result); | 
|---|
|  | 2584 |  | 
|---|
|  | 2585 | // convert to native utf16 | 
|---|
|  | 2586 | uint16_t* end = utf_decoder<utf16_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest); | 
|---|
|  | 2587 |  | 
|---|
|  | 2588 | // swap if necessary | 
|---|
|  | 2589 | xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | 
|---|
|  | 2590 |  | 
|---|
|  | 2591 | if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest)); | 
|---|
|  | 2592 |  | 
|---|
|  | 2593 | return static_cast<size_t>(end - dest) * sizeof(uint16_t); | 
|---|
|  | 2594 | } | 
|---|
|  | 2595 |  | 
|---|
|  | 2596 | if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) | 
|---|
|  | 2597 | { | 
|---|
|  | 2598 | uint32_t* dest = reinterpret_cast<uint32_t*>(result); | 
|---|
|  | 2599 |  | 
|---|
|  | 2600 | // convert to native utf32 | 
|---|
|  | 2601 | uint32_t* end = utf_decoder<utf32_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest); | 
|---|
|  | 2602 |  | 
|---|
|  | 2603 | // swap if necessary | 
|---|
|  | 2604 | xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | 
|---|
|  | 2605 |  | 
|---|
|  | 2606 | if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest)); | 
|---|
|  | 2607 |  | 
|---|
|  | 2608 | return static_cast<size_t>(end - dest) * sizeof(uint32_t); | 
|---|
|  | 2609 | } | 
|---|
|  | 2610 |  | 
|---|
|  | 2611 | assert(!"Invalid encoding"); | 
|---|
|  | 2612 | return 0; | 
|---|
|  | 2613 | } | 
|---|
|  | 2614 | #endif | 
|---|
|  | 2615 |  | 
|---|
|  | 2616 | class xml_buffered_writer | 
|---|
|  | 2617 | { | 
|---|
|  | 2618 | xml_buffered_writer(const xml_buffered_writer&); | 
|---|
|  | 2619 | xml_buffered_writer& operator=(const xml_buffered_writer&); | 
|---|
|  | 2620 |  | 
|---|
|  | 2621 | public: | 
|---|
|  | 2622 | xml_buffered_writer(xml_writer& writer, xml_encoding user_encoding): writer(writer), bufsize(0), encoding(get_write_encoding(user_encoding)) | 
|---|
|  | 2623 | { | 
|---|
|  | 2624 | } | 
|---|
|  | 2625 |  | 
|---|
|  | 2626 | ~xml_buffered_writer() | 
|---|
|  | 2627 | { | 
|---|
|  | 2628 | flush(); | 
|---|
|  | 2629 | } | 
|---|
|  | 2630 |  | 
|---|
|  | 2631 | void flush() | 
|---|
|  | 2632 | { | 
|---|
|  | 2633 | flush(buffer, bufsize); | 
|---|
|  | 2634 | bufsize = 0; | 
|---|
|  | 2635 | } | 
|---|
|  | 2636 |  | 
|---|
|  | 2637 | void flush(const char_t* data, size_t size) | 
|---|
|  | 2638 | { | 
|---|
|  | 2639 | if (size == 0) return; | 
|---|
|  | 2640 |  | 
|---|
|  | 2641 | // fast path, just write data | 
|---|
|  | 2642 | if (encoding == get_write_native_encoding()) | 
|---|
|  | 2643 | writer.write(data, size * sizeof(char_t)); | 
|---|
|  | 2644 | else | 
|---|
|  | 2645 | { | 
|---|
|  | 2646 | // convert chunk | 
|---|
|  | 2647 | size_t result = convert_buffer(scratch, data, size, encoding); | 
|---|
|  | 2648 | assert(result <= sizeof(scratch)); | 
|---|
|  | 2649 |  | 
|---|
|  | 2650 | // write data | 
|---|
|  | 2651 | writer.write(scratch, result); | 
|---|
|  | 2652 | } | 
|---|
|  | 2653 | } | 
|---|
|  | 2654 |  | 
|---|
|  | 2655 | void write(const char_t* data, size_t length) | 
|---|
|  | 2656 | { | 
|---|
|  | 2657 | if (bufsize + length > bufcapacity) | 
|---|
|  | 2658 | { | 
|---|
|  | 2659 | // flush the remaining buffer contents | 
|---|
|  | 2660 | flush(); | 
|---|
|  | 2661 |  | 
|---|
|  | 2662 | // handle large chunks | 
|---|
|  | 2663 | if (length > bufcapacity) | 
|---|
|  | 2664 | { | 
|---|
|  | 2665 | if (encoding == get_write_native_encoding()) | 
|---|
|  | 2666 | { | 
|---|
|  | 2667 | // fast path, can just write data chunk | 
|---|
|  | 2668 | writer.write(data, length * sizeof(char_t)); | 
|---|
|  | 2669 | return; | 
|---|
|  | 2670 | } | 
|---|
|  | 2671 |  | 
|---|
|  | 2672 | // need to convert in suitable chunks | 
|---|
|  | 2673 | while (length > bufcapacity) | 
|---|
|  | 2674 | { | 
|---|
|  | 2675 | // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer | 
|---|
|  | 2676 | // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary) | 
|---|
|  | 2677 | size_t chunk_size = get_valid_length(data, bufcapacity); | 
|---|
|  | 2678 |  | 
|---|
|  | 2679 | // convert chunk and write | 
|---|
|  | 2680 | flush(data, chunk_size); | 
|---|
|  | 2681 |  | 
|---|
|  | 2682 | // iterate | 
|---|
|  | 2683 | data += chunk_size; | 
|---|
|  | 2684 | length -= chunk_size; | 
|---|
|  | 2685 | } | 
|---|
|  | 2686 |  | 
|---|
|  | 2687 | // small tail is copied below | 
|---|
|  | 2688 | bufsize = 0; | 
|---|
|  | 2689 | } | 
|---|
|  | 2690 | } | 
|---|
|  | 2691 |  | 
|---|
|  | 2692 | memcpy(buffer + bufsize, data, length * sizeof(char_t)); | 
|---|
|  | 2693 | bufsize += length; | 
|---|
|  | 2694 | } | 
|---|
|  | 2695 |  | 
|---|
|  | 2696 | void write(const char_t* data) | 
|---|
|  | 2697 | { | 
|---|
|  | 2698 | write(data, strlength(data)); | 
|---|
|  | 2699 | } | 
|---|
|  | 2700 |  | 
|---|
|  | 2701 | void write(char_t d0) | 
|---|
|  | 2702 | { | 
|---|
|  | 2703 | if (bufsize + 1 > bufcapacity) flush(); | 
|---|
|  | 2704 |  | 
|---|
|  | 2705 | buffer[bufsize + 0] = d0; | 
|---|
|  | 2706 | bufsize += 1; | 
|---|
|  | 2707 | } | 
|---|
|  | 2708 |  | 
|---|
|  | 2709 | void write(char_t d0, char_t d1) | 
|---|
|  | 2710 | { | 
|---|
|  | 2711 | if (bufsize + 2 > bufcapacity) flush(); | 
|---|
|  | 2712 |  | 
|---|
|  | 2713 | buffer[bufsize + 0] = d0; | 
|---|
|  | 2714 | buffer[bufsize + 1] = d1; | 
|---|
|  | 2715 | bufsize += 2; | 
|---|
|  | 2716 | } | 
|---|
|  | 2717 |  | 
|---|
|  | 2718 | void write(char_t d0, char_t d1, char_t d2) | 
|---|
|  | 2719 | { | 
|---|
|  | 2720 | if (bufsize + 3 > bufcapacity) flush(); | 
|---|
|  | 2721 |  | 
|---|
|  | 2722 | buffer[bufsize + 0] = d0; | 
|---|
|  | 2723 | buffer[bufsize + 1] = d1; | 
|---|
|  | 2724 | buffer[bufsize + 2] = d2; | 
|---|
|  | 2725 | bufsize += 3; | 
|---|
|  | 2726 | } | 
|---|
|  | 2727 |  | 
|---|
|  | 2728 | void write(char_t d0, char_t d1, char_t d2, char_t d3) | 
|---|
|  | 2729 | { | 
|---|
|  | 2730 | if (bufsize + 4 > bufcapacity) flush(); | 
|---|
|  | 2731 |  | 
|---|
|  | 2732 | buffer[bufsize + 0] = d0; | 
|---|
|  | 2733 | buffer[bufsize + 1] = d1; | 
|---|
|  | 2734 | buffer[bufsize + 2] = d2; | 
|---|
|  | 2735 | buffer[bufsize + 3] = d3; | 
|---|
|  | 2736 | bufsize += 4; | 
|---|
|  | 2737 | } | 
|---|
|  | 2738 |  | 
|---|
|  | 2739 | void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) | 
|---|
|  | 2740 | { | 
|---|
|  | 2741 | if (bufsize + 5 > bufcapacity) flush(); | 
|---|
|  | 2742 |  | 
|---|
|  | 2743 | buffer[bufsize + 0] = d0; | 
|---|
|  | 2744 | buffer[bufsize + 1] = d1; | 
|---|
|  | 2745 | buffer[bufsize + 2] = d2; | 
|---|
|  | 2746 | buffer[bufsize + 3] = d3; | 
|---|
|  | 2747 | buffer[bufsize + 4] = d4; | 
|---|
|  | 2748 | bufsize += 5; | 
|---|
|  | 2749 | } | 
|---|
|  | 2750 |  | 
|---|
|  | 2751 | void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) | 
|---|
|  | 2752 | { | 
|---|
|  | 2753 | if (bufsize + 6 > bufcapacity) flush(); | 
|---|
|  | 2754 |  | 
|---|
|  | 2755 | buffer[bufsize + 0] = d0; | 
|---|
|  | 2756 | buffer[bufsize + 1] = d1; | 
|---|
|  | 2757 | buffer[bufsize + 2] = d2; | 
|---|
|  | 2758 | buffer[bufsize + 3] = d3; | 
|---|
|  | 2759 | buffer[bufsize + 4] = d4; | 
|---|
|  | 2760 | buffer[bufsize + 5] = d5; | 
|---|
|  | 2761 | bufsize += 6; | 
|---|
|  | 2762 | } | 
|---|
|  | 2763 |  | 
|---|
|  | 2764 | // utf8 maximum expansion: x4 (-> utf32) | 
|---|
|  | 2765 | // utf16 maximum expansion: x2 (-> utf32) | 
|---|
|  | 2766 | // utf32 maximum expansion: x1 | 
|---|
|  | 2767 | enum { bufcapacity = 2048 }; | 
|---|
|  | 2768 |  | 
|---|
|  | 2769 | char_t buffer[bufcapacity]; | 
|---|
|  | 2770 | char scratch[4 * bufcapacity]; | 
|---|
|  | 2771 |  | 
|---|
|  | 2772 | xml_writer& writer; | 
|---|
|  | 2773 | size_t bufsize; | 
|---|
|  | 2774 | xml_encoding encoding; | 
|---|
|  | 2775 | }; | 
|---|
|  | 2776 |  | 
|---|
|  | 2777 | void write_bom(xml_writer& writer, xml_encoding encoding) | 
|---|
|  | 2778 | { | 
|---|
|  | 2779 | switch (encoding) | 
|---|
|  | 2780 | { | 
|---|
|  | 2781 | case encoding_utf8: | 
|---|
|  | 2782 | writer.write("\xef\xbb\xbf", 3); | 
|---|
|  | 2783 | break; | 
|---|
|  | 2784 |  | 
|---|
|  | 2785 | case encoding_utf16_be: | 
|---|
|  | 2786 | writer.write("\xfe\xff", 2); | 
|---|
|  | 2787 | break; | 
|---|
|  | 2788 |  | 
|---|
|  | 2789 | case encoding_utf16_le: | 
|---|
|  | 2790 | writer.write("\xff\xfe", 2); | 
|---|
|  | 2791 | break; | 
|---|
|  | 2792 |  | 
|---|
|  | 2793 | case encoding_utf32_be: | 
|---|
|  | 2794 | writer.write("\x00\x00\xfe\xff", 4); | 
|---|
|  | 2795 | break; | 
|---|
|  | 2796 |  | 
|---|
|  | 2797 | case encoding_utf32_le: | 
|---|
|  | 2798 | writer.write("\xff\xfe\x00\x00", 4); | 
|---|
|  | 2799 | break; | 
|---|
|  | 2800 |  | 
|---|
|  | 2801 | default: | 
|---|
|  | 2802 | assert(!"Invalid encoding"); | 
|---|
|  | 2803 | } | 
|---|
|  | 2804 | } | 
|---|
|  | 2805 |  | 
|---|
|  | 2806 | void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type) | 
|---|
|  | 2807 | { | 
|---|
|  | 2808 | while (*s) | 
|---|
|  | 2809 | { | 
|---|
|  | 2810 | const char_t* prev = s; | 
|---|
|  | 2811 |  | 
|---|
|  | 2812 | // While *s is a usual symbol | 
|---|
|  | 2813 | while (!IS_CHARTYPEX(*s, type)) ++s; | 
|---|
|  | 2814 |  | 
|---|
|  | 2815 | writer.write(prev, static_cast<size_t>(s - prev)); | 
|---|
|  | 2816 |  | 
|---|
|  | 2817 | switch (*s) | 
|---|
|  | 2818 | { | 
|---|
|  | 2819 | case 0: break; | 
|---|
|  | 2820 | case '&': | 
|---|
|  | 2821 | writer.write('&', 'a', 'm', 'p', ';'); | 
|---|
|  | 2822 | ++s; | 
|---|
|  | 2823 | break; | 
|---|
|  | 2824 | case '<': | 
|---|
|  | 2825 | writer.write('&', 'l', 't', ';'); | 
|---|
|  | 2826 | ++s; | 
|---|
|  | 2827 | break; | 
|---|
|  | 2828 | case '>': | 
|---|
|  | 2829 | writer.write('&', 'g', 't', ';'); | 
|---|
|  | 2830 | ++s; | 
|---|
|  | 2831 | break; | 
|---|
|  | 2832 | case '"': | 
|---|
|  | 2833 | writer.write('&', 'q', 'u', 'o', 't', ';'); | 
|---|
|  | 2834 | ++s; | 
|---|
|  | 2835 | break; | 
|---|
|  | 2836 | default: // s is not a usual symbol | 
|---|
|  | 2837 | { | 
|---|
|  | 2838 | unsigned int ch = static_cast<unsigned int>(*s++); | 
|---|
|  | 2839 | assert(ch < 32); | 
|---|
|  | 2840 |  | 
|---|
|  | 2841 | writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';'); | 
|---|
|  | 2842 | } | 
|---|
|  | 2843 | } | 
|---|
|  | 2844 | } | 
|---|
|  | 2845 | } | 
|---|
|  | 2846 |  | 
|---|
|  | 2847 | void text_output_cdata(xml_buffered_writer& writer, const char_t* s) | 
|---|
|  | 2848 | { | 
|---|
|  | 2849 | do | 
|---|
|  | 2850 | { | 
|---|
|  | 2851 | writer.write('<', '!', '[', 'C', 'D'); | 
|---|
|  | 2852 | writer.write('A', 'T', 'A', '['); | 
|---|
|  | 2853 |  | 
|---|
|  | 2854 | const char_t* prev = s; | 
|---|
|  | 2855 |  | 
|---|
|  | 2856 | // look for ]]> sequence - we can't output it as is since it terminates CDATA | 
|---|
|  | 2857 | while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s; | 
|---|
|  | 2858 |  | 
|---|
|  | 2859 | // skip ]] if we stopped at ]]>, > will go to the next CDATA section | 
|---|
|  | 2860 | if (*s) s += 2; | 
|---|
|  | 2861 |  | 
|---|
|  | 2862 | writer.write(prev, static_cast<size_t>(s - prev)); | 
|---|
|  | 2863 |  | 
|---|
|  | 2864 | writer.write(']', ']', '>'); | 
|---|
|  | 2865 | } | 
|---|
|  | 2866 | while (*s); | 
|---|
|  | 2867 | } | 
|---|
|  | 2868 |  | 
|---|
|  | 2869 | void node_output_attributes(xml_buffered_writer& writer, const xml_node& node) | 
|---|
|  | 2870 | { | 
|---|
|  | 2871 | const char_t* default_name = PUGIXML_TEXT(":anonymous"); | 
|---|
|  | 2872 |  | 
|---|
|  | 2873 | for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute()) | 
|---|
|  | 2874 | { | 
|---|
|  | 2875 | writer.write(' '); | 
|---|
|  | 2876 | writer.write(a.name()[0] ? a.name() : default_name); | 
|---|
|  | 2877 | writer.write('=', '"'); | 
|---|
|  | 2878 |  | 
|---|
|  | 2879 | text_output_escaped(writer, a.value(), ctx_special_attr); | 
|---|
|  | 2880 |  | 
|---|
|  | 2881 | writer.write('"'); | 
|---|
|  | 2882 | } | 
|---|
|  | 2883 | } | 
|---|
|  | 2884 |  | 
|---|
|  | 2885 | void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth) | 
|---|
|  | 2886 | { | 
|---|
|  | 2887 | const char_t* default_name = PUGIXML_TEXT(":anonymous"); | 
|---|
|  | 2888 |  | 
|---|
|  | 2889 | if ((flags & format_indent) != 0 && (flags & format_raw) == 0) | 
|---|
|  | 2890 | for (unsigned int i = 0; i < depth; ++i) writer.write(indent); | 
|---|
|  | 2891 |  | 
|---|
|  | 2892 | switch (node.type()) | 
|---|
|  | 2893 | { | 
|---|
|  | 2894 | case node_document: | 
|---|
|  | 2895 | { | 
|---|
|  | 2896 | for (xml_node n = node.first_child(); n; n = n.next_sibling()) | 
|---|
|  | 2897 | node_output(writer, n, indent, flags, depth); | 
|---|
|  | 2898 | break; | 
|---|
|  | 2899 | } | 
|---|
|  | 2900 |  | 
|---|
|  | 2901 | case node_element: | 
|---|
|  | 2902 | { | 
|---|
|  | 2903 | const char_t* name = node.name()[0] ? node.name() : default_name; | 
|---|
|  | 2904 |  | 
|---|
|  | 2905 | writer.write('<'); | 
|---|
|  | 2906 | writer.write(name); | 
|---|
|  | 2907 |  | 
|---|
|  | 2908 | node_output_attributes(writer, node); | 
|---|
|  | 2909 |  | 
|---|
|  | 2910 | if (flags & format_raw) | 
|---|
|  | 2911 | { | 
|---|
|  | 2912 | if (!node.first_child()) | 
|---|
|  | 2913 | writer.write(' ', '/', '>'); | 
|---|
|  | 2914 | else | 
|---|
|  | 2915 | { | 
|---|
|  | 2916 | writer.write('>'); | 
|---|
|  | 2917 |  | 
|---|
|  | 2918 | for (xml_node n = node.first_child(); n; n = n.next_sibling()) | 
|---|
|  | 2919 | node_output(writer, n, indent, flags, depth + 1); | 
|---|
|  | 2920 |  | 
|---|
|  | 2921 | writer.write('<', '/'); | 
|---|
|  | 2922 | writer.write(name); | 
|---|
|  | 2923 | writer.write('>'); | 
|---|
|  | 2924 | } | 
|---|
|  | 2925 | } | 
|---|
|  | 2926 | else if (!node.first_child()) | 
|---|
|  | 2927 | writer.write(' ', '/', '>', '\n'); | 
|---|
|  | 2928 | else if (node.first_child() == node.last_child() && (node.first_child().type() == node_pcdata || node.first_child().type() == node_cdata)) | 
|---|
|  | 2929 | { | 
|---|
|  | 2930 | writer.write('>'); | 
|---|
|  | 2931 |  | 
|---|
|  | 2932 | if (node.first_child().type() == node_pcdata) | 
|---|
|  | 2933 | text_output_escaped(writer, node.first_child().value(), ctx_special_pcdata); | 
|---|
|  | 2934 | else | 
|---|
|  | 2935 | text_output_cdata(writer, node.first_child().value()); | 
|---|
|  | 2936 |  | 
|---|
|  | 2937 | writer.write('<', '/'); | 
|---|
|  | 2938 | writer.write(name); | 
|---|
|  | 2939 | writer.write('>', '\n'); | 
|---|
|  | 2940 | } | 
|---|
|  | 2941 | else | 
|---|
|  | 2942 | { | 
|---|
|  | 2943 | writer.write('>', '\n'); | 
|---|
|  | 2944 |  | 
|---|
|  | 2945 | for (xml_node n = node.first_child(); n; n = n.next_sibling()) | 
|---|
|  | 2946 | node_output(writer, n, indent, flags, depth + 1); | 
|---|
|  | 2947 |  | 
|---|
|  | 2948 | if ((flags & format_indent) != 0 && (flags & format_raw) == 0) | 
|---|
|  | 2949 | for (unsigned int i = 0; i < depth; ++i) writer.write(indent); | 
|---|
|  | 2950 |  | 
|---|
|  | 2951 | writer.write('<', '/'); | 
|---|
|  | 2952 | writer.write(name); | 
|---|
|  | 2953 | writer.write('>', '\n'); | 
|---|
|  | 2954 | } | 
|---|
|  | 2955 |  | 
|---|
|  | 2956 | break; | 
|---|
|  | 2957 | } | 
|---|
|  | 2958 |  | 
|---|
|  | 2959 | case node_pcdata: | 
|---|
|  | 2960 | text_output_escaped(writer, node.value(), ctx_special_pcdata); | 
|---|
|  | 2961 | if ((flags & format_raw) == 0) writer.write('\n'); | 
|---|
|  | 2962 | break; | 
|---|
|  | 2963 |  | 
|---|
|  | 2964 | case node_cdata: | 
|---|
|  | 2965 | text_output_cdata(writer, node.value()); | 
|---|
|  | 2966 | if ((flags & format_raw) == 0) writer.write('\n'); | 
|---|
|  | 2967 | break; | 
|---|
|  | 2968 |  | 
|---|
|  | 2969 | case node_comment: | 
|---|
|  | 2970 | writer.write('<', '!', '-', '-'); | 
|---|
|  | 2971 | writer.write(node.value()); | 
|---|
|  | 2972 | writer.write('-', '-', '>'); | 
|---|
|  | 2973 | if ((flags & format_raw) == 0) writer.write('\n'); | 
|---|
|  | 2974 | break; | 
|---|
|  | 2975 |  | 
|---|
|  | 2976 | case node_pi: | 
|---|
|  | 2977 | case node_declaration: | 
|---|
|  | 2978 | writer.write('<', '?'); | 
|---|
|  | 2979 | writer.write(node.name()[0] ? node.name() : default_name); | 
|---|
|  | 2980 |  | 
|---|
|  | 2981 | if (node.type() == node_declaration) | 
|---|
|  | 2982 | { | 
|---|
|  | 2983 | node_output_attributes(writer, node); | 
|---|
|  | 2984 | } | 
|---|
|  | 2985 | else if (node.value()[0]) | 
|---|
|  | 2986 | { | 
|---|
|  | 2987 | writer.write(' '); | 
|---|
|  | 2988 | writer.write(node.value()); | 
|---|
|  | 2989 | } | 
|---|
|  | 2990 |  | 
|---|
|  | 2991 | writer.write('?', '>'); | 
|---|
|  | 2992 | if ((flags & format_raw) == 0) writer.write('\n'); | 
|---|
|  | 2993 | break; | 
|---|
|  | 2994 |  | 
|---|
|  | 2995 | case node_doctype: | 
|---|
|  | 2996 | writer.write('<', '!', 'D', 'O', 'C'); | 
|---|
|  | 2997 | writer.write('T', 'Y', 'P', 'E'); | 
|---|
|  | 2998 |  | 
|---|
|  | 2999 | if (node.value()[0]) | 
|---|
|  | 3000 | { | 
|---|
|  | 3001 | writer.write(' '); | 
|---|
|  | 3002 | writer.write(node.value()); | 
|---|
|  | 3003 | } | 
|---|
|  | 3004 |  | 
|---|
|  | 3005 | writer.write('>'); | 
|---|
|  | 3006 | if ((flags & format_raw) == 0) writer.write('\n'); | 
|---|
|  | 3007 | break; | 
|---|
|  | 3008 |  | 
|---|
|  | 3009 | default: | 
|---|
|  | 3010 | assert(!"Invalid node type"); | 
|---|
|  | 3011 | } | 
|---|
|  | 3012 | } | 
|---|
|  | 3013 |  | 
|---|
|  | 3014 | inline bool has_declaration(const xml_node& node) | 
|---|
|  | 3015 | { | 
|---|
|  | 3016 | for (xml_node child = node.first_child(); child; child = child.next_sibling()) | 
|---|
|  | 3017 | { | 
|---|
|  | 3018 | xml_node_type type = child.type(); | 
|---|
|  | 3019 |  | 
|---|
|  | 3020 | if (type == node_declaration) return true; | 
|---|
|  | 3021 | if (type == node_element) return false; | 
|---|
|  | 3022 | } | 
|---|
|  | 3023 |  | 
|---|
|  | 3024 | return false; | 
|---|
|  | 3025 | } | 
|---|
|  | 3026 |  | 
|---|
|  | 3027 | inline bool allow_insert_child(xml_node_type parent, xml_node_type child) | 
|---|
|  | 3028 | { | 
|---|
|  | 3029 | if (parent != node_document && parent != node_element) return false; | 
|---|
|  | 3030 | if (child == node_document || child == node_null) return false; | 
|---|
|  | 3031 | if (parent != node_document && (child == node_declaration || child == node_doctype)) return false; | 
|---|
|  | 3032 |  | 
|---|
|  | 3033 | return true; | 
|---|
|  | 3034 | } | 
|---|
|  | 3035 |  | 
|---|
|  | 3036 | void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip) | 
|---|
|  | 3037 | { | 
|---|
|  | 3038 | assert(dest.type() == source.type()); | 
|---|
|  | 3039 |  | 
|---|
|  | 3040 | switch (source.type()) | 
|---|
|  | 3041 | { | 
|---|
|  | 3042 | case node_element: | 
|---|
|  | 3043 | { | 
|---|
|  | 3044 | dest.set_name(source.name()); | 
|---|
|  | 3045 |  | 
|---|
|  | 3046 | for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute()) | 
|---|
|  | 3047 | dest.append_attribute(a.name()).set_value(a.value()); | 
|---|
|  | 3048 |  | 
|---|
|  | 3049 | for (xml_node c = source.first_child(); c; c = c.next_sibling()) | 
|---|
|  | 3050 | { | 
|---|
|  | 3051 | if (c == skip) continue; | 
|---|
|  | 3052 |  | 
|---|
|  | 3053 | xml_node cc = dest.append_child(c.type()); | 
|---|
|  | 3054 | assert(cc); | 
|---|
|  | 3055 |  | 
|---|
|  | 3056 | recursive_copy_skip(cc, c, skip); | 
|---|
|  | 3057 | } | 
|---|
|  | 3058 |  | 
|---|
|  | 3059 | break; | 
|---|
|  | 3060 | } | 
|---|
|  | 3061 |  | 
|---|
|  | 3062 | case node_pcdata: | 
|---|
|  | 3063 | case node_cdata: | 
|---|
|  | 3064 | case node_comment: | 
|---|
|  | 3065 | case node_doctype: | 
|---|
|  | 3066 | dest.set_value(source.value()); | 
|---|
|  | 3067 | break; | 
|---|
|  | 3068 |  | 
|---|
|  | 3069 | case node_pi: | 
|---|
|  | 3070 | dest.set_name(source.name()); | 
|---|
|  | 3071 | dest.set_value(source.value()); | 
|---|
|  | 3072 | break; | 
|---|
|  | 3073 |  | 
|---|
|  | 3074 | case node_declaration: | 
|---|
|  | 3075 | { | 
|---|
|  | 3076 | dest.set_name(source.name()); | 
|---|
|  | 3077 |  | 
|---|
|  | 3078 | for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute()) | 
|---|
|  | 3079 | dest.append_attribute(a.name()).set_value(a.value()); | 
|---|
|  | 3080 |  | 
|---|
|  | 3081 | break; | 
|---|
|  | 3082 | } | 
|---|
|  | 3083 |  | 
|---|
|  | 3084 | default: | 
|---|
|  | 3085 | assert(!"Invalid node type"); | 
|---|
|  | 3086 | } | 
|---|
|  | 3087 | } | 
|---|
|  | 3088 |  | 
|---|
|  | 3089 | // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick | 
|---|
|  | 3090 | xml_parse_status get_file_size(FILE* file, size_t& out_result) | 
|---|
|  | 3091 | { | 
|---|
|  | 3092 | #if defined(_MSC_VER) && _MSC_VER >= 1400 | 
|---|
|  | 3093 | // there are 64-bit versions of fseek/ftell, let's use them | 
|---|
|  | 3094 | typedef __int64 length_type; | 
|---|
|  | 3095 |  | 
|---|
|  | 3096 | _fseeki64(file, 0, SEEK_END); | 
|---|
|  | 3097 | length_type length = _ftelli64(file); | 
|---|
|  | 3098 | _fseeki64(file, 0, SEEK_SET); | 
|---|
|  | 3099 | #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && !defined(__STRICT_ANSI__) | 
|---|
|  | 3100 | // there are 64-bit versions of fseek/ftell, let's use them | 
|---|
|  | 3101 | typedef off64_t length_type; | 
|---|
|  | 3102 |  | 
|---|
|  | 3103 | fseeko64(file, 0, SEEK_END); | 
|---|
|  | 3104 | length_type length = ftello64(file); | 
|---|
|  | 3105 | fseeko64(file, 0, SEEK_SET); | 
|---|
|  | 3106 | #else | 
|---|
|  | 3107 | // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway. | 
|---|
|  | 3108 | typedef long length_type; | 
|---|
|  | 3109 |  | 
|---|
|  | 3110 | fseek(file, 0, SEEK_END); | 
|---|
|  | 3111 | length_type length = ftell(file); | 
|---|
|  | 3112 | fseek(file, 0, SEEK_SET); | 
|---|
|  | 3113 | #endif | 
|---|
|  | 3114 |  | 
|---|
|  | 3115 | // check for I/O errors | 
|---|
|  | 3116 | if (length < 0) return status_io_error; | 
|---|
|  | 3117 |  | 
|---|
|  | 3118 | // check for overflow | 
|---|
|  | 3119 | size_t result = static_cast<size_t>(length); | 
|---|
|  | 3120 |  | 
|---|
|  | 3121 | if (static_cast<length_type>(result) != length) return status_out_of_memory; | 
|---|
|  | 3122 |  | 
|---|
|  | 3123 | // finalize | 
|---|
|  | 3124 | out_result = result; | 
|---|
|  | 3125 |  | 
|---|
|  | 3126 | return status_ok; | 
|---|
|  | 3127 | } | 
|---|
|  | 3128 |  | 
|---|
|  | 3129 | xml_parse_result load_file_impl(xml_document& doc, FILE* file, unsigned int options, xml_encoding encoding) | 
|---|
|  | 3130 | { | 
|---|
|  | 3131 | if (!file) return make_parse_result(status_file_not_found); | 
|---|
|  | 3132 |  | 
|---|
|  | 3133 | // get file size (can result in I/O errors) | 
|---|
|  | 3134 | size_t size = 0; | 
|---|
|  | 3135 | xml_parse_status size_status = get_file_size(file, size); | 
|---|
|  | 3136 |  | 
|---|
|  | 3137 | if (size_status != status_ok) | 
|---|
|  | 3138 | { | 
|---|
|  | 3139 | fclose(file); | 
|---|
|  | 3140 | return make_parse_result(size_status); | 
|---|
|  | 3141 | } | 
|---|
|  | 3142 |  | 
|---|
|  | 3143 | // allocate buffer for the whole file | 
|---|
|  | 3144 | char* contents = static_cast<char*>(global_allocate(size > 0 ? size : 1)); | 
|---|
|  | 3145 |  | 
|---|
|  | 3146 | if (!contents) | 
|---|
|  | 3147 | { | 
|---|
|  | 3148 | fclose(file); | 
|---|
|  | 3149 | return make_parse_result(status_out_of_memory); | 
|---|
|  | 3150 | } | 
|---|
|  | 3151 |  | 
|---|
|  | 3152 | // read file in memory | 
|---|
|  | 3153 | size_t read_size = fread(contents, 1, size, file); | 
|---|
|  | 3154 | fclose(file); | 
|---|
|  | 3155 |  | 
|---|
|  | 3156 | if (read_size != size) | 
|---|
|  | 3157 | { | 
|---|
|  | 3158 | global_deallocate(contents); | 
|---|
|  | 3159 | return make_parse_result(status_io_error); | 
|---|
|  | 3160 | } | 
|---|
|  | 3161 |  | 
|---|
|  | 3162 | return doc.load_buffer_inplace_own(contents, size, options, encoding); | 
|---|
|  | 3163 | } | 
|---|
|  | 3164 |  | 
|---|
|  | 3165 | #ifndef PUGIXML_NO_STL | 
|---|
|  | 3166 | template <typename T> xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding) | 
|---|
|  | 3167 | { | 
|---|
|  | 3168 | // get length of remaining data in stream | 
|---|
|  | 3169 | typename std::basic_istream<T>::pos_type pos = stream.tellg(); | 
|---|
|  | 3170 | stream.seekg(0, std::ios::end); | 
|---|
|  | 3171 | std::streamoff length = stream.tellg() - pos; | 
|---|
|  | 3172 | stream.seekg(pos); | 
|---|
|  | 3173 |  | 
|---|
|  | 3174 | if (stream.fail() || pos < 0) return make_parse_result(status_io_error); | 
|---|
|  | 3175 |  | 
|---|
|  | 3176 | // guard against huge files | 
|---|
|  | 3177 | size_t read_length = static_cast<size_t>(length); | 
|---|
|  | 3178 |  | 
|---|
|  | 3179 | if (static_cast<std::streamsize>(read_length) != length || length < 0) return make_parse_result(status_out_of_memory); | 
|---|
|  | 3180 |  | 
|---|
|  | 3181 | // read stream data into memory (guard against stream exceptions with buffer holder) | 
|---|
|  | 3182 | buffer_holder buffer(global_allocate((read_length > 0 ? read_length : 1) * sizeof(T)), global_deallocate); | 
|---|
|  | 3183 | if (!buffer.data) return make_parse_result(status_out_of_memory); | 
|---|
|  | 3184 |  | 
|---|
|  | 3185 | stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length)); | 
|---|
|  | 3186 |  | 
|---|
|  | 3187 | // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors | 
|---|
|  | 3188 | if (stream.bad()) return make_parse_result(status_io_error); | 
|---|
|  | 3189 |  | 
|---|
|  | 3190 | // load data from buffer | 
|---|
|  | 3191 | size_t actual_length = static_cast<size_t>(stream.gcount()); | 
|---|
|  | 3192 | assert(actual_length <= read_length); | 
|---|
|  | 3193 |  | 
|---|
|  | 3194 | return doc.load_buffer_inplace_own(buffer.release(), actual_length * sizeof(T), options, encoding); | 
|---|
|  | 3195 | } | 
|---|
|  | 3196 | #endif | 
|---|
|  | 3197 |  | 
|---|
|  | 3198 | #if defined(_MSC_VER) || defined(__BORLANDC__) || defined(__MINGW32__) | 
|---|
|  | 3199 | FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) | 
|---|
|  | 3200 | { | 
|---|
|  | 3201 | return _wfopen(path, mode); | 
|---|
|  | 3202 | } | 
|---|
|  | 3203 | #else | 
|---|
|  | 3204 | char* convert_path_heap(const wchar_t* str) | 
|---|
|  | 3205 | { | 
|---|
|  | 3206 | assert(str); | 
|---|
|  | 3207 |  | 
|---|
|  | 3208 | // first pass: get length in utf8 characters | 
|---|
|  | 3209 | size_t length = wcslen(str); | 
|---|
|  | 3210 | size_t size = as_utf8_begin(str, length); | 
|---|
|  | 3211 |  | 
|---|
|  | 3212 | // allocate resulting string | 
|---|
|  | 3213 | char* result = static_cast<char*>(global_allocate(size + 1)); | 
|---|
|  | 3214 | if (!result) return 0; | 
|---|
|  | 3215 |  | 
|---|
|  | 3216 | // second pass: convert to utf8 | 
|---|
|  | 3217 | as_utf8_end(result, size, str, length); | 
|---|
|  | 3218 |  | 
|---|
|  | 3219 | return result; | 
|---|
|  | 3220 | } | 
|---|
|  | 3221 |  | 
|---|
|  | 3222 | FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) | 
|---|
|  | 3223 | { | 
|---|
|  | 3224 | // there is no standard function to open wide paths, so our best bet is to try utf8 path | 
|---|
|  | 3225 | char* path_utf8 = convert_path_heap(path); | 
|---|
|  | 3226 | if (!path_utf8) return 0; | 
|---|
|  | 3227 |  | 
|---|
|  | 3228 | // convert mode to ASCII (we mirror _wfopen interface) | 
|---|
|  | 3229 | char mode_ascii[4] = {0}; | 
|---|
|  | 3230 | for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]); | 
|---|
|  | 3231 |  | 
|---|
|  | 3232 | // try to open the utf8 path | 
|---|
|  | 3233 | FILE* result = fopen(path_utf8, mode_ascii); | 
|---|
|  | 3234 |  | 
|---|
|  | 3235 | // free dummy buffer | 
|---|
|  | 3236 | global_deallocate(path_utf8); | 
|---|
|  | 3237 |  | 
|---|
|  | 3238 | return result; | 
|---|
|  | 3239 | } | 
|---|
|  | 3240 | #endif | 
|---|
|  | 3241 | } | 
|---|
|  | 3242 |  | 
|---|
|  | 3243 | namespace pugi | 
|---|
|  | 3244 | { | 
|---|
|  | 3245 | xml_writer_file::xml_writer_file(void* file): file(file) | 
|---|
|  | 3246 | { | 
|---|
|  | 3247 | } | 
|---|
|  | 3248 |  | 
|---|
|  | 3249 | void xml_writer_file::write(const void* data, size_t size) | 
|---|
|  | 3250 | { | 
|---|
|  | 3251 | fwrite(data, size, 1, static_cast<FILE*>(file)); | 
|---|
|  | 3252 | } | 
|---|
|  | 3253 |  | 
|---|
|  | 3254 | #ifndef PUGIXML_NO_STL | 
|---|
|  | 3255 | xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0) | 
|---|
|  | 3256 | { | 
|---|
|  | 3257 | } | 
|---|
|  | 3258 |  | 
|---|
|  | 3259 | xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream) | 
|---|
|  | 3260 | { | 
|---|
|  | 3261 | } | 
|---|
|  | 3262 |  | 
|---|
|  | 3263 | void xml_writer_stream::write(const void* data, size_t size) | 
|---|
|  | 3264 | { | 
|---|
|  | 3265 | if (narrow_stream) | 
|---|
|  | 3266 | { | 
|---|
|  | 3267 | assert(!wide_stream); | 
|---|
|  | 3268 | narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size)); | 
|---|
|  | 3269 | } | 
|---|
|  | 3270 | else | 
|---|
|  | 3271 | { | 
|---|
|  | 3272 | assert(wide_stream); | 
|---|
|  | 3273 | assert(size % sizeof(wchar_t) == 0); | 
|---|
|  | 3274 |  | 
|---|
|  | 3275 | wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t))); | 
|---|
|  | 3276 | } | 
|---|
|  | 3277 | } | 
|---|
|  | 3278 | #endif | 
|---|
|  | 3279 |  | 
|---|
|  | 3280 | xml_tree_walker::xml_tree_walker(): _depth(0) | 
|---|
|  | 3281 | { | 
|---|
|  | 3282 | } | 
|---|
|  | 3283 |  | 
|---|
|  | 3284 | xml_tree_walker::~xml_tree_walker() | 
|---|
|  | 3285 | { | 
|---|
|  | 3286 | } | 
|---|
|  | 3287 |  | 
|---|
|  | 3288 | int xml_tree_walker::depth() const | 
|---|
|  | 3289 | { | 
|---|
|  | 3290 | return _depth; | 
|---|
|  | 3291 | } | 
|---|
|  | 3292 |  | 
|---|
|  | 3293 | bool xml_tree_walker::begin(xml_node&) | 
|---|
|  | 3294 | { | 
|---|
|  | 3295 | return true; | 
|---|
|  | 3296 | } | 
|---|
|  | 3297 |  | 
|---|
|  | 3298 | bool xml_tree_walker::end(xml_node&) | 
|---|
|  | 3299 | { | 
|---|
|  | 3300 | return true; | 
|---|
|  | 3301 | } | 
|---|
|  | 3302 |  | 
|---|
|  | 3303 | xml_attribute::xml_attribute(): _attr(0) | 
|---|
|  | 3304 | { | 
|---|
|  | 3305 | } | 
|---|
|  | 3306 |  | 
|---|
|  | 3307 | xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr) | 
|---|
|  | 3308 | { | 
|---|
|  | 3309 | } | 
|---|
|  | 3310 |  | 
|---|
|  | 3311 | xml_attribute::operator xml_attribute::unspecified_bool_type() const | 
|---|
|  | 3312 | { | 
|---|
|  | 3313 | return _attr ? &xml_attribute::_attr : 0; | 
|---|
|  | 3314 | } | 
|---|
|  | 3315 |  | 
|---|
|  | 3316 | bool xml_attribute::operator!() const | 
|---|
|  | 3317 | { | 
|---|
|  | 3318 | return !_attr; | 
|---|
|  | 3319 | } | 
|---|
|  | 3320 |  | 
|---|
|  | 3321 | bool xml_attribute::operator==(const xml_attribute& r) const | 
|---|
|  | 3322 | { | 
|---|
|  | 3323 | return (_attr == r._attr); | 
|---|
|  | 3324 | } | 
|---|
|  | 3325 |  | 
|---|
|  | 3326 | bool xml_attribute::operator!=(const xml_attribute& r) const | 
|---|
|  | 3327 | { | 
|---|
|  | 3328 | return (_attr != r._attr); | 
|---|
|  | 3329 | } | 
|---|
|  | 3330 |  | 
|---|
|  | 3331 | bool xml_attribute::operator<(const xml_attribute& r) const | 
|---|
|  | 3332 | { | 
|---|
|  | 3333 | return (_attr < r._attr); | 
|---|
|  | 3334 | } | 
|---|
|  | 3335 |  | 
|---|
|  | 3336 | bool xml_attribute::operator>(const xml_attribute& r) const | 
|---|
|  | 3337 | { | 
|---|
|  | 3338 | return (_attr > r._attr); | 
|---|
|  | 3339 | } | 
|---|
|  | 3340 |  | 
|---|
|  | 3341 | bool xml_attribute::operator<=(const xml_attribute& r) const | 
|---|
|  | 3342 | { | 
|---|
|  | 3343 | return (_attr <= r._attr); | 
|---|
|  | 3344 | } | 
|---|
|  | 3345 |  | 
|---|
|  | 3346 | bool xml_attribute::operator>=(const xml_attribute& r) const | 
|---|
|  | 3347 | { | 
|---|
|  | 3348 | return (_attr >= r._attr); | 
|---|
|  | 3349 | } | 
|---|
|  | 3350 |  | 
|---|
|  | 3351 | xml_attribute xml_attribute::next_attribute() const | 
|---|
|  | 3352 | { | 
|---|
|  | 3353 | return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute(); | 
|---|
|  | 3354 | } | 
|---|
|  | 3355 |  | 
|---|
|  | 3356 | xml_attribute xml_attribute::previous_attribute() const | 
|---|
|  | 3357 | { | 
|---|
|  | 3358 | return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute(); | 
|---|
|  | 3359 | } | 
|---|
|  | 3360 |  | 
|---|
|  | 3361 | int xml_attribute::as_int() const | 
|---|
|  | 3362 | { | 
|---|
|  | 3363 | if (!_attr || !_attr->value) return 0; | 
|---|
|  | 3364 |  | 
|---|
|  | 3365 | #ifdef PUGIXML_WCHAR_MODE | 
|---|
|  | 3366 | return (int)wcstol(_attr->value, 0, 10); | 
|---|
|  | 3367 | #else | 
|---|
|  | 3368 | return (int)strtol(_attr->value, 0, 10); | 
|---|
|  | 3369 | #endif | 
|---|
|  | 3370 | } | 
|---|
|  | 3371 |  | 
|---|
|  | 3372 | unsigned int xml_attribute::as_uint() const | 
|---|
|  | 3373 | { | 
|---|
|  | 3374 | if (!_attr || !_attr->value) return 0; | 
|---|
|  | 3375 |  | 
|---|
|  | 3376 | #ifdef PUGIXML_WCHAR_MODE | 
|---|
|  | 3377 | return (unsigned int)wcstoul(_attr->value, 0, 10); | 
|---|
|  | 3378 | #else | 
|---|
|  | 3379 | return (unsigned int)strtoul(_attr->value, 0, 10); | 
|---|
|  | 3380 | #endif | 
|---|
|  | 3381 | } | 
|---|
|  | 3382 |  | 
|---|
|  | 3383 | double xml_attribute::as_double() const | 
|---|
|  | 3384 | { | 
|---|
|  | 3385 | if (!_attr || !_attr->value) return 0; | 
|---|
|  | 3386 |  | 
|---|
|  | 3387 | #ifdef PUGIXML_WCHAR_MODE | 
|---|
|  | 3388 | return wcstod(_attr->value, 0); | 
|---|
|  | 3389 | #else | 
|---|
|  | 3390 | return strtod(_attr->value, 0); | 
|---|
|  | 3391 | #endif | 
|---|
|  | 3392 | } | 
|---|
|  | 3393 |  | 
|---|
|  | 3394 | float xml_attribute::as_float() const | 
|---|
|  | 3395 | { | 
|---|
|  | 3396 | if (!_attr || !_attr->value) return 0; | 
|---|
|  | 3397 |  | 
|---|
|  | 3398 | #ifdef PUGIXML_WCHAR_MODE | 
|---|
|  | 3399 | return (float)wcstod(_attr->value, 0); | 
|---|
|  | 3400 | #else | 
|---|
|  | 3401 | return (float)strtod(_attr->value, 0); | 
|---|
|  | 3402 | #endif | 
|---|
|  | 3403 | } | 
|---|
|  | 3404 |  | 
|---|
|  | 3405 | bool xml_attribute::as_bool() const | 
|---|
|  | 3406 | { | 
|---|
|  | 3407 | if (!_attr || !_attr->value) return false; | 
|---|
|  | 3408 |  | 
|---|
|  | 3409 | // only look at first char | 
|---|
|  | 3410 | char_t first = *_attr->value; | 
|---|
|  | 3411 |  | 
|---|
|  | 3412 | // 1*, t* (true), T* (True), y* (yes), Y* (YES) | 
|---|
|  | 3413 | return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y'); | 
|---|
|  | 3414 | } | 
|---|
|  | 3415 |  | 
|---|
|  | 3416 | bool xml_attribute::empty() const | 
|---|
|  | 3417 | { | 
|---|
|  | 3418 | return !_attr; | 
|---|
|  | 3419 | } | 
|---|
|  | 3420 |  | 
|---|
|  | 3421 | const char_t* xml_attribute::name() const | 
|---|
|  | 3422 | { | 
|---|
|  | 3423 | return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT(""); | 
|---|
|  | 3424 | } | 
|---|
|  | 3425 |  | 
|---|
|  | 3426 | const char_t* xml_attribute::value() const | 
|---|
|  | 3427 | { | 
|---|
|  | 3428 | return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT(""); | 
|---|
|  | 3429 | } | 
|---|
|  | 3430 |  | 
|---|
|  | 3431 | size_t xml_attribute::hash_value() const | 
|---|
|  | 3432 | { | 
|---|
|  | 3433 | return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct)); | 
|---|
|  | 3434 | } | 
|---|
|  | 3435 |  | 
|---|
|  | 3436 | xml_attribute_struct* xml_attribute::internal_object() const | 
|---|
|  | 3437 | { | 
|---|
|  | 3438 | return _attr; | 
|---|
|  | 3439 | } | 
|---|
|  | 3440 |  | 
|---|
|  | 3441 | xml_attribute& xml_attribute::operator=(const char_t* rhs) | 
|---|
|  | 3442 | { | 
|---|
|  | 3443 | set_value(rhs); | 
|---|
|  | 3444 | return *this; | 
|---|
|  | 3445 | } | 
|---|
|  | 3446 |  | 
|---|
|  | 3447 | xml_attribute& xml_attribute::operator=(int rhs) | 
|---|
|  | 3448 | { | 
|---|
|  | 3449 | set_value(rhs); | 
|---|
|  | 3450 | return *this; | 
|---|
|  | 3451 | } | 
|---|
|  | 3452 |  | 
|---|
|  | 3453 | xml_attribute& xml_attribute::operator=(unsigned int rhs) | 
|---|
|  | 3454 | { | 
|---|
|  | 3455 | set_value(rhs); | 
|---|
|  | 3456 | return *this; | 
|---|
|  | 3457 | } | 
|---|
|  | 3458 |  | 
|---|
|  | 3459 | xml_attribute& xml_attribute::operator=(double rhs) | 
|---|
|  | 3460 | { | 
|---|
|  | 3461 | set_value(rhs); | 
|---|
|  | 3462 | return *this; | 
|---|
|  | 3463 | } | 
|---|
|  | 3464 |  | 
|---|
|  | 3465 | xml_attribute& xml_attribute::operator=(bool rhs) | 
|---|
|  | 3466 | { | 
|---|
|  | 3467 | set_value(rhs); | 
|---|
|  | 3468 | return *this; | 
|---|
|  | 3469 | } | 
|---|
|  | 3470 |  | 
|---|
|  | 3471 | bool xml_attribute::set_name(const char_t* rhs) | 
|---|
|  | 3472 | { | 
|---|
|  | 3473 | if (!_attr) return false; | 
|---|
|  | 3474 |  | 
|---|
|  | 3475 | return strcpy_insitu(_attr->name, _attr->header, xml_memory_page_name_allocated_mask, rhs); | 
|---|
|  | 3476 | } | 
|---|
|  | 3477 |  | 
|---|
|  | 3478 | bool xml_attribute::set_value(const char_t* rhs) | 
|---|
|  | 3479 | { | 
|---|
|  | 3480 | if (!_attr) return false; | 
|---|
|  | 3481 |  | 
|---|
|  | 3482 | return strcpy_insitu(_attr->value, _attr->header, xml_memory_page_value_allocated_mask, rhs); | 
|---|
|  | 3483 | } | 
|---|
|  | 3484 |  | 
|---|
|  | 3485 | bool xml_attribute::set_value(int rhs) | 
|---|
|  | 3486 | { | 
|---|
|  | 3487 | char buf[128]; | 
|---|
|  | 3488 | sprintf(buf, "%d", rhs); | 
|---|
|  | 3489 |  | 
|---|
|  | 3490 | #ifdef PUGIXML_WCHAR_MODE | 
|---|
|  | 3491 | char_t wbuf[128]; | 
|---|
|  | 3492 | widen_ascii(wbuf, buf); | 
|---|
|  | 3493 |  | 
|---|
|  | 3494 | return set_value(wbuf); | 
|---|
|  | 3495 | #else | 
|---|
|  | 3496 | return set_value(buf); | 
|---|
|  | 3497 | #endif | 
|---|
|  | 3498 | } | 
|---|
|  | 3499 |  | 
|---|
|  | 3500 | bool xml_attribute::set_value(unsigned int rhs) | 
|---|
|  | 3501 | { | 
|---|
|  | 3502 | char buf[128]; | 
|---|
|  | 3503 | sprintf(buf, "%u", rhs); | 
|---|
|  | 3504 |  | 
|---|
|  | 3505 | #ifdef PUGIXML_WCHAR_MODE | 
|---|
|  | 3506 | char_t wbuf[128]; | 
|---|
|  | 3507 | widen_ascii(wbuf, buf); | 
|---|
|  | 3508 |  | 
|---|
|  | 3509 | return set_value(wbuf); | 
|---|
|  | 3510 | #else | 
|---|
|  | 3511 | return set_value(buf); | 
|---|
|  | 3512 | #endif | 
|---|
|  | 3513 | } | 
|---|
|  | 3514 |  | 
|---|
|  | 3515 | bool xml_attribute::set_value(double rhs) | 
|---|
|  | 3516 | { | 
|---|
|  | 3517 | char buf[128]; | 
|---|
|  | 3518 | sprintf(buf, "%g", rhs); | 
|---|
|  | 3519 |  | 
|---|
|  | 3520 | #ifdef PUGIXML_WCHAR_MODE | 
|---|
|  | 3521 | char_t wbuf[128]; | 
|---|
|  | 3522 | widen_ascii(wbuf, buf); | 
|---|
|  | 3523 |  | 
|---|
|  | 3524 | return set_value(wbuf); | 
|---|
|  | 3525 | #else | 
|---|
|  | 3526 | return set_value(buf); | 
|---|
|  | 3527 | #endif | 
|---|
|  | 3528 | } | 
|---|
|  | 3529 |  | 
|---|
|  | 3530 | bool xml_attribute::set_value(bool rhs) | 
|---|
|  | 3531 | { | 
|---|
|  | 3532 | return set_value(rhs ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); | 
|---|
|  | 3533 | } | 
|---|
|  | 3534 |  | 
|---|
|  | 3535 | #ifdef __BORLANDC__ | 
|---|
|  | 3536 | bool operator&&(const xml_attribute& lhs, bool rhs) | 
|---|
|  | 3537 | { | 
|---|
|  | 3538 | return (bool)lhs && rhs; | 
|---|
|  | 3539 | } | 
|---|
|  | 3540 |  | 
|---|
|  | 3541 | bool operator||(const xml_attribute& lhs, bool rhs) | 
|---|
|  | 3542 | { | 
|---|
|  | 3543 | return (bool)lhs || rhs; | 
|---|
|  | 3544 | } | 
|---|
|  | 3545 | #endif | 
|---|
|  | 3546 |  | 
|---|
|  | 3547 | xml_node::xml_node(): _root(0) | 
|---|
|  | 3548 | { | 
|---|
|  | 3549 | } | 
|---|
|  | 3550 |  | 
|---|
|  | 3551 | xml_node::xml_node(xml_node_struct* p): _root(p) | 
|---|
|  | 3552 | { | 
|---|
|  | 3553 | } | 
|---|
|  | 3554 |  | 
|---|
|  | 3555 | xml_node::operator xml_node::unspecified_bool_type() const | 
|---|
|  | 3556 | { | 
|---|
|  | 3557 | return _root ? &xml_node::_root : 0; | 
|---|
|  | 3558 | } | 
|---|
|  | 3559 |  | 
|---|
|  | 3560 | bool xml_node::operator!() const | 
|---|
|  | 3561 | { | 
|---|
|  | 3562 | return !_root; | 
|---|
|  | 3563 | } | 
|---|
|  | 3564 |  | 
|---|
|  | 3565 | xml_node::iterator xml_node::begin() const | 
|---|
|  | 3566 | { | 
|---|
|  | 3567 | return iterator(_root ? _root->first_child : 0, _root); | 
|---|
|  | 3568 | } | 
|---|
|  | 3569 |  | 
|---|
|  | 3570 | xml_node::iterator xml_node::end() const | 
|---|
|  | 3571 | { | 
|---|
|  | 3572 | return iterator(0, _root); | 
|---|
|  | 3573 | } | 
|---|
|  | 3574 |  | 
|---|
|  | 3575 | xml_node::attribute_iterator xml_node::attributes_begin() const | 
|---|
|  | 3576 | { | 
|---|
|  | 3577 | return attribute_iterator(_root ? _root->first_attribute : 0, _root); | 
|---|
|  | 3578 | } | 
|---|
|  | 3579 |  | 
|---|
|  | 3580 | xml_node::attribute_iterator xml_node::attributes_end() const | 
|---|
|  | 3581 | { | 
|---|
|  | 3582 | return attribute_iterator(0, _root); | 
|---|
|  | 3583 | } | 
|---|
|  | 3584 |  | 
|---|
|  | 3585 | bool xml_node::operator==(const xml_node& r) const | 
|---|
|  | 3586 | { | 
|---|
|  | 3587 | return (_root == r._root); | 
|---|
|  | 3588 | } | 
|---|
|  | 3589 |  | 
|---|
|  | 3590 | bool xml_node::operator!=(const xml_node& r) const | 
|---|
|  | 3591 | { | 
|---|
|  | 3592 | return (_root != r._root); | 
|---|
|  | 3593 | } | 
|---|
|  | 3594 |  | 
|---|
|  | 3595 | bool xml_node::operator<(const xml_node& r) const | 
|---|
|  | 3596 | { | 
|---|
|  | 3597 | return (_root < r._root); | 
|---|
|  | 3598 | } | 
|---|
|  | 3599 |  | 
|---|
|  | 3600 | bool xml_node::operator>(const xml_node& r) const | 
|---|
|  | 3601 | { | 
|---|
|  | 3602 | return (_root > r._root); | 
|---|
|  | 3603 | } | 
|---|
|  | 3604 |  | 
|---|
|  | 3605 | bool xml_node::operator<=(const xml_node& r) const | 
|---|
|  | 3606 | { | 
|---|
|  | 3607 | return (_root <= r._root); | 
|---|
|  | 3608 | } | 
|---|
|  | 3609 |  | 
|---|
|  | 3610 | bool xml_node::operator>=(const xml_node& r) const | 
|---|
|  | 3611 | { | 
|---|
|  | 3612 | return (_root >= r._root); | 
|---|
|  | 3613 | } | 
|---|
|  | 3614 |  | 
|---|
|  | 3615 | bool xml_node::empty() const | 
|---|
|  | 3616 | { | 
|---|
|  | 3617 | return !_root; | 
|---|
|  | 3618 | } | 
|---|
|  | 3619 |  | 
|---|
|  | 3620 | const char_t* xml_node::name() const | 
|---|
|  | 3621 | { | 
|---|
|  | 3622 | return (_root && _root->name) ? _root->name : PUGIXML_TEXT(""); | 
|---|
|  | 3623 | } | 
|---|
|  | 3624 |  | 
|---|
|  | 3625 | xml_node_type xml_node::type() const | 
|---|
|  | 3626 | { | 
|---|
|  | 3627 | return _root ? static_cast<xml_node_type>((_root->header & xml_memory_page_type_mask) + 1) : node_null; | 
|---|
|  | 3628 | } | 
|---|
|  | 3629 |  | 
|---|
|  | 3630 | const char_t* xml_node::value() const | 
|---|
|  | 3631 | { | 
|---|
|  | 3632 | return (_root && _root->value) ? _root->value : PUGIXML_TEXT(""); | 
|---|
|  | 3633 | } | 
|---|
|  | 3634 |  | 
|---|
|  | 3635 | xml_node xml_node::child(const char_t* name) const | 
|---|
|  | 3636 | { | 
|---|
|  | 3637 | if (!_root) return xml_node(); | 
|---|
|  | 3638 |  | 
|---|
|  | 3639 | for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) | 
|---|
|  | 3640 | if (i->name && strequal(name, i->name)) return xml_node(i); | 
|---|
|  | 3641 |  | 
|---|
|  | 3642 | return xml_node(); | 
|---|
|  | 3643 | } | 
|---|
|  | 3644 |  | 
|---|
|  | 3645 | xml_attribute xml_node::attribute(const char_t* name) const | 
|---|
|  | 3646 | { | 
|---|
|  | 3647 | if (!_root) return xml_attribute(); | 
|---|
|  | 3648 |  | 
|---|
|  | 3649 | for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) | 
|---|
|  | 3650 | if (i->name && strequal(name, i->name)) | 
|---|
|  | 3651 | return xml_attribute(i); | 
|---|
|  | 3652 |  | 
|---|
|  | 3653 | return xml_attribute(); | 
|---|
|  | 3654 | } | 
|---|
|  | 3655 |  | 
|---|
|  | 3656 | xml_node xml_node::next_sibling(const char_t* name) const | 
|---|
|  | 3657 | { | 
|---|
|  | 3658 | if (!_root) return xml_node(); | 
|---|
|  | 3659 |  | 
|---|
|  | 3660 | for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) | 
|---|
|  | 3661 | if (i->name && strequal(name, i->name)) return xml_node(i); | 
|---|
|  | 3662 |  | 
|---|
|  | 3663 | return xml_node(); | 
|---|
|  | 3664 | } | 
|---|
|  | 3665 |  | 
|---|
|  | 3666 | xml_node xml_node::next_sibling() const | 
|---|
|  | 3667 | { | 
|---|
|  | 3668 | if (!_root) return xml_node(); | 
|---|
|  | 3669 |  | 
|---|
|  | 3670 | if (_root->next_sibling) return xml_node(_root->next_sibling); | 
|---|
|  | 3671 | else return xml_node(); | 
|---|
|  | 3672 | } | 
|---|
|  | 3673 |  | 
|---|
|  | 3674 | xml_node xml_node::previous_sibling(const char_t* name) const | 
|---|
|  | 3675 | { | 
|---|
|  | 3676 | if (!_root) return xml_node(); | 
|---|
|  | 3677 |  | 
|---|
|  | 3678 | for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) | 
|---|
|  | 3679 | if (i->name && strequal(name, i->name)) return xml_node(i); | 
|---|
|  | 3680 |  | 
|---|
|  | 3681 | return xml_node(); | 
|---|
|  | 3682 | } | 
|---|
|  | 3683 |  | 
|---|
|  | 3684 | xml_node xml_node::previous_sibling() const | 
|---|
|  | 3685 | { | 
|---|
|  | 3686 | if (!_root) return xml_node(); | 
|---|
|  | 3687 |  | 
|---|
|  | 3688 | if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c); | 
|---|
|  | 3689 | else return xml_node(); | 
|---|
|  | 3690 | } | 
|---|
|  | 3691 |  | 
|---|
|  | 3692 | xml_node xml_node::parent() const | 
|---|
|  | 3693 | { | 
|---|
|  | 3694 | return _root ? xml_node(_root->parent) : xml_node(); | 
|---|
|  | 3695 | } | 
|---|
|  | 3696 |  | 
|---|
|  | 3697 | xml_node xml_node::root() const | 
|---|
|  | 3698 | { | 
|---|
|  | 3699 | if (!_root) return xml_node(); | 
|---|
|  | 3700 |  | 
|---|
|  | 3701 | xml_memory_page* page = reinterpret_cast<xml_memory_page*>(_root->header & xml_memory_page_pointer_mask); | 
|---|
|  | 3702 |  | 
|---|
|  | 3703 | return xml_node(static_cast<xml_document_struct*>(page->allocator)); | 
|---|
|  | 3704 | } | 
|---|
|  | 3705 |  | 
|---|
|  | 3706 | const char_t* xml_node::child_value() const | 
|---|
|  | 3707 | { | 
|---|
|  | 3708 | if (!_root) return PUGIXML_TEXT(""); | 
|---|
|  | 3709 |  | 
|---|
|  | 3710 | for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) | 
|---|
|  | 3711 | { | 
|---|
|  | 3712 | xml_node_type type = static_cast<xml_node_type>((i->header & xml_memory_page_type_mask) + 1); | 
|---|
|  | 3713 |  | 
|---|
|  | 3714 | if (i->value && (type == node_pcdata || type == node_cdata)) | 
|---|
|  | 3715 | return i->value; | 
|---|
|  | 3716 | } | 
|---|
|  | 3717 |  | 
|---|
|  | 3718 | return PUGIXML_TEXT(""); | 
|---|
|  | 3719 | } | 
|---|
|  | 3720 |  | 
|---|
|  | 3721 | const char_t* xml_node::child_value(const char_t* name) const | 
|---|
|  | 3722 | { | 
|---|
|  | 3723 | return child(name).child_value(); | 
|---|
|  | 3724 | } | 
|---|
|  | 3725 |  | 
|---|
|  | 3726 | xml_attribute xml_node::first_attribute() const | 
|---|
|  | 3727 | { | 
|---|
|  | 3728 | return _root ? xml_attribute(_root->first_attribute) : xml_attribute(); | 
|---|
|  | 3729 | } | 
|---|
|  | 3730 |  | 
|---|
|  | 3731 | xml_attribute xml_node::last_attribute() const | 
|---|
|  | 3732 | { | 
|---|
|  | 3733 | return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute(); | 
|---|
|  | 3734 | } | 
|---|
|  | 3735 |  | 
|---|
|  | 3736 | xml_node xml_node::first_child() const | 
|---|
|  | 3737 | { | 
|---|
|  | 3738 | return _root ? xml_node(_root->first_child) : xml_node(); | 
|---|
|  | 3739 | } | 
|---|
|  | 3740 |  | 
|---|
|  | 3741 | xml_node xml_node::last_child() const | 
|---|
|  | 3742 | { | 
|---|
|  | 3743 | return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node(); | 
|---|
|  | 3744 | } | 
|---|
|  | 3745 |  | 
|---|
|  | 3746 | bool xml_node::set_name(const char_t* rhs) | 
|---|
|  | 3747 | { | 
|---|
|  | 3748 | switch (type()) | 
|---|
|  | 3749 | { | 
|---|
|  | 3750 | case node_pi: | 
|---|
|  | 3751 | case node_declaration: | 
|---|
|  | 3752 | case node_element: | 
|---|
|  | 3753 | return strcpy_insitu(_root->name, _root->header, xml_memory_page_name_allocated_mask, rhs); | 
|---|
|  | 3754 |  | 
|---|
|  | 3755 | default: | 
|---|
|  | 3756 | return false; | 
|---|
|  | 3757 | } | 
|---|
|  | 3758 | } | 
|---|
|  | 3759 |  | 
|---|
|  | 3760 | bool xml_node::set_value(const char_t* rhs) | 
|---|
|  | 3761 | { | 
|---|
|  | 3762 | switch (type()) | 
|---|
|  | 3763 | { | 
|---|
|  | 3764 | case node_pi: | 
|---|
|  | 3765 | case node_cdata: | 
|---|
|  | 3766 | case node_pcdata: | 
|---|
|  | 3767 | case node_comment: | 
|---|
|  | 3768 | case node_doctype: | 
|---|
|  | 3769 | return strcpy_insitu(_root->value, _root->header, xml_memory_page_value_allocated_mask, rhs); | 
|---|
|  | 3770 |  | 
|---|
|  | 3771 | default: | 
|---|
|  | 3772 | return false; | 
|---|
|  | 3773 | } | 
|---|
|  | 3774 | } | 
|---|
|  | 3775 |  | 
|---|
|  | 3776 | xml_attribute xml_node::append_attribute(const char_t* name) | 
|---|
|  | 3777 | { | 
|---|
|  | 3778 | if (type() != node_element && type() != node_declaration) return xml_attribute(); | 
|---|
|  | 3779 |  | 
|---|
|  | 3780 | xml_attribute a(append_attribute_ll(_root, get_allocator(_root))); | 
|---|
|  | 3781 | a.set_name(name); | 
|---|
|  | 3782 |  | 
|---|
|  | 3783 | return a; | 
|---|
|  | 3784 | } | 
|---|
|  | 3785 |  | 
|---|
|  | 3786 | xml_attribute xml_node::prepend_attribute(const char_t* name) | 
|---|
|  | 3787 | { | 
|---|
|  | 3788 | if (type() != node_element && type() != node_declaration) return xml_attribute(); | 
|---|
|  | 3789 |  | 
|---|
|  | 3790 | xml_attribute a(allocate_attribute(get_allocator(_root))); | 
|---|
|  | 3791 | if (!a) return xml_attribute(); | 
|---|
|  | 3792 |  | 
|---|
|  | 3793 | a.set_name(name); | 
|---|
|  | 3794 |  | 
|---|
|  | 3795 | xml_attribute_struct* head = _root->first_attribute; | 
|---|
|  | 3796 |  | 
|---|
|  | 3797 | if (head) | 
|---|
|  | 3798 | { | 
|---|
|  | 3799 | a._attr->prev_attribute_c = head->prev_attribute_c; | 
|---|
|  | 3800 | head->prev_attribute_c = a._attr; | 
|---|
|  | 3801 | } | 
|---|
|  | 3802 | else | 
|---|
|  | 3803 | a._attr->prev_attribute_c = a._attr; | 
|---|
|  | 3804 |  | 
|---|
|  | 3805 | a._attr->next_attribute = head; | 
|---|
|  | 3806 | _root->first_attribute = a._attr; | 
|---|
|  | 3807 |  | 
|---|
|  | 3808 | return a; | 
|---|
|  | 3809 | } | 
|---|
|  | 3810 |  | 
|---|
|  | 3811 | xml_attribute xml_node::insert_attribute_before(const char_t* name, const xml_attribute& attr) | 
|---|
|  | 3812 | { | 
|---|
|  | 3813 | if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute(); | 
|---|
|  | 3814 |  | 
|---|
|  | 3815 | // check that attribute belongs to *this | 
|---|
|  | 3816 | xml_attribute_struct* cur = attr._attr; | 
|---|
|  | 3817 |  | 
|---|
|  | 3818 | while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c; | 
|---|
|  | 3819 |  | 
|---|
|  | 3820 | if (cur != _root->first_attribute) return xml_attribute(); | 
|---|
|  | 3821 |  | 
|---|
|  | 3822 | xml_attribute a(allocate_attribute(get_allocator(_root))); | 
|---|
|  | 3823 | if (!a) return xml_attribute(); | 
|---|
|  | 3824 |  | 
|---|
|  | 3825 | a.set_name(name); | 
|---|
|  | 3826 |  | 
|---|
|  | 3827 | if (attr._attr->prev_attribute_c->next_attribute) | 
|---|
|  | 3828 | attr._attr->prev_attribute_c->next_attribute = a._attr; | 
|---|
|  | 3829 | else | 
|---|
|  | 3830 | _root->first_attribute = a._attr; | 
|---|
|  | 3831 |  | 
|---|
|  | 3832 | a._attr->prev_attribute_c = attr._attr->prev_attribute_c; | 
|---|
|  | 3833 | a._attr->next_attribute = attr._attr; | 
|---|
|  | 3834 | attr._attr->prev_attribute_c = a._attr; | 
|---|
|  | 3835 |  | 
|---|
|  | 3836 | return a; | 
|---|
|  | 3837 | } | 
|---|
|  | 3838 |  | 
|---|
|  | 3839 | xml_attribute xml_node::insert_attribute_after(const char_t* name, const xml_attribute& attr) | 
|---|
|  | 3840 | { | 
|---|
|  | 3841 | if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute(); | 
|---|
|  | 3842 |  | 
|---|
|  | 3843 | // check that attribute belongs to *this | 
|---|
|  | 3844 | xml_attribute_struct* cur = attr._attr; | 
|---|
|  | 3845 |  | 
|---|
|  | 3846 | while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c; | 
|---|
|  | 3847 |  | 
|---|
|  | 3848 | if (cur != _root->first_attribute) return xml_attribute(); | 
|---|
|  | 3849 |  | 
|---|
|  | 3850 | xml_attribute a(allocate_attribute(get_allocator(_root))); | 
|---|
|  | 3851 | if (!a) return xml_attribute(); | 
|---|
|  | 3852 |  | 
|---|
|  | 3853 | a.set_name(name); | 
|---|
|  | 3854 |  | 
|---|
|  | 3855 | if (attr._attr->next_attribute) | 
|---|
|  | 3856 | attr._attr->next_attribute->prev_attribute_c = a._attr; | 
|---|
|  | 3857 | else | 
|---|
|  | 3858 | _root->first_attribute->prev_attribute_c = a._attr; | 
|---|
|  | 3859 |  | 
|---|
|  | 3860 | a._attr->next_attribute = attr._attr->next_attribute; | 
|---|
|  | 3861 | a._attr->prev_attribute_c = attr._attr; | 
|---|
|  | 3862 | attr._attr->next_attribute = a._attr; | 
|---|
|  | 3863 |  | 
|---|
|  | 3864 | return a; | 
|---|
|  | 3865 | } | 
|---|
|  | 3866 |  | 
|---|
|  | 3867 | xml_attribute xml_node::append_copy(const xml_attribute& proto) | 
|---|
|  | 3868 | { | 
|---|
|  | 3869 | if (!proto) return xml_attribute(); | 
|---|
|  | 3870 |  | 
|---|
|  | 3871 | xml_attribute result = append_attribute(proto.name()); | 
|---|
|  | 3872 | result.set_value(proto.value()); | 
|---|
|  | 3873 |  | 
|---|
|  | 3874 | return result; | 
|---|
|  | 3875 | } | 
|---|
|  | 3876 |  | 
|---|
|  | 3877 | xml_attribute xml_node::prepend_copy(const xml_attribute& proto) | 
|---|
|  | 3878 | { | 
|---|
|  | 3879 | if (!proto) return xml_attribute(); | 
|---|
|  | 3880 |  | 
|---|
|  | 3881 | xml_attribute result = prepend_attribute(proto.name()); | 
|---|
|  | 3882 | result.set_value(proto.value()); | 
|---|
|  | 3883 |  | 
|---|
|  | 3884 | return result; | 
|---|
|  | 3885 | } | 
|---|
|  | 3886 |  | 
|---|
|  | 3887 | xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr) | 
|---|
|  | 3888 | { | 
|---|
|  | 3889 | if (!proto) return xml_attribute(); | 
|---|
|  | 3890 |  | 
|---|
|  | 3891 | xml_attribute result = insert_attribute_after(proto.name(), attr); | 
|---|
|  | 3892 | result.set_value(proto.value()); | 
|---|
|  | 3893 |  | 
|---|
|  | 3894 | return result; | 
|---|
|  | 3895 | } | 
|---|
|  | 3896 |  | 
|---|
|  | 3897 | xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr) | 
|---|
|  | 3898 | { | 
|---|
|  | 3899 | if (!proto) return xml_attribute(); | 
|---|
|  | 3900 |  | 
|---|
|  | 3901 | xml_attribute result = insert_attribute_before(proto.name(), attr); | 
|---|
|  | 3902 | result.set_value(proto.value()); | 
|---|
|  | 3903 |  | 
|---|
|  | 3904 | return result; | 
|---|
|  | 3905 | } | 
|---|
|  | 3906 |  | 
|---|
|  | 3907 | xml_node xml_node::append_child(xml_node_type type) | 
|---|
|  | 3908 | { | 
|---|
|  | 3909 | if (!allow_insert_child(this->type(), type)) return xml_node(); | 
|---|
|  | 3910 |  | 
|---|
|  | 3911 | xml_node n(append_node(_root, get_allocator(_root), type)); | 
|---|
|  | 3912 |  | 
|---|
|  | 3913 | if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); | 
|---|
|  | 3914 |  | 
|---|
|  | 3915 | return n; | 
|---|
|  | 3916 | } | 
|---|
|  | 3917 |  | 
|---|
|  | 3918 | xml_node xml_node::prepend_child(xml_node_type type) | 
|---|
|  | 3919 | { | 
|---|
|  | 3920 | if (!allow_insert_child(this->type(), type)) return xml_node(); | 
|---|
|  | 3921 |  | 
|---|
|  | 3922 | xml_node n(allocate_node(get_allocator(_root), type)); | 
|---|
|  | 3923 | if (!n) return xml_node(); | 
|---|
|  | 3924 |  | 
|---|
|  | 3925 | n._root->parent = _root; | 
|---|
|  | 3926 |  | 
|---|
|  | 3927 | xml_node_struct* head = _root->first_child; | 
|---|
|  | 3928 |  | 
|---|
|  | 3929 | if (head) | 
|---|
|  | 3930 | { | 
|---|
|  | 3931 | n._root->prev_sibling_c = head->prev_sibling_c; | 
|---|
|  | 3932 | head->prev_sibling_c = n._root; | 
|---|
|  | 3933 | } | 
|---|
|  | 3934 | else | 
|---|
|  | 3935 | n._root->prev_sibling_c = n._root; | 
|---|
|  | 3936 |  | 
|---|
|  | 3937 | n._root->next_sibling = head; | 
|---|
|  | 3938 | _root->first_child = n._root; | 
|---|
|  | 3939 |  | 
|---|
|  | 3940 | if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); | 
|---|
|  | 3941 |  | 
|---|
|  | 3942 | return n; | 
|---|
|  | 3943 | } | 
|---|
|  | 3944 |  | 
|---|
|  | 3945 | xml_node xml_node::insert_child_before(xml_node_type type, const xml_node& node) | 
|---|
|  | 3946 | { | 
|---|
|  | 3947 | if (!allow_insert_child(this->type(), type)) return xml_node(); | 
|---|
|  | 3948 | if (!node._root || node._root->parent != _root) return xml_node(); | 
|---|
|  | 3949 |  | 
|---|
|  | 3950 | xml_node n(allocate_node(get_allocator(_root), type)); | 
|---|
|  | 3951 | if (!n) return xml_node(); | 
|---|
|  | 3952 |  | 
|---|
|  | 3953 | n._root->parent = _root; | 
|---|
|  | 3954 |  | 
|---|
|  | 3955 | if (node._root->prev_sibling_c->next_sibling) | 
|---|
|  | 3956 | node._root->prev_sibling_c->next_sibling = n._root; | 
|---|
|  | 3957 | else | 
|---|
|  | 3958 | _root->first_child = n._root; | 
|---|
|  | 3959 |  | 
|---|
|  | 3960 | n._root->prev_sibling_c = node._root->prev_sibling_c; | 
|---|
|  | 3961 | n._root->next_sibling = node._root; | 
|---|
|  | 3962 | node._root->prev_sibling_c = n._root; | 
|---|
|  | 3963 |  | 
|---|
|  | 3964 | if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); | 
|---|
|  | 3965 |  | 
|---|
|  | 3966 | return n; | 
|---|
|  | 3967 | } | 
|---|
|  | 3968 |  | 
|---|
|  | 3969 | xml_node xml_node::insert_child_after(xml_node_type type, const xml_node& node) | 
|---|
|  | 3970 | { | 
|---|
|  | 3971 | if (!allow_insert_child(this->type(), type)) return xml_node(); | 
|---|
|  | 3972 | if (!node._root || node._root->parent != _root) return xml_node(); | 
|---|
|  | 3973 |  | 
|---|
|  | 3974 | xml_node n(allocate_node(get_allocator(_root), type)); | 
|---|
|  | 3975 | if (!n) return xml_node(); | 
|---|
|  | 3976 |  | 
|---|
|  | 3977 | n._root->parent = _root; | 
|---|
|  | 3978 |  | 
|---|
|  | 3979 | if (node._root->next_sibling) | 
|---|
|  | 3980 | node._root->next_sibling->prev_sibling_c = n._root; | 
|---|
|  | 3981 | else | 
|---|
|  | 3982 | _root->first_child->prev_sibling_c = n._root; | 
|---|
|  | 3983 |  | 
|---|
|  | 3984 | n._root->next_sibling = node._root->next_sibling; | 
|---|
|  | 3985 | n._root->prev_sibling_c = node._root; | 
|---|
|  | 3986 | node._root->next_sibling = n._root; | 
|---|
|  | 3987 |  | 
|---|
|  | 3988 | if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); | 
|---|
|  | 3989 |  | 
|---|
|  | 3990 | return n; | 
|---|
|  | 3991 | } | 
|---|
|  | 3992 |  | 
|---|
|  | 3993 | xml_node xml_node::append_child(const char_t* name) | 
|---|
|  | 3994 | { | 
|---|
|  | 3995 | xml_node result = append_child(node_element); | 
|---|
|  | 3996 |  | 
|---|
|  | 3997 | result.set_name(name); | 
|---|
|  | 3998 |  | 
|---|
|  | 3999 | return result; | 
|---|
|  | 4000 | } | 
|---|
|  | 4001 |  | 
|---|
|  | 4002 | xml_node xml_node::prepend_child(const char_t* name) | 
|---|
|  | 4003 | { | 
|---|
|  | 4004 | xml_node result = prepend_child(node_element); | 
|---|
|  | 4005 |  | 
|---|
|  | 4006 | result.set_name(name); | 
|---|
|  | 4007 |  | 
|---|
|  | 4008 | return result; | 
|---|
|  | 4009 | } | 
|---|
|  | 4010 |  | 
|---|
|  | 4011 | xml_node xml_node::insert_child_after(const char_t* name, const xml_node& node) | 
|---|
|  | 4012 | { | 
|---|
|  | 4013 | xml_node result = insert_child_after(node_element, node); | 
|---|
|  | 4014 |  | 
|---|
|  | 4015 | result.set_name(name); | 
|---|
|  | 4016 |  | 
|---|
|  | 4017 | return result; | 
|---|
|  | 4018 | } | 
|---|
|  | 4019 |  | 
|---|
|  | 4020 | xml_node xml_node::insert_child_before(const char_t* name, const xml_node& node) | 
|---|
|  | 4021 | { | 
|---|
|  | 4022 | xml_node result = insert_child_before(node_element, node); | 
|---|
|  | 4023 |  | 
|---|
|  | 4024 | result.set_name(name); | 
|---|
|  | 4025 |  | 
|---|
|  | 4026 | return result; | 
|---|
|  | 4027 | } | 
|---|
|  | 4028 |  | 
|---|
|  | 4029 | xml_node xml_node::append_copy(const xml_node& proto) | 
|---|
|  | 4030 | { | 
|---|
|  | 4031 | xml_node result = append_child(proto.type()); | 
|---|
|  | 4032 |  | 
|---|
|  | 4033 | if (result) recursive_copy_skip(result, proto, result); | 
|---|
|  | 4034 |  | 
|---|
|  | 4035 | return result; | 
|---|
|  | 4036 | } | 
|---|
|  | 4037 |  | 
|---|
|  | 4038 | xml_node xml_node::prepend_copy(const xml_node& proto) | 
|---|
|  | 4039 | { | 
|---|
|  | 4040 | xml_node result = prepend_child(proto.type()); | 
|---|
|  | 4041 |  | 
|---|
|  | 4042 | if (result) recursive_copy_skip(result, proto, result); | 
|---|
|  | 4043 |  | 
|---|
|  | 4044 | return result; | 
|---|
|  | 4045 | } | 
|---|
|  | 4046 |  | 
|---|
|  | 4047 | xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node) | 
|---|
|  | 4048 | { | 
|---|
|  | 4049 | xml_node result = insert_child_after(proto.type(), node); | 
|---|
|  | 4050 |  | 
|---|
|  | 4051 | if (result) recursive_copy_skip(result, proto, result); | 
|---|
|  | 4052 |  | 
|---|
|  | 4053 | return result; | 
|---|
|  | 4054 | } | 
|---|
|  | 4055 |  | 
|---|
|  | 4056 | xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node) | 
|---|
|  | 4057 | { | 
|---|
|  | 4058 | xml_node result = insert_child_before(proto.type(), node); | 
|---|
|  | 4059 |  | 
|---|
|  | 4060 | if (result) recursive_copy_skip(result, proto, result); | 
|---|
|  | 4061 |  | 
|---|
|  | 4062 | return result; | 
|---|
|  | 4063 | } | 
|---|
|  | 4064 |  | 
|---|
|  | 4065 | bool xml_node::remove_attribute(const char_t* name) | 
|---|
|  | 4066 | { | 
|---|
|  | 4067 | return remove_attribute(attribute(name)); | 
|---|
|  | 4068 | } | 
|---|
|  | 4069 |  | 
|---|
|  | 4070 | bool xml_node::remove_attribute(const xml_attribute& a) | 
|---|
|  | 4071 | { | 
|---|
|  | 4072 | if (!_root || !a._attr) return false; | 
|---|
|  | 4073 |  | 
|---|
|  | 4074 | // check that attribute belongs to *this | 
|---|
|  | 4075 | xml_attribute_struct* attr = a._attr; | 
|---|
|  | 4076 |  | 
|---|
|  | 4077 | while (attr->prev_attribute_c->next_attribute) attr = attr->prev_attribute_c; | 
|---|
|  | 4078 |  | 
|---|
|  | 4079 | if (attr != _root->first_attribute) return false; | 
|---|
|  | 4080 |  | 
|---|
|  | 4081 | if (a._attr->next_attribute) a._attr->next_attribute->prev_attribute_c = a._attr->prev_attribute_c; | 
|---|
|  | 4082 | else if (_root->first_attribute) _root->first_attribute->prev_attribute_c = a._attr->prev_attribute_c; | 
|---|
|  | 4083 |  | 
|---|
|  | 4084 | if (a._attr->prev_attribute_c->next_attribute) a._attr->prev_attribute_c->next_attribute = a._attr->next_attribute; | 
|---|
|  | 4085 | else _root->first_attribute = a._attr->next_attribute; | 
|---|
|  | 4086 |  | 
|---|
|  | 4087 | destroy_attribute(a._attr, get_allocator(_root)); | 
|---|
|  | 4088 |  | 
|---|
|  | 4089 | return true; | 
|---|
|  | 4090 | } | 
|---|
|  | 4091 |  | 
|---|
|  | 4092 | bool xml_node::remove_child(const char_t* name) | 
|---|
|  | 4093 | { | 
|---|
|  | 4094 | return remove_child(child(name)); | 
|---|
|  | 4095 | } | 
|---|
|  | 4096 |  | 
|---|
|  | 4097 | bool xml_node::remove_child(const xml_node& n) | 
|---|
|  | 4098 | { | 
|---|
|  | 4099 | if (!_root || !n._root || n._root->parent != _root) return false; | 
|---|
|  | 4100 |  | 
|---|
|  | 4101 | if (n._root->next_sibling) n._root->next_sibling->prev_sibling_c = n._root->prev_sibling_c; | 
|---|
|  | 4102 | else if (_root->first_child) _root->first_child->prev_sibling_c = n._root->prev_sibling_c; | 
|---|
|  | 4103 |  | 
|---|
|  | 4104 | if (n._root->prev_sibling_c->next_sibling) n._root->prev_sibling_c->next_sibling = n._root->next_sibling; | 
|---|
|  | 4105 | else _root->first_child = n._root->next_sibling; | 
|---|
|  | 4106 |  | 
|---|
|  | 4107 | destroy_node(n._root, get_allocator(_root)); | 
|---|
|  | 4108 |  | 
|---|
|  | 4109 | return true; | 
|---|
|  | 4110 | } | 
|---|
|  | 4111 |  | 
|---|
|  | 4112 | xml_node xml_node::find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const | 
|---|
|  | 4113 | { | 
|---|
|  | 4114 | if (!_root) return xml_node(); | 
|---|
|  | 4115 |  | 
|---|
|  | 4116 | for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) | 
|---|
|  | 4117 | if (i->name && strequal(name, i->name)) | 
|---|
|  | 4118 | { | 
|---|
|  | 4119 | for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) | 
|---|
|  | 4120 | if (strequal(attr_name, a->name) && strequal(attr_value, a->value)) | 
|---|
|  | 4121 | return xml_node(i); | 
|---|
|  | 4122 | } | 
|---|
|  | 4123 |  | 
|---|
|  | 4124 | return xml_node(); | 
|---|
|  | 4125 | } | 
|---|
|  | 4126 |  | 
|---|
|  | 4127 | xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const | 
|---|
|  | 4128 | { | 
|---|
|  | 4129 | if (!_root) return xml_node(); | 
|---|
|  | 4130 |  | 
|---|
|  | 4131 | for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) | 
|---|
|  | 4132 | for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) | 
|---|
|  | 4133 | if (strequal(attr_name, a->name) && strequal(attr_value, a->value)) | 
|---|
|  | 4134 | return xml_node(i); | 
|---|
|  | 4135 |  | 
|---|
|  | 4136 | return xml_node(); | 
|---|
|  | 4137 | } | 
|---|
|  | 4138 |  | 
|---|
|  | 4139 | #ifndef PUGIXML_NO_STL | 
|---|
|  | 4140 | string_t xml_node::path(char_t delimiter) const | 
|---|
|  | 4141 | { | 
|---|
|  | 4142 | string_t path; | 
|---|
|  | 4143 |  | 
|---|
|  | 4144 | xml_node cursor = *this; // Make a copy. | 
|---|
|  | 4145 |  | 
|---|
|  | 4146 | path = cursor.name(); | 
|---|
|  | 4147 |  | 
|---|
|  | 4148 | while (cursor.parent()) | 
|---|
|  | 4149 | { | 
|---|
|  | 4150 | cursor = cursor.parent(); | 
|---|
|  | 4151 |  | 
|---|
|  | 4152 | string_t temp = cursor.name(); | 
|---|
|  | 4153 | temp += delimiter; | 
|---|
|  | 4154 | temp += path; | 
|---|
|  | 4155 | path.swap(temp); | 
|---|
|  | 4156 | } | 
|---|
|  | 4157 |  | 
|---|
|  | 4158 | return path; | 
|---|
|  | 4159 | } | 
|---|
|  | 4160 | #endif | 
|---|
|  | 4161 |  | 
|---|
|  | 4162 | xml_node xml_node::first_element_by_path(const char_t* path, char_t delimiter) const | 
|---|
|  | 4163 | { | 
|---|
|  | 4164 | xml_node found = *this; // Current search context. | 
|---|
|  | 4165 |  | 
|---|
|  | 4166 | if (!_root || !path || !path[0]) return found; | 
|---|
|  | 4167 |  | 
|---|
|  | 4168 | if (path[0] == delimiter) | 
|---|
|  | 4169 | { | 
|---|
|  | 4170 | // Absolute path; e.g. '/foo/bar' | 
|---|
|  | 4171 | found = found.root(); | 
|---|
|  | 4172 | ++path; | 
|---|
|  | 4173 | } | 
|---|
|  | 4174 |  | 
|---|
|  | 4175 | const char_t* path_segment = path; | 
|---|
|  | 4176 |  | 
|---|
|  | 4177 | while (*path_segment == delimiter) ++path_segment; | 
|---|
|  | 4178 |  | 
|---|
|  | 4179 | const char_t* path_segment_end = path_segment; | 
|---|
|  | 4180 |  | 
|---|
|  | 4181 | while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end; | 
|---|
|  | 4182 |  | 
|---|
|  | 4183 | if (path_segment == path_segment_end) return found; | 
|---|
|  | 4184 |  | 
|---|
|  | 4185 | const char_t* next_segment = path_segment_end; | 
|---|
|  | 4186 |  | 
|---|
|  | 4187 | while (*next_segment == delimiter) ++next_segment; | 
|---|
|  | 4188 |  | 
|---|
|  | 4189 | if (*path_segment == '.' && path_segment + 1 == path_segment_end) | 
|---|
|  | 4190 | return found.first_element_by_path(next_segment, delimiter); | 
|---|
|  | 4191 | else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end) | 
|---|
|  | 4192 | return found.parent().first_element_by_path(next_segment, delimiter); | 
|---|
|  | 4193 | else | 
|---|
|  | 4194 | { | 
|---|
|  | 4195 | for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling) | 
|---|
|  | 4196 | { | 
|---|
|  | 4197 | if (j->name && strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment))) | 
|---|
|  | 4198 | { | 
|---|
|  | 4199 | xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); | 
|---|
|  | 4200 |  | 
|---|
|  | 4201 | if (subsearch) return subsearch; | 
|---|
|  | 4202 | } | 
|---|
|  | 4203 | } | 
|---|
|  | 4204 |  | 
|---|
|  | 4205 | return xml_node(); | 
|---|
|  | 4206 | } | 
|---|
|  | 4207 | } | 
|---|
|  | 4208 |  | 
|---|
|  | 4209 | bool xml_node::traverse(xml_tree_walker& walker) | 
|---|
|  | 4210 | { | 
|---|
|  | 4211 | walker._depth = -1; | 
|---|
|  | 4212 |  | 
|---|
|  | 4213 | xml_node arg_begin = *this; | 
|---|
|  | 4214 | if (!walker.begin(arg_begin)) return false; | 
|---|
|  | 4215 |  | 
|---|
|  | 4216 | xml_node cur = first_child(); | 
|---|
|  | 4217 |  | 
|---|
|  | 4218 | if (cur) | 
|---|
|  | 4219 | { | 
|---|
|  | 4220 | ++walker._depth; | 
|---|
|  | 4221 |  | 
|---|
|  | 4222 | do | 
|---|
|  | 4223 | { | 
|---|
|  | 4224 | xml_node arg_for_each = cur; | 
|---|
|  | 4225 | if (!walker.for_each(arg_for_each)) | 
|---|
|  | 4226 | return false; | 
|---|
|  | 4227 |  | 
|---|
|  | 4228 | if (cur.first_child()) | 
|---|
|  | 4229 | { | 
|---|
|  | 4230 | ++walker._depth; | 
|---|
|  | 4231 | cur = cur.first_child(); | 
|---|
|  | 4232 | } | 
|---|
|  | 4233 | else if (cur.next_sibling()) | 
|---|
|  | 4234 | cur = cur.next_sibling(); | 
|---|
|  | 4235 | else | 
|---|
|  | 4236 | { | 
|---|
|  | 4237 | // Borland C++ workaround | 
|---|
|  | 4238 | while (!cur.next_sibling() && cur != *this && (bool)cur.parent()) | 
|---|
|  | 4239 | { | 
|---|
|  | 4240 | --walker._depth; | 
|---|
|  | 4241 | cur = cur.parent(); | 
|---|
|  | 4242 | } | 
|---|
|  | 4243 |  | 
|---|
|  | 4244 | if (cur != *this) | 
|---|
|  | 4245 | cur = cur.next_sibling(); | 
|---|
|  | 4246 | } | 
|---|
|  | 4247 | } | 
|---|
|  | 4248 | while (cur && cur != *this); | 
|---|
|  | 4249 | } | 
|---|
|  | 4250 |  | 
|---|
|  | 4251 | assert(walker._depth == -1); | 
|---|
|  | 4252 |  | 
|---|
|  | 4253 | xml_node arg_end = *this; | 
|---|
|  | 4254 | return walker.end(arg_end); | 
|---|
|  | 4255 | } | 
|---|
|  | 4256 |  | 
|---|
|  | 4257 | size_t xml_node::hash_value() const | 
|---|
|  | 4258 | { | 
|---|
|  | 4259 | return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct)); | 
|---|
|  | 4260 | } | 
|---|
|  | 4261 |  | 
|---|
|  | 4262 | xml_node_struct* xml_node::internal_object() const | 
|---|
|  | 4263 | { | 
|---|
|  | 4264 | return _root; | 
|---|
|  | 4265 | } | 
|---|
|  | 4266 |  | 
|---|
|  | 4267 | void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const | 
|---|
|  | 4268 | { | 
|---|
|  | 4269 | if (!_root) return; | 
|---|
|  | 4270 |  | 
|---|
|  | 4271 | xml_buffered_writer buffered_writer(writer, encoding); | 
|---|
|  | 4272 |  | 
|---|
|  | 4273 | node_output(buffered_writer, *this, indent, flags, depth); | 
|---|
|  | 4274 | } | 
|---|
|  | 4275 |  | 
|---|
|  | 4276 | #ifndef PUGIXML_NO_STL | 
|---|
|  | 4277 | void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const | 
|---|
|  | 4278 | { | 
|---|
|  | 4279 | xml_writer_stream writer(stream); | 
|---|
|  | 4280 |  | 
|---|
|  | 4281 | print(writer, indent, flags, encoding, depth); | 
|---|
|  | 4282 | } | 
|---|
|  | 4283 |  | 
|---|
|  | 4284 | void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const | 
|---|
|  | 4285 | { | 
|---|
|  | 4286 | xml_writer_stream writer(stream); | 
|---|
|  | 4287 |  | 
|---|
|  | 4288 | print(writer, indent, flags, encoding_wchar, depth); | 
|---|
|  | 4289 | } | 
|---|
|  | 4290 | #endif | 
|---|
|  | 4291 |  | 
|---|
|  | 4292 | ptrdiff_t xml_node::offset_debug() const | 
|---|
|  | 4293 | { | 
|---|
|  | 4294 | xml_node_struct* r = root()._root; | 
|---|
|  | 4295 |  | 
|---|
|  | 4296 | if (!r) return -1; | 
|---|
|  | 4297 |  | 
|---|
|  | 4298 | const char_t* buffer = static_cast<xml_document_struct*>(r)->buffer; | 
|---|
|  | 4299 |  | 
|---|
|  | 4300 | if (!buffer) return -1; | 
|---|
|  | 4301 |  | 
|---|
|  | 4302 | switch (type()) | 
|---|
|  | 4303 | { | 
|---|
|  | 4304 | case node_document: | 
|---|
|  | 4305 | return 0; | 
|---|
|  | 4306 |  | 
|---|
|  | 4307 | case node_element: | 
|---|
|  | 4308 | case node_declaration: | 
|---|
|  | 4309 | case node_pi: | 
|---|
|  | 4310 | return (_root->header & xml_memory_page_name_allocated_mask) ? -1 : _root->name - buffer; | 
|---|
|  | 4311 |  | 
|---|
|  | 4312 | case node_pcdata: | 
|---|
|  | 4313 | case node_cdata: | 
|---|
|  | 4314 | case node_comment: | 
|---|
|  | 4315 | case node_doctype: | 
|---|
|  | 4316 | return (_root->header & xml_memory_page_value_allocated_mask) ? -1 : _root->value - buffer; | 
|---|
|  | 4317 |  | 
|---|
|  | 4318 | default: | 
|---|
|  | 4319 | return -1; | 
|---|
|  | 4320 | } | 
|---|
|  | 4321 | } | 
|---|
|  | 4322 |  | 
|---|
|  | 4323 | #ifdef __BORLANDC__ | 
|---|
|  | 4324 | bool operator&&(const xml_node& lhs, bool rhs) | 
|---|
|  | 4325 | { | 
|---|
|  | 4326 | return (bool)lhs && rhs; | 
|---|
|  | 4327 | } | 
|---|
|  | 4328 |  | 
|---|
// Borland C++ only: logical OR of a node's boolean state and a plain bool.
// NOTE(review): being a user-defined operator overload, both operands are evaluated
// before the call, so there is no short-circuiting at the call site.
bool operator||(const xml_node& lhs, bool rhs)
{
    return (bool)lhs || rhs;
}
|---|
|  | 4333 | #endif | 
|---|
|  | 4334 |  | 
|---|
|  | 4335 | xml_node_iterator::xml_node_iterator() | 
|---|
|  | 4336 | { | 
|---|
|  | 4337 | } | 
|---|
|  | 4338 |  | 
|---|
|  | 4339 | xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent()) | 
|---|
|  | 4340 | { | 
|---|
|  | 4341 | } | 
|---|
|  | 4342 |  | 
|---|
|  | 4343 | xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) | 
|---|
|  | 4344 | { | 
|---|
|  | 4345 | } | 
|---|
|  | 4346 |  | 
|---|
|  | 4347 | bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const | 
|---|
|  | 4348 | { | 
|---|
|  | 4349 | return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; | 
|---|
|  | 4350 | } | 
|---|
|  | 4351 |  | 
|---|
|  | 4352 | bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const | 
|---|
|  | 4353 | { | 
|---|
|  | 4354 | return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; | 
|---|
|  | 4355 | } | 
|---|
|  | 4356 |  | 
|---|
|  | 4357 | xml_node& xml_node_iterator::operator*() | 
|---|
|  | 4358 | { | 
|---|
|  | 4359 | assert(_wrap._root); | 
|---|
|  | 4360 | return _wrap; | 
|---|
|  | 4361 | } | 
|---|
|  | 4362 |  | 
|---|
|  | 4363 | xml_node* xml_node_iterator::operator->() | 
|---|
|  | 4364 | { | 
|---|
|  | 4365 | assert(_wrap._root); | 
|---|
|  | 4366 | return &_wrap; | 
|---|
|  | 4367 | } | 
|---|
|  | 4368 |  | 
|---|
|  | 4369 | const xml_node_iterator& xml_node_iterator::operator++() | 
|---|
|  | 4370 | { | 
|---|
|  | 4371 | assert(_wrap._root); | 
|---|
|  | 4372 | _wrap._root = _wrap._root->next_sibling; | 
|---|
|  | 4373 | return *this; | 
|---|
|  | 4374 | } | 
|---|
|  | 4375 |  | 
|---|
|  | 4376 | xml_node_iterator xml_node_iterator::operator++(int) | 
|---|
|  | 4377 | { | 
|---|
|  | 4378 | xml_node_iterator temp = *this; | 
|---|
|  | 4379 | ++*this; | 
|---|
|  | 4380 | return temp; | 
|---|
|  | 4381 | } | 
|---|
|  | 4382 |  | 
|---|
|  | 4383 | const xml_node_iterator& xml_node_iterator::operator--() | 
|---|
|  | 4384 | { | 
|---|
|  | 4385 | _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child(); | 
|---|
|  | 4386 | return *this; | 
|---|
|  | 4387 | } | 
|---|
|  | 4388 |  | 
|---|
|  | 4389 | xml_node_iterator xml_node_iterator::operator--(int) | 
|---|
|  | 4390 | { | 
|---|
|  | 4391 | xml_node_iterator temp = *this; | 
|---|
|  | 4392 | --*this; | 
|---|
|  | 4393 | return temp; | 
|---|
|  | 4394 | } | 
|---|
|  | 4395 |  | 
|---|
|  | 4396 | xml_attribute_iterator::xml_attribute_iterator() | 
|---|
|  | 4397 | { | 
|---|
|  | 4398 | } | 
|---|
|  | 4399 |  | 
|---|
|  | 4400 | xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent) | 
|---|
|  | 4401 | { | 
|---|
|  | 4402 | } | 
|---|
|  | 4403 |  | 
|---|
|  | 4404 | xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) | 
|---|
|  | 4405 | { | 
|---|
|  | 4406 | } | 
|---|
|  | 4407 |  | 
|---|
|  | 4408 | bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const | 
|---|
|  | 4409 | { | 
|---|
|  | 4410 | return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root; | 
|---|
|  | 4411 | } | 
|---|
|  | 4412 |  | 
|---|
|  | 4413 | bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const | 
|---|
|  | 4414 | { | 
|---|
|  | 4415 | return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root; | 
|---|
|  | 4416 | } | 
|---|
|  | 4417 |  | 
|---|
|  | 4418 | xml_attribute& xml_attribute_iterator::operator*() | 
|---|
|  | 4419 | { | 
|---|
|  | 4420 | assert(_wrap._attr); | 
|---|
|  | 4421 | return _wrap; | 
|---|
|  | 4422 | } | 
|---|
|  | 4423 |  | 
|---|
|  | 4424 | xml_attribute* xml_attribute_iterator::operator->() | 
|---|
|  | 4425 | { | 
|---|
|  | 4426 | assert(_wrap._attr); | 
|---|
|  | 4427 | return &_wrap; | 
|---|
|  | 4428 | } | 
|---|
|  | 4429 |  | 
|---|
|  | 4430 | const xml_attribute_iterator& xml_attribute_iterator::operator++() | 
|---|
|  | 4431 | { | 
|---|
|  | 4432 | assert(_wrap._attr); | 
|---|
|  | 4433 | _wrap._attr = _wrap._attr->next_attribute; | 
|---|
|  | 4434 | return *this; | 
|---|
|  | 4435 | } | 
|---|
|  | 4436 |  | 
|---|
|  | 4437 | xml_attribute_iterator xml_attribute_iterator::operator++(int) | 
|---|
|  | 4438 | { | 
|---|
|  | 4439 | xml_attribute_iterator temp = *this; | 
|---|
|  | 4440 | ++*this; | 
|---|
|  | 4441 | return temp; | 
|---|
|  | 4442 | } | 
|---|
|  | 4443 |  | 
|---|
|  | 4444 | const xml_attribute_iterator& xml_attribute_iterator::operator--() | 
|---|
|  | 4445 | { | 
|---|
|  | 4446 | _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute(); | 
|---|
|  | 4447 | return *this; | 
|---|
|  | 4448 | } | 
|---|
|  | 4449 |  | 
|---|
|  | 4450 | xml_attribute_iterator xml_attribute_iterator::operator--(int) | 
|---|
|  | 4451 | { | 
|---|
|  | 4452 | xml_attribute_iterator temp = *this; | 
|---|
|  | 4453 | --*this; | 
|---|
|  | 4454 | return temp; | 
|---|
|  | 4455 | } | 
|---|
|  | 4456 |  | 
|---|
|  | 4457 | xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto) | 
|---|
|  | 4458 | { | 
|---|
|  | 4459 | } | 
|---|
|  | 4460 |  | 
|---|
|  | 4461 | xml_parse_result::operator bool() const | 
|---|
|  | 4462 | { | 
|---|
|  | 4463 | return status == status_ok; | 
|---|
|  | 4464 | } | 
|---|
|  | 4465 |  | 
|---|
|  | 4466 | const char* xml_parse_result::description() const | 
|---|
|  | 4467 | { | 
|---|
|  | 4468 | switch (status) | 
|---|
|  | 4469 | { | 
|---|
|  | 4470 | case status_ok: return "No error"; | 
|---|
|  | 4471 |  | 
|---|
|  | 4472 | case status_file_not_found: return "File was not found"; | 
|---|
|  | 4473 | case status_io_error: return "Error reading from file/stream"; | 
|---|
|  | 4474 | case status_out_of_memory: return "Could not allocate memory"; | 
|---|
|  | 4475 | case status_internal_error: return "Internal error occurred"; | 
|---|
|  | 4476 |  | 
|---|
|  | 4477 | case status_unrecognized_tag: return "Could not determine tag type"; | 
|---|
|  | 4478 |  | 
|---|
|  | 4479 | case status_bad_pi: return "Error parsing document declaration/processing instruction"; | 
|---|
|  | 4480 | case status_bad_comment: return "Error parsing comment"; | 
|---|
|  | 4481 | case status_bad_cdata: return "Error parsing CDATA section"; | 
|---|
|  | 4482 | case status_bad_doctype: return "Error parsing document type declaration"; | 
|---|
|  | 4483 | case status_bad_pcdata: return "Error parsing PCDATA section"; | 
|---|
|  | 4484 | case status_bad_start_element: return "Error parsing start element tag"; | 
|---|
|  | 4485 | case status_bad_attribute: return "Error parsing element attribute"; | 
|---|
|  | 4486 | case status_bad_end_element: return "Error parsing end element tag"; | 
|---|
|  | 4487 | case status_end_element_mismatch: return "Start-end tags mismatch"; | 
|---|
|  | 4488 |  | 
|---|
|  | 4489 | default: return "Unknown error"; | 
|---|
|  | 4490 | } | 
|---|
|  | 4491 | } | 
|---|
|  | 4492 |  | 
|---|
|  | 4493 | xml_document::xml_document(): _buffer(0) | 
|---|
|  | 4494 | { | 
|---|
|  | 4495 | create(); | 
|---|
|  | 4496 | } | 
|---|
|  | 4497 |  | 
|---|
|  | 4498 | xml_document::~xml_document() | 
|---|
|  | 4499 | { | 
|---|
|  | 4500 | destroy(); | 
|---|
|  | 4501 | } | 
|---|
|  | 4502 |  | 
|---|
|  | 4503 | void xml_document::reset() | 
|---|
|  | 4504 | { | 
|---|
|  | 4505 | destroy(); | 
|---|
|  | 4506 | create(); | 
|---|
|  | 4507 | } | 
|---|
|  | 4508 |  | 
|---|
|  | 4509 | void xml_document::reset(const xml_document& proto) | 
|---|
|  | 4510 | { | 
|---|
|  | 4511 | reset(); | 
|---|
|  | 4512 |  | 
|---|
|  | 4513 | for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling()) | 
|---|
|  | 4514 | append_copy(cur); | 
|---|
|  | 4515 | } | 
|---|
|  | 4516 |  | 
|---|
// Sets up the sentinel memory page inside the document's embedded _memory storage and
// placement-constructs the document root structure in that page.
void xml_document::create()
{
    // initialize sentinel page
    // (compile-time check: page header + document structure + alignment slack must fit into _memory)
    STATIC_ASSERT(offsetof(xml_memory_page, data) + sizeof(xml_document_struct) + xml_memory_page_alignment <= sizeof(_memory));

    // align upwards to page boundary
    void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1));

    // prepare page structure
    xml_memory_page* page = xml_memory_page::construct(page_memory);

    // NOTE(review): the page is marked as fully busy, presumably so that no further
    // allocations are served from the sentinel page - confirm against xml_allocator
    page->busy_size = xml_memory_page_size;

    // allocate new root
    _root = new (page->data) xml_document_struct(page);
    // the root's prev_sibling_c is pointed at the root itself
    _root->prev_sibling_c = _root;

    // setup sentinel page
    page->allocator = static_cast<xml_document_struct*>(_root);
}
|---|
|  | 4537 |  | 
|---|
|  | 4538 | void xml_document::destroy() | 
|---|
|  | 4539 | { | 
|---|
|  | 4540 | // destroy static storage | 
|---|
|  | 4541 | if (_buffer) | 
|---|
|  | 4542 | { | 
|---|
|  | 4543 | global_deallocate(_buffer); | 
|---|
|  | 4544 | _buffer = 0; | 
|---|
|  | 4545 | } | 
|---|
|  | 4546 |  | 
|---|
|  | 4547 | // destroy dynamic storage, leave sentinel page (it's in static memory) | 
|---|
|  | 4548 | if (_root) | 
|---|
|  | 4549 | { | 
|---|
|  | 4550 | xml_memory_page* root_page = reinterpret_cast<xml_memory_page*>(_root->header & xml_memory_page_pointer_mask); | 
|---|
|  | 4551 | assert(root_page && !root_page->prev && !root_page->memory); | 
|---|
|  | 4552 |  | 
|---|
|  | 4553 | // destroy all pages | 
|---|
|  | 4554 | for (xml_memory_page* page = root_page->next; page; ) | 
|---|
|  | 4555 | { | 
|---|
|  | 4556 | xml_memory_page* next = page->next; | 
|---|
|  | 4557 |  | 
|---|
|  | 4558 | xml_allocator::deallocate_page(page); | 
|---|
|  | 4559 |  | 
|---|
|  | 4560 | page = next; | 
|---|
|  | 4561 | } | 
|---|
|  | 4562 |  | 
|---|
|  | 4563 | // cleanup root page | 
|---|
|  | 4564 | root_page->allocator = 0; | 
|---|
|  | 4565 | root_page->next = 0; | 
|---|
|  | 4566 | root_page->busy_size = root_page->freed_size = 0; | 
|---|
|  | 4567 |  | 
|---|
|  | 4568 | _root = 0; | 
|---|
|  | 4569 | } | 
|---|
|  | 4570 | } | 
|---|
|  | 4571 |  | 
|---|
|  | 4572 | #ifndef PUGIXML_NO_STL | 
|---|
|  | 4573 | xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding) | 
|---|
|  | 4574 | { | 
|---|
|  | 4575 | reset(); | 
|---|
|  | 4576 |  | 
|---|
|  | 4577 | return load_stream_impl(*this, stream, options, encoding); | 
|---|
|  | 4578 | } | 
|---|
|  | 4579 |  | 
|---|
|  | 4580 | xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options) | 
|---|
|  | 4581 | { | 
|---|
|  | 4582 | reset(); | 
|---|
|  | 4583 |  | 
|---|
|  | 4584 | return load_stream_impl(*this, stream, options, encoding_wchar); | 
|---|
|  | 4585 | } | 
|---|
|  | 4586 | #endif | 
|---|
|  | 4587 |  | 
|---|
|  | 4588 | xml_parse_result xml_document::load(const char_t* contents, unsigned int options) | 
|---|
|  | 4589 | { | 
|---|
|  | 4590 | // Force native encoding (skip autodetection) | 
|---|
|  | 4591 | #ifdef PUGIXML_WCHAR_MODE | 
|---|
|  | 4592 | xml_encoding encoding = encoding_wchar; | 
|---|
|  | 4593 | #else | 
|---|
|  | 4594 | xml_encoding encoding = encoding_utf8; | 
|---|
|  | 4595 | #endif | 
|---|
|  | 4596 |  | 
|---|
|  | 4597 | return load_buffer(contents, strlength(contents) * sizeof(char_t), options, encoding); | 
|---|
|  | 4598 | } | 
|---|
|  | 4599 |  | 
|---|
|  | 4600 | xml_parse_result xml_document::load_file(const char* path, unsigned int options, xml_encoding encoding) | 
|---|
|  | 4601 | { | 
|---|
|  | 4602 | reset(); | 
|---|
|  | 4603 |  | 
|---|
|  | 4604 | FILE* file = fopen(path, "rb"); | 
|---|
|  | 4605 |  | 
|---|
|  | 4606 | return load_file_impl(*this, file, options, encoding); | 
|---|
|  | 4607 | } | 
|---|
|  | 4608 |  | 
|---|
|  | 4609 | xml_parse_result xml_document::load_file(const wchar_t* path, unsigned int options, xml_encoding encoding) | 
|---|
|  | 4610 | { | 
|---|
|  | 4611 | reset(); | 
|---|
|  | 4612 |  | 
|---|
|  | 4613 | FILE* file = open_file_wide(path, L"rb"); | 
|---|
|  | 4614 |  | 
|---|
|  | 4615 | return load_file_impl(*this, file, options, encoding); | 
|---|
|  | 4616 | } | 
|---|
|  | 4617 |  | 
|---|
|  | 4618 | xml_parse_result xml_document::load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own) | 
|---|
|  | 4619 | { | 
|---|
|  | 4620 | reset(); | 
|---|
|  | 4621 |  | 
|---|
|  | 4622 | // check input buffer | 
|---|
|  | 4623 | assert(contents || size == 0); | 
|---|
|  | 4624 |  | 
|---|
|  | 4625 | // get actual encoding | 
|---|
|  | 4626 | xml_encoding buffer_encoding = get_buffer_encoding(encoding, contents, size); | 
|---|
|  | 4627 |  | 
|---|
|  | 4628 | // get private buffer | 
|---|
|  | 4629 | char_t* buffer = 0; | 
|---|
|  | 4630 | size_t length = 0; | 
|---|
|  | 4631 |  | 
|---|
|  | 4632 | if (!convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return make_parse_result(status_out_of_memory); | 
|---|
|  | 4633 |  | 
|---|
|  | 4634 | // delete original buffer if we performed a conversion | 
|---|
|  | 4635 | if (own && buffer != contents && contents) global_deallocate(contents); | 
|---|
|  | 4636 |  | 
|---|
|  | 4637 | // parse | 
|---|
|  | 4638 | xml_parse_result res = xml_parser::parse(buffer, length, _root, options); | 
|---|
|  | 4639 |  | 
|---|
|  | 4640 | // remember encoding | 
|---|
|  | 4641 | res.encoding = buffer_encoding; | 
|---|
|  | 4642 |  | 
|---|
|  | 4643 | // grab onto buffer if it's our buffer, user is responsible for deallocating contens himself | 
|---|
|  | 4644 | if (own || buffer != contents) _buffer = buffer; | 
|---|
|  | 4645 |  | 
|---|
|  | 4646 | return res; | 
|---|
|  | 4647 | } | 
|---|
|  | 4648 |  | 
|---|
|  | 4649 | xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) | 
|---|
|  | 4650 | { | 
|---|
|  | 4651 | return load_buffer_impl(const_cast<void*>(contents), size, options, encoding, false, false); | 
|---|
|  | 4652 | } | 
|---|
|  | 4653 |  | 
|---|
|  | 4654 | xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding) | 
|---|
|  | 4655 | { | 
|---|
|  | 4656 | return load_buffer_impl(contents, size, options, encoding, true, false); | 
|---|
|  | 4657 | } | 
|---|
|  | 4658 |  | 
|---|
|  | 4659 | xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding) | 
|---|
|  | 4660 | { | 
|---|
|  | 4661 | return load_buffer_impl(contents, size, options, encoding, true, true); | 
|---|
|  | 4662 | } | 
|---|
|  | 4663 |  | 
|---|
|  | 4664 | void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const | 
|---|
|  | 4665 | { | 
|---|
|  | 4666 | if (flags & format_write_bom) write_bom(writer, get_write_encoding(encoding)); | 
|---|
|  | 4667 |  | 
|---|
|  | 4668 | xml_buffered_writer buffered_writer(writer, encoding); | 
|---|
|  | 4669 |  | 
|---|
|  | 4670 | if (!(flags & format_no_declaration) && !has_declaration(*this)) | 
|---|
|  | 4671 | { | 
|---|
|  | 4672 | buffered_writer.write(PUGIXML_TEXT("<?xml version=\"1.0\"?>")); | 
|---|
|  | 4673 | if (!(flags & format_raw)) buffered_writer.write('\n'); | 
|---|
|  | 4674 | } | 
|---|
|  | 4675 |  | 
|---|
|  | 4676 | node_output(buffered_writer, *this, indent, flags, 0); | 
|---|
|  | 4677 | } | 
|---|
|  | 4678 |  | 
|---|
|  | 4679 | #ifndef PUGIXML_NO_STL | 
|---|
|  | 4680 | void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const | 
|---|
|  | 4681 | { | 
|---|
|  | 4682 | xml_writer_stream writer(stream); | 
|---|
|  | 4683 |  | 
|---|
|  | 4684 | save(writer, indent, flags, encoding); | 
|---|
|  | 4685 | } | 
|---|
|  | 4686 |  | 
|---|
|  | 4687 | void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const | 
|---|
|  | 4688 | { | 
|---|
|  | 4689 | xml_writer_stream writer(stream); | 
|---|
|  | 4690 |  | 
|---|
|  | 4691 | save(writer, indent, flags, encoding_wchar); | 
|---|
|  | 4692 | } | 
|---|
|  | 4693 | #endif | 
|---|
|  | 4694 |  | 
|---|
|  | 4695 | bool xml_document::save_file(const char* path, const char_t* indent, unsigned int flags, xml_encoding encoding) const | 
|---|
|  | 4696 | { | 
|---|
|  | 4697 | FILE* file = fopen(path, "wb"); | 
|---|
|  | 4698 | if (!file) return false; | 
|---|
|  | 4699 |  | 
|---|
|  | 4700 | xml_writer_file writer(file); | 
|---|
|  | 4701 | save(writer, indent, flags, encoding); | 
|---|
|  | 4702 |  | 
|---|
|  | 4703 | fclose(file); | 
|---|
|  | 4704 |  | 
|---|
|  | 4705 | return true; | 
|---|
|  | 4706 | } | 
|---|
|  | 4707 |  | 
|---|
|  | 4708 | bool xml_document::save_file(const wchar_t* path, const char_t* indent, unsigned int flags, xml_encoding encoding) const | 
|---|
|  | 4709 | { | 
|---|
|  | 4710 | FILE* file = open_file_wide(path, L"wb"); | 
|---|
|  | 4711 | if (!file) return false; | 
|---|
|  | 4712 |  | 
|---|
|  | 4713 | xml_writer_file writer(file); | 
|---|
|  | 4714 | save(writer, indent, flags, encoding); | 
|---|
|  | 4715 |  | 
|---|
|  | 4716 | fclose(file); | 
|---|
|  | 4717 |  | 
|---|
|  | 4718 | return true; | 
|---|
|  | 4719 | } | 
|---|
|  | 4720 |  | 
|---|
|  | 4721 | xml_node xml_document::document_element() const | 
|---|
|  | 4722 | { | 
|---|
|  | 4723 | for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) | 
|---|
|  | 4724 | if ((i->header & xml_memory_page_type_mask) + 1 == node_element) | 
|---|
|  | 4725 | return xml_node(i); | 
|---|
|  | 4726 |  | 
|---|
|  | 4727 | return xml_node(); | 
|---|
|  | 4728 | } | 
|---|
|  | 4729 |  | 
|---|
|  | 4730 | #ifndef PUGIXML_NO_STL | 
|---|
|  | 4731 | std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str) | 
|---|
|  | 4732 | { | 
|---|
|  | 4733 | assert(str); | 
|---|
|  | 4734 |  | 
|---|
|  | 4735 | return as_utf8_impl(str, wcslen(str)); | 
|---|
|  | 4736 | } | 
|---|
|  | 4737 |  | 
|---|
|  | 4738 | std::string PUGIXML_FUNCTION as_utf8(const std::wstring& str) | 
|---|
|  | 4739 | { | 
|---|
|  | 4740 | return as_utf8_impl(str.c_str(), str.size()); | 
|---|
|  | 4741 | } | 
|---|
|  | 4742 |  | 
|---|
|  | 4743 | std::wstring PUGIXML_FUNCTION as_wide(const char* str) | 
|---|
|  | 4744 | { | 
|---|
|  | 4745 | assert(str); | 
|---|
|  | 4746 |  | 
|---|
|  | 4747 | return as_wide_impl(str, strlen(str)); | 
|---|
|  | 4748 | } | 
|---|
|  | 4749 |  | 
|---|
|  | 4750 | std::wstring PUGIXML_FUNCTION as_wide(const std::string& str) | 
|---|
|  | 4751 | { | 
|---|
|  | 4752 | return as_wide_impl(str.c_str(), str.size()); | 
|---|
|  | 4753 | } | 
|---|
|  | 4754 | #endif | 
|---|
|  | 4755 |  | 
|---|
|  | 4756 | void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate) | 
|---|
|  | 4757 | { | 
|---|
|  | 4758 | global_allocate = allocate; | 
|---|
|  | 4759 | global_deallocate = deallocate; | 
|---|
|  | 4760 | } | 
|---|
|  | 4761 |  | 
|---|
|  | 4762 | allocation_function PUGIXML_FUNCTION get_memory_allocation_function() | 
|---|
|  | 4763 | { | 
|---|
|  | 4764 | return global_allocate; | 
|---|
|  | 4765 | } | 
|---|
|  | 4766 |  | 
|---|
|  | 4767 | deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function() | 
|---|
|  | 4768 | { | 
|---|
|  | 4769 | return global_deallocate; | 
|---|
|  | 4770 | } | 
|---|
|  | 4771 | } | 
|---|
|  | 4772 |  | 
|---|
|  | 4773 | #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) | 
|---|
namespace std
{
    // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)

    // Reports xml_node_iterator as a bidirectional iterator.
    std::bidirectional_iterator_tag _Iter_cat(const xml_node_iterator&)
    {
        return std::bidirectional_iterator_tag();
    }

    // Reports xml_attribute_iterator as a bidirectional iterator.
    std::bidirectional_iterator_tag _Iter_cat(const xml_attribute_iterator&)
    {
        return std::bidirectional_iterator_tag();
    }
}
|---|
|  | 4787 | #endif | 
|---|
|  | 4788 |  | 
|---|
|  | 4789 | #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) | 
|---|
namespace std
{
    // Workarounds for (non-standard) iterator category detection
    // (this block is only compiled when __SUNPRO_CC is defined)

    // Reports xml_node_iterator as a bidirectional iterator.
    std::bidirectional_iterator_tag __iterator_category(const xml_node_iterator&)
    {
        return std::bidirectional_iterator_tag();
    }

    // Reports xml_attribute_iterator as a bidirectional iterator.
    std::bidirectional_iterator_tag __iterator_category(const xml_attribute_iterator&)
    {
        return std::bidirectional_iterator_tag();
    }
}
|---|
|  | 4803 | #endif | 
|---|
|  | 4804 |  | 
|---|
|  | 4805 | #ifndef PUGIXML_NO_XPATH | 
|---|
|  | 4806 |  | 
|---|
|  | 4807 | // STL replacements | 
|---|
|  | 4808 | namespace | 
|---|
|  | 4809 | { | 
|---|
// Function object comparing two values of the same type with operator==.
struct equal_to
{
    template <typename T> bool operator()(const T& lhs, const T& rhs) const
    {
        const bool same = (lhs == rhs);
        return same;
    }
};
|---|
|  | 4817 |  | 
|---|
// Function object comparing two values of the same type with operator!=.
struct not_equal_to
{
    template <typename T> bool operator()(const T& lhs, const T& rhs) const
    {
        const bool differs = (lhs != rhs);
        return differs;
    }
};
|---|
|  | 4825 |  | 
|---|
// Function object comparing two values of the same type with operator<.
struct less
{
    template <typename T> bool operator()(const T& lhs, const T& rhs) const
    {
        const bool before = (lhs < rhs);
        return before;
    }
};
|---|
|  | 4833 |  | 
|---|
// Function object comparing two values of the same type with operator<=.
struct less_equal
{
    template <typename T> bool operator()(const T& lhs, const T& rhs) const
    {
        const bool not_after = (lhs <= rhs);
        return not_after;
    }
};
|---|
|  | 4841 |  | 
|---|
// Exchanges the values of lhs and rhs through a temporary copy.
template <typename T> void swap(T& lhs, T& rhs)
{
    T saved = lhs;

    lhs = rhs;
    rhs = saved;
}
|---|
|  | 4848 |  | 
|---|
// Returns an iterator to the smallest element of [begin, end) according to pred
// (pred(a, b) is true when a orders before b). For equal elements the first one wins.
// Returns end for an empty range.
template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
{
    // an empty range has no minimum; without this guard begin + 1 would step past end
    if (begin == end) return end;

    I result = begin;

    for (I it = begin + 1; it != end; ++it)
        if (pred(*it, *result))
            result = it;

    return result;
}
|---|
|  | 4859 |  | 
|---|
// Reverses the elements of [begin, end) in place.
template <typename I> void reverse(I begin, I end)
{
    // exchange the outermost pair and move both cursors inwards until fewer than
    // two elements remain between them
    for (; begin + 1 < end; ++begin)
    {
        --end;
        swap(*begin, *end);
    }
}
|---|
|  | 4864 |  | 
|---|
// Collapses runs of adjacent equal elements in [begin, end) so that only the first element
// of each run remains, and returns the new past-the-end iterator.
template <typename I> I unique(I begin, I end)
{
    // skip the leading part that has no adjacent duplicates - nothing has to move there
    for (; begin + 1 < end; ++begin)
        if (!(*begin != *(begin + 1)))
            break;

    if (begin == end) return begin;

    // last element that is kept
    I kept = begin;

    // copy every element that differs from the last kept one right behind it
    for (I scan = begin + 1; scan != end; ++scan)
    {
        if (*scan != *kept)
        {
            ++kept;
            *kept = *scan;
        }
    }

    // kept refers to a live element, so the result is one past it
    return kept + 1;
}
|---|
|  | 4887 |  | 
|---|
// Copies [begin, end) so that the copy ends right before target, working from the last
// element to the first (which makes it usable for overlapping moves towards the back).
template <typename I> void copy_backwards(I begin, I end, I target)
{
    while (end != begin)
    {
        --end;
        --target;

        *target = *end;
    }
}
|---|
|  | 4892 |  | 
|---|
// Sorts the non-empty range [begin, end) with insertion sort, ordering elements by pred.
// The unnamed T* parameter is only used to deduce the element type.
template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
{
    assert(begin != end);

    for (I current = begin + 1; current != end; ++current)
    {
        T value = *current;

        if (pred(value, *begin))
        {
            // new smallest element: shift the whole sorted prefix by one and put it in front
            copy_backwards(begin, current, current + 1);
            *begin = value;
            continue;
        }

        // *begin does not order after value, so this scan stops at begin at the latest
        I slot = current;

        for (; pred(value, *(slot - 1)); --slot)
            *slot = *(slot - 1);

        // drop the element into the freed slot
        *slot = value;
    }
}
|---|
|  | 4923 |  | 
|---|
// std variant for elements with ==
// Three-way partition of [begin, end) around the value initially at middle. On return,
// *out_eqbeg and *out_eqend receive the bounds [eqbeg, eqend) of the block of elements
// equal to the pivot; the loops below move elements ordered before the pivot (per pred)
// in front of that block and the remaining ones behind it.
template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
{
    // the equal range starts out as just the pivot element
    I eqbeg = middle, eqend = middle + 1;

    // expand equal range
    while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg;
    while (eqend != end && *eqend == *eqbeg) ++eqend;

    // process outer elements
    // ltend: end of the not-yet-scanned left part, gtbeg: start of the not-yet-scanned right part
    I ltend = eqbeg, gtbeg = eqend;

    for (;;)
    {
        // find the element from the right side that belongs to the left one
        // (elements equal to the pivot are absorbed into the equal range on the way)
        for (; gtbeg != end; ++gtbeg)
            if (!pred(*eqbeg, *gtbeg))
            {
                if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++);
                else break;
            }

        // find the element from the left side that belongs to the right one
        // (elements equal to the pivot are absorbed into the equal range on the way)
        for (; ltend != begin; --ltend)
            if (!pred(*(ltend - 1), *eqbeg))
            {
                if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg);
                else break;
            }

        // scanned all elements
        if (gtbeg == end && ltend == begin)
        {
            *out_eqbeg = eqbeg;
            *out_eqend = eqend;
            return;
        }

        // make room for elements by moving equal area
        if (gtbeg == end)
        {
            // only a misplaced left element remains: shift the equal range one step left
            if (--ltend != --eqbeg) swap(*ltend, *eqbeg);
            swap(*eqbeg, *--eqend);
        }
        else if (ltend == begin)
        {
            // only a misplaced right element remains: shift the equal range one step right
            if (eqend != gtbeg) swap(*eqbeg, *eqend);
            ++eqend;
            swap(*gtbeg++, *eqbeg++);
        }
        // misplaced elements on both sides: exchange them directly
        else swap(*gtbeg++, *--ltend);
    }
}
|---|
|  | 4977 |  | 
|---|
// Orders the three referenced elements in place so that, according to pred,
// *first comes first, *middle holds the median and *last comes last.
template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
{
    // order the first two elements
    if (pred(*middle, *first))
    {
        swap(*middle, *first);
    }

    // move the largest of the three to the end
    if (pred(*last, *middle))
    {
        swap(*last, *middle);
    }

    // the previous step may have disturbed the first pair, so order it again
    if (pred(*middle, *first))
    {
        swap(*middle, *first);
    }
}
|---|
|  | 4984 |  | 
|---|
// Places a median estimate of the range [first, last] (last is inclusive) at middle:
// median of three for short ranges, median of three medians for longer ones.
template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
{
    // median of three for small chunks
    if (last - first <= 40)
    {
        median3(first, middle, last, pred);
        return;
    }

    // median of nine: take medians of three samples at the front, around the middle
    // and at the back, then the median of those three
    size_t stride = (last - first + 1) / 8;

    median3(first, first + stride, first + 2 * stride, pred);
    median3(middle - stride, middle, middle + stride, pred);
    median3(last - 2 * stride, last - stride, last, pred);
    median3(first + stride, middle, last - stride, pred);
}
|---|
|  | 5003 |  | 
|---|
// Sorts [begin, end) by pred: ranges longer than 32 elements are split with a three-way
// partition around a median estimate, the remainder is finished with insertion sort.
template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
{
    // sort large chunks
    for (; end - begin > 32; )
    {
        // pick the pivot: median estimate placed at the centre of the range
        I pivot = begin + (end - begin) / 2;
        median(begin, pivot, end - 1, pred);

        // split into three chunks (< = >)
        I equal_first, equal_last;
        partition(begin, pivot, end, pred, &equal_first, &equal_last);

        // recurse into the smaller side and keep looping on the larger one
        const bool left_is_larger = (equal_first - begin > end - equal_last);

        if (left_is_larger)
        {
            sort(equal_last, end, pred);
            end = equal_first;
        }
        else
        {
            sort(begin, equal_first, pred);
            begin = equal_last;
        }
    }

    // insertion sort small chunk
    if (begin != end) insertion_sort(begin, end, pred, &*begin);
}
|---|
|  | 5033 | } | 
|---|
|  | 5034 |  | 
|---|
|  | 5035 | // Allocator used for AST and evaluation stacks | 
|---|
|  | 5036 | namespace | 
|---|
|  | 5037 | { | 
|---|
// A block in a singly linked chain of memory blocks.
struct xpath_memory_block
{
    // next block in the chain
    xpath_memory_block* next;

    // storage area of the block
    char data[4096];
};
|---|
|  | 5044 |  | 
|---|
|  | 5045 | class xpath_allocator | 
|---|
|  | 5046 | { | 
|---|
|  | 5047 | xpath_memory_block* _root; | 
|---|
|  | 5048 | size_t _root_size; | 
|---|
|  | 5049 |  | 
|---|
|  | 5050 | public: | 
|---|
|  | 5051 | #ifdef PUGIXML_NO_EXCEPTIONS | 
|---|
|  | 5052 | jmp_buf* error_handler; | 
|---|
|  | 5053 | #endif | 
|---|
|  | 5054 |  | 
|---|
|  | 5055 | xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size) | 
|---|
|  | 5056 | { | 
|---|
|  | 5057 | #ifdef PUGIXML_NO_EXCEPTIONS | 
|---|
|  | 5058 | error_handler = 0; | 
|---|
|  | 5059 | #endif | 
|---|
|  | 5060 | } | 
|---|
|  | 5061 |  | 
|---|
|  | 5062 | void* allocate_nothrow(size_t size) | 
|---|
|  | 5063 | { | 
|---|
|  | 5064 | const size_t block_capacity = sizeof(_root->data); | 
|---|
|  | 5065 |  | 
|---|
|  | 5066 | // align size so that we're able to store pointers in subsequent blocks | 
|---|
|  | 5067 | size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1); | 
|---|
|  | 5068 |  | 
|---|
|  | 5069 | if (_root_size + size <= block_capacity) | 
|---|
|  | 5070 | { | 
|---|
|  | 5071 | void* buf = _root->data + _root_size; | 
|---|
|  | 5072 | _root_size += size; | 
|---|
|  | 5073 | return buf; | 
|---|
|  | 5074 | } | 
|---|
|  | 5075 | else | 
|---|
|  | 5076 | { | 
|---|
|  | 5077 | size_t block_data_size = (size > block_capacity) ? size : block_capacity; | 
|---|
|  | 5078 | size_t block_size = block_data_size + offsetof(xpath_memory_block, data); | 
|---|
|  | 5079 |  | 
|---|
|  | 5080 | xpath_memory_block* block = static_cast<xpath_memory_block*>(global_allocate(block_size)); | 
|---|
|  | 5081 | if (!block) return 0; | 
|---|
|  | 5082 |  | 
|---|
|  | 5083 | block->next = _root; | 
|---|
|  | 5084 |  | 
|---|
|  | 5085 | _root = block; | 
|---|
|  | 5086 | _root_size = size; | 
|---|
|  | 5087 |  | 
|---|
|  | 5088 | return block->data; | 
|---|
|  | 5089 | } | 
|---|
|  | 5090 | } | 
|---|
|  | 5091 |  | 
|---|
|  | 5092 | void* allocate(size_t size) | 
|---|
|  | 5093 | { | 
|---|
|  | 5094 | void* result = allocate_nothrow(size); | 
|---|
|  | 5095 |  | 
|---|
|  | 5096 | if (!result) | 
|---|
|  | 5097 | { | 
|---|
|  | 5098 | #ifdef PUGIXML_NO_EXCEPTIONS | 
|---|
|  | 5099 | assert(error_handler); | 
|---|
|  | 5100 | longjmp(*error_handler, 1); | 
|---|
|  | 5101 | #else | 
|---|
|  | 5102 | throw std::bad_alloc(); | 
|---|
|  | 5103 | #endif | 
|---|
|  | 5104 | } | 
|---|
|  | 5105 |  | 
|---|
|  | 5106 | return result; | 
|---|
|  | 5107 | } | 
|---|
|  | 5108 |  | 
|---|
|  | 5109 | void* reallocate(void* ptr, size_t old_size, size_t new_size) | 
|---|
|  | 5110 | { | 
|---|
|  | 5111 | // align size so that we're able to store pointers in subsequent blocks | 
|---|
|  | 5112 | old_size = (old_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1); | 
|---|
|  | 5113 | new_size = (new_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1); | 
|---|
|  | 5114 |  | 
|---|
|  | 5115 | // we can only reallocate the last object | 
|---|
|  | 5116 | assert(ptr == 0 || static_cast<char*>(ptr) + old_size == _root->data + _root_size); | 
|---|
|  | 5117 |  | 
|---|
|  | 5118 | // adjust root size so that we have not allocated the object at all | 
|---|
|  | 5119 | bool only_object = (_root_size == old_size); | 
|---|
|  | 5120 |  | 
|---|
|  | 5121 | if (ptr) _root_size -= old_size; | 
|---|
|  | 5122 |  | 
|---|
|  | 5123 | // allocate a new version (this will obviously reuse the memory if possible) | 
|---|
|  | 5124 | void* result = allocate(new_size); | 
|---|
|  | 5125 | assert(result); | 
|---|
|  | 5126 |  | 
|---|
|  | 5127 | // we have a new block | 
|---|
|  | 5128 | if (result != ptr && ptr) | 
|---|
|  | 5129 | { | 
|---|
|  | 5130 | // copy old data | 
|---|
|  | 5131 | assert(new_size > old_size); | 
|---|
|  | 5132 | memcpy(result, ptr, old_size); | 
|---|
|  | 5133 |  | 
|---|
|  | 5134 | // free the previous page if it had no other objects | 
|---|
|  | 5135 | if (only_object) | 
|---|
|  | 5136 | { | 
|---|
|  | 5137 | assert(_root->data == result); | 
|---|
|  | 5138 | assert(_root->next); | 
|---|
|  | 5139 |  | 
|---|
|  | 5140 | xpath_memory_block* next = _root->next->next; | 
|---|
|  | 5141 |  | 
|---|
|  | 5142 | if (next) | 
|---|
|  | 5143 | { | 
|---|
|  | 5144 | // deallocate the whole page, unless it was the first one | 
|---|
|  | 5145 | global_deallocate(_root->next); | 
|---|
|  | 5146 | _root->next = next; | 
|---|
|  | 5147 | } | 
|---|
|  | 5148 | } | 
|---|
|  | 5149 | } | 
|---|
|  | 5150 |  | 
|---|
|  | 5151 | return result; | 
|---|
|  | 5152 | } | 
|---|
|  | 5153 |  | 
|---|
|  | 5154 | void revert(const xpath_allocator& state) | 
|---|
|  | 5155 | { | 
|---|
|  | 5156 | // free all new pages | 
|---|
|  | 5157 | xpath_memory_block* cur = _root; | 
|---|
|  | 5158 |  | 
|---|
|  | 5159 | while (cur != state._root) | 
|---|
|  | 5160 | { | 
|---|
|  | 5161 | xpath_memory_block* next = cur->next; | 
|---|
|  | 5162 |  | 
|---|
|  | 5163 | global_deallocate(cur); | 
|---|
|  | 5164 |  | 
|---|
|  | 5165 | cur = next; | 
|---|
|  | 5166 | } | 
|---|
|  | 5167 |  | 
|---|
|  | 5168 | // restore state | 
|---|
|  | 5169 | _root = state._root; | 
|---|
|  | 5170 | _root_size = state._root_size; | 
|---|
|  | 5171 | } | 
|---|
|  | 5172 |  | 
|---|
|  | 5173 | void release() | 
|---|
|  | 5174 | { | 
|---|
|  | 5175 | xpath_memory_block* cur = _root; | 
|---|
|  | 5176 | assert(cur); | 
|---|
|  | 5177 |  | 
|---|
|  | 5178 | while (cur->next) | 
|---|
|  | 5179 | { | 
|---|
|  | 5180 | xpath_memory_block* next = cur->next; | 
|---|
|  | 5181 |  | 
|---|
|  | 5182 | global_deallocate(cur); | 
|---|
|  | 5183 |  | 
|---|
|  | 5184 | cur = next; | 
|---|
|  | 5185 | } | 
|---|
|  | 5186 | } | 
|---|
|  | 5187 | }; | 
|---|
|  | 5188 |  | 
|---|
|  | 5189 | struct xpath_allocator_capture | 
|---|
|  | 5190 | { | 
|---|
|  | 5191 | xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc) | 
|---|
|  | 5192 | { | 
|---|
|  | 5193 | } | 
|---|
|  | 5194 |  | 
|---|
|  | 5195 | ~xpath_allocator_capture() | 
|---|
|  | 5196 | { | 
|---|
|  | 5197 | _target->revert(_state); | 
|---|
|  | 5198 | } | 
|---|
|  | 5199 |  | 
|---|
|  | 5200 | xpath_allocator* _target; | 
|---|
|  | 5201 | xpath_allocator _state; | 
|---|
|  | 5202 | }; | 
|---|
|  | 5203 |  | 
|---|
|  | 5204 | struct xpath_stack | 
|---|
|  | 5205 | { | 
|---|
|  | 5206 | xpath_allocator* result; | 
|---|
|  | 5207 | xpath_allocator* temp; | 
|---|
|  | 5208 | }; | 
|---|
|  | 5209 |  | 
|---|
|  | 5210 | struct xpath_stack_data | 
|---|
|  | 5211 | { | 
|---|
|  | 5212 | xpath_memory_block blocks[2]; | 
|---|
|  | 5213 | xpath_allocator result; | 
|---|
|  | 5214 | xpath_allocator temp; | 
|---|
|  | 5215 | xpath_stack stack; | 
|---|
|  | 5216 |  | 
|---|
|  | 5217 | #ifdef PUGIXML_NO_EXCEPTIONS | 
|---|
|  | 5218 | jmp_buf error_handler; | 
|---|
|  | 5219 | #endif | 
|---|
|  | 5220 |  | 
|---|
|  | 5221 | xpath_stack_data(): result(blocks + 0), temp(blocks + 1) | 
|---|
|  | 5222 | { | 
|---|
|  | 5223 | blocks[0].next = blocks[1].next = 0; | 
|---|
|  | 5224 |  | 
|---|
|  | 5225 | stack.result = &result; | 
|---|
|  | 5226 | stack.temp = &temp; | 
|---|
|  | 5227 |  | 
|---|
|  | 5228 | #ifdef PUGIXML_NO_EXCEPTIONS | 
|---|
|  | 5229 | result.error_handler = temp.error_handler = &error_handler; | 
|---|
|  | 5230 | #endif | 
|---|
|  | 5231 | } | 
|---|
|  | 5232 |  | 
|---|
|  | 5233 | ~xpath_stack_data() | 
|---|
|  | 5234 | { | 
|---|
|  | 5235 | result.release(); | 
|---|
|  | 5236 | temp.release(); | 
|---|
|  | 5237 | } | 
|---|
|  | 5238 | }; | 
|---|
|  | 5239 | } | 
|---|
|  | 5240 |  | 
|---|
|  | 5241 | // String class | 
|---|
|  | 5242 | namespace | 
|---|
|  | 5243 | { | 
|---|
|  | 5244 | class xpath_string | 
|---|
|  | 5245 | { | 
|---|
|  | 5246 | const char_t* _buffer; | 
|---|
|  | 5247 | bool _uses_heap; | 
|---|
|  | 5248 |  | 
|---|
|  | 5249 | static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) | 
|---|
|  | 5250 | { | 
|---|
|  | 5251 | char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t))); | 
|---|
|  | 5252 | assert(result); | 
|---|
|  | 5253 |  | 
|---|
|  | 5254 | memcpy(result, string, length * sizeof(char_t)); | 
|---|
|  | 5255 | result[length] = 0; | 
|---|
|  | 5256 |  | 
|---|
|  | 5257 | return result; | 
|---|
|  | 5258 | } | 
|---|
|  | 5259 |  | 
|---|
|  | 5260 | static char_t* duplicate_string(const char_t* string, xpath_allocator* alloc) | 
|---|
|  | 5261 | { | 
|---|
|  | 5262 | return duplicate_string(string, strlength(string), alloc); | 
|---|
|  | 5263 | } | 
|---|
|  | 5264 |  | 
|---|
|  | 5265 | public: | 
|---|
|  | 5266 | xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false) | 
|---|
|  | 5267 | { | 
|---|
|  | 5268 | } | 
|---|
|  | 5269 |  | 
|---|
|  | 5270 | explicit xpath_string(const char_t* str, xpath_allocator* alloc) | 
|---|
|  | 5271 | { | 
|---|
|  | 5272 | bool empty = (*str == 0); | 
|---|
|  | 5273 |  | 
|---|
|  | 5274 | _buffer = empty ? PUGIXML_TEXT("") : duplicate_string(str, alloc); | 
|---|
|  | 5275 | _uses_heap = !empty; | 
|---|
|  | 5276 | } | 
|---|
|  | 5277 |  | 
|---|
|  | 5278 | explicit xpath_string(const char_t* str, bool use_heap): _buffer(str), _uses_heap(use_heap) | 
|---|
|  | 5279 | { | 
|---|
|  | 5280 | } | 
|---|
|  | 5281 |  | 
|---|
|  | 5282 | xpath_string(const char_t* begin, const char_t* end, xpath_allocator* alloc) | 
|---|
|  | 5283 | { | 
|---|
|  | 5284 | assert(begin <= end); | 
|---|
|  | 5285 |  | 
|---|
|  | 5286 | bool empty = (begin == end); | 
|---|
|  | 5287 |  | 
|---|
|  | 5288 | _buffer = empty ? PUGIXML_TEXT("") : duplicate_string(begin, static_cast<size_t>(end - begin), alloc); | 
|---|
|  | 5289 | _uses_heap = !empty; | 
|---|
|  | 5290 | } | 
|---|
|  | 5291 |  | 
|---|
|  | 5292 | void append(const xpath_string& o, xpath_allocator* alloc) | 
|---|
|  | 5293 | { | 
|---|
|  | 5294 | // skip empty sources | 
|---|
|  | 5295 | if (!*o._buffer) return; | 
|---|
|  | 5296 |  | 
|---|
|  | 5297 | // fast append for constant empty target and constant source | 
|---|
|  | 5298 | if (!*_buffer && !_uses_heap && !o._uses_heap) | 
|---|
|  | 5299 | { | 
|---|
|  | 5300 | _buffer = o._buffer; | 
|---|
|  | 5301 | } | 
|---|
|  | 5302 | else | 
|---|
|  | 5303 | { | 
|---|
|  | 5304 | // need to make heap copy | 
|---|
|  | 5305 | size_t target_length = strlength(_buffer); | 
|---|
|  | 5306 | size_t source_length = strlength(o._buffer); | 
|---|
|  | 5307 | size_t length = target_length + source_length; | 
|---|
|  | 5308 |  | 
|---|
|  | 5309 | // allocate new buffer | 
|---|
|  | 5310 | char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (length + 1) * sizeof(char_t))); | 
|---|
|  | 5311 | assert(result); | 
|---|
|  | 5312 |  | 
|---|
|  | 5313 | // append first string to the new buffer in case there was no reallocation | 
|---|
|  | 5314 | if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t)); | 
|---|
|  | 5315 |  | 
|---|
|  | 5316 | // append second string to the new buffer | 
|---|
|  | 5317 | memcpy(result + target_length, o._buffer, source_length * sizeof(char_t)); | 
|---|
|  | 5318 | result[length] = 0; | 
|---|
|  | 5319 |  | 
|---|
|  | 5320 | // finalize | 
|---|
|  | 5321 | _buffer = result; | 
|---|
|  | 5322 | _uses_heap = true; | 
|---|
|  | 5323 | } | 
|---|
|  | 5324 | } | 
|---|
|  | 5325 |  | 
|---|
|  | 5326 | const char_t* c_str() const | 
|---|
|  | 5327 | { | 
|---|
|  | 5328 | return _buffer; | 
|---|
|  | 5329 | } | 
|---|
|  | 5330 |  | 
|---|
|  | 5331 | size_t length() const | 
|---|
|  | 5332 | { | 
|---|
|  | 5333 | return strlength(_buffer); | 
|---|
|  | 5334 | } | 
|---|
|  | 5335 |  | 
|---|
|  | 5336 | char_t* data(xpath_allocator* alloc) | 
|---|
|  | 5337 | { | 
|---|
|  | 5338 | // make private heap copy | 
|---|
|  | 5339 | if (!_uses_heap) | 
|---|
|  | 5340 | { | 
|---|
|  | 5341 | _buffer = duplicate_string(_buffer, alloc); | 
|---|
|  | 5342 | _uses_heap = true; | 
|---|
|  | 5343 | } | 
|---|
|  | 5344 |  | 
|---|
|  | 5345 | return const_cast<char_t*>(_buffer); | 
|---|
|  | 5346 | } | 
|---|
|  | 5347 |  | 
|---|
|  | 5348 | bool empty() const | 
|---|
|  | 5349 | { | 
|---|
|  | 5350 | return *_buffer == 0; | 
|---|
|  | 5351 | } | 
|---|
|  | 5352 |  | 
|---|
|  | 5353 | bool operator==(const xpath_string& o) const | 
|---|
|  | 5354 | { | 
|---|
|  | 5355 | return strequal(_buffer, o._buffer); | 
|---|
|  | 5356 | } | 
|---|
|  | 5357 |  | 
|---|
|  | 5358 | bool operator!=(const xpath_string& o) const | 
|---|
|  | 5359 | { | 
|---|
|  | 5360 | return !strequal(_buffer, o._buffer); | 
|---|
|  | 5361 | } | 
|---|
|  | 5362 |  | 
|---|
|  | 5363 | bool uses_heap() const | 
|---|
|  | 5364 | { | 
|---|
|  | 5365 | return _uses_heap; | 
|---|
|  | 5366 | } | 
|---|
|  | 5367 | }; | 
|---|
|  | 5368 |  | 
|---|
|  | 5369 | xpath_string xpath_string_const(const char_t* str) | 
|---|
|  | 5370 | { | 
|---|
|  | 5371 | return xpath_string(str, false); | 
|---|
|  | 5372 | } | 
|---|
|  | 5373 | } | 
|---|
|  | 5374 |  | 
|---|
|  | 5375 | namespace | 
|---|
|  | 5376 | { | 
|---|
|  | 5377 | bool starts_with(const char_t* string, const char_t* pattern) | 
|---|
|  | 5378 | { | 
|---|
|  | 5379 | while (*pattern && *string == *pattern) | 
|---|
|  | 5380 | { | 
|---|
|  | 5381 | string++; | 
|---|
|  | 5382 | pattern++; | 
|---|
|  | 5383 | } | 
|---|
|  | 5384 |  | 
|---|
|  | 5385 | return *pattern == 0; | 
|---|
|  | 5386 | } | 
|---|
|  | 5387 |  | 
|---|
|  | 5388 | const char_t* find_char(const char_t* s, char_t c) | 
|---|
|  | 5389 | { | 
|---|
|  | 5390 | #ifdef PUGIXML_WCHAR_MODE | 
|---|
|  | 5391 | return wcschr(s, c); | 
|---|
|  | 5392 | #else | 
|---|
|  | 5393 | return strchr(s, c); | 
|---|
|  | 5394 | #endif | 
|---|
|  | 5395 | } | 
|---|
|  | 5396 |  | 
|---|
|  | 5397 | const char_t* find_substring(const char_t* s, const char_t* p) | 
|---|
|  | 5398 | { | 
|---|
|  | 5399 | #ifdef PUGIXML_WCHAR_MODE | 
|---|
|  | 5400 | // MSVC6 wcsstr bug workaround (if s is empty it always returns 0) | 
|---|
|  | 5401 | return (*p == 0) ? s : wcsstr(s, p); | 
|---|
|  | 5402 | #else | 
|---|
|  | 5403 | return strstr(s, p); | 
|---|
|  | 5404 | #endif | 
|---|
|  | 5405 | } | 
|---|
|  | 5406 |  | 
|---|
|  | 5407 | // Converts symbol to lower case, if it is an ASCII one | 
|---|
|  | 5408 | char_t tolower_ascii(char_t ch) | 
|---|
|  | 5409 | { | 
|---|
|  | 5410 | return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch; | 
|---|
|  | 5411 | } | 
|---|
|  | 5412 |  | 
|---|
|  | 5413 | xpath_string string_value(const xpath_node& na, xpath_allocator* alloc) | 
|---|
|  | 5414 | { | 
|---|
|  | 5415 | if (na.attribute()) | 
|---|
|  | 5416 | return xpath_string_const(na.attribute().value()); | 
|---|
|  | 5417 | else | 
|---|
|  | 5418 | { | 
|---|
|  | 5419 | const xml_node& n = na.node(); | 
|---|
|  | 5420 |  | 
|---|
|  | 5421 | switch (n.type()) | 
|---|
|  | 5422 | { | 
|---|
|  | 5423 | case node_pcdata: | 
|---|
|  | 5424 | case node_cdata: | 
|---|
|  | 5425 | case node_comment: | 
|---|
|  | 5426 | case node_pi: | 
|---|
|  | 5427 | return xpath_string_const(n.value()); | 
|---|
|  | 5428 |  | 
|---|
|  | 5429 | case node_document: | 
|---|
|  | 5430 | case node_element: | 
|---|
|  | 5431 | { | 
|---|
|  | 5432 | xpath_string result; | 
|---|
|  | 5433 |  | 
|---|
|  | 5434 | xml_node cur = n.first_child(); | 
|---|
|  | 5435 |  | 
|---|
|  | 5436 | while (cur && cur != n) | 
|---|
|  | 5437 | { | 
|---|
|  | 5438 | if (cur.type() == node_pcdata || cur.type() == node_cdata) | 
|---|
|  | 5439 | result.append(xpath_string_const(cur.value()), alloc); | 
|---|
|  | 5440 |  | 
|---|
|  | 5441 | if (cur.first_child()) | 
|---|
|  | 5442 | cur = cur.first_child(); | 
|---|
|  | 5443 | else if (cur.next_sibling()) | 
|---|
|  | 5444 | cur = cur.next_sibling(); | 
|---|
|  | 5445 | else | 
|---|
|  | 5446 | { | 
|---|
|  | 5447 | while (!cur.next_sibling() && cur != n) | 
|---|
|  | 5448 | cur = cur.parent(); | 
|---|
|  | 5449 |  | 
|---|
|  | 5450 | if (cur != n) cur = cur.next_sibling(); | 
|---|
|  | 5451 | } | 
|---|
|  | 5452 | } | 
|---|
|  | 5453 |  | 
|---|
|  | 5454 | return result; | 
|---|
|  | 5455 | } | 
|---|
|  | 5456 |  | 
|---|
|  | 5457 | default: | 
|---|
|  | 5458 | return xpath_string(); | 
|---|
|  | 5459 | } | 
|---|
|  | 5460 | } | 
|---|
|  | 5461 | } | 
|---|
|  | 5462 |  | 
|---|
|  | 5463 | unsigned int node_height(xml_node n) | 
|---|
|  | 5464 | { | 
|---|
|  | 5465 | unsigned int result = 0; | 
|---|
|  | 5466 |  | 
|---|
|  | 5467 | while (n) | 
|---|
|  | 5468 | { | 
|---|
|  | 5469 | ++result; | 
|---|
|  | 5470 | n = n.parent(); | 
|---|
|  | 5471 | } | 
|---|
|  | 5472 |  | 
|---|
|  | 5473 | return result; | 
|---|
|  | 5474 | } | 
|---|
|  | 5475 |  | 
|---|
|  | 5476 | bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh) | 
|---|
|  | 5477 | { | 
|---|
|  | 5478 | // normalize heights | 
|---|
|  | 5479 | for (unsigned int i = rh; i < lh; i++) ln = ln.parent(); | 
|---|
|  | 5480 | for (unsigned int j = lh; j < rh; j++) rn = rn.parent(); | 
|---|
|  | 5481 |  | 
|---|
|  | 5482 | // one node is the ancestor of the other | 
|---|
|  | 5483 | if (ln == rn) return lh < rh; | 
|---|
|  | 5484 |  | 
|---|
|  | 5485 | // find common ancestor | 
|---|
|  | 5486 | while (ln.parent() != rn.parent()) | 
|---|
|  | 5487 | { | 
|---|
|  | 5488 | ln = ln.parent(); | 
|---|
|  | 5489 | rn = rn.parent(); | 
|---|
|  | 5490 | } | 
|---|
|  | 5491 |  | 
|---|
|  | 5492 | // there is no common ancestor (the shared parent is null), nodes are from different documents | 
|---|
|  | 5493 | if (!ln.parent()) return ln < rn; | 
|---|
|  | 5494 |  | 
|---|
|  | 5495 | // determine sibling order | 
|---|
|  | 5496 | for (; ln; ln = ln.next_sibling()) | 
|---|
|  | 5497 | if (ln == rn) | 
|---|
|  | 5498 | return true; | 
|---|
|  | 5499 |  | 
|---|
|  | 5500 | return false; | 
|---|
|  | 5501 | } | 
|---|
|  | 5502 |  | 
|---|
|  | 5503 | bool node_is_ancestor(xml_node parent, xml_node node) | 
|---|
|  | 5504 | { | 
|---|
|  | 5505 | while (node && node != parent) node = node.parent(); | 
|---|
|  | 5506 |  | 
|---|
|  | 5507 | return parent && node == parent; | 
|---|
|  | 5508 | } | 
|---|
|  | 5509 |  | 
|---|
|  | 5510 | const void* document_order(const xpath_node& xnode) | 
|---|
|  | 5511 | { | 
|---|
|  | 5512 | xml_node_struct* node = xnode.node().internal_object(); | 
|---|
|  | 5513 |  | 
|---|
|  | 5514 | if (node) | 
|---|
|  | 5515 | { | 
|---|
|  | 5516 | if (node->name && (node->header & xml_memory_page_name_allocated_mask) == 0) return node->name; | 
|---|
|  | 5517 | if (node->value && (node->header & xml_memory_page_value_allocated_mask) == 0) return node->value; | 
|---|
|  | 5518 | return 0; | 
|---|
|  | 5519 | } | 
|---|
|  | 5520 |  | 
|---|
|  | 5521 | xml_attribute_struct* attr = xnode.attribute().internal_object(); | 
|---|
|  | 5522 |  | 
|---|
|  | 5523 | if (attr) | 
|---|
|  | 5524 | { | 
|---|
|  | 5525 | if ((attr->header & xml_memory_page_name_allocated_mask) == 0) return attr->name; | 
|---|
|  | 5526 | if ((attr->header & xml_memory_page_value_allocated_mask) == 0) return attr->value; | 
|---|
|  | 5527 | return 0; | 
|---|
|  | 5528 | } | 
|---|
|  | 5529 |  | 
|---|
|  | 5530 | return 0; | 
|---|
|  | 5531 | } | 
|---|
|  | 5532 |  | 
|---|
|  | 5533 | struct document_order_comparator | 
|---|
|  | 5534 | { | 
|---|
|  | 5535 | bool operator()(const xpath_node& lhs, const xpath_node& rhs) const | 
|---|
|  | 5536 | { | 
|---|
|  | 5537 | // optimized document order based check | 
|---|
|  | 5538 | const void* lo = document_order(lhs); | 
|---|
|  | 5539 | const void* ro = document_order(rhs); | 
|---|
|  | 5540 |  | 
|---|
|  | 5541 | if (lo && ro) return lo < ro; | 
|---|
|  | 5542 |  | 
|---|
|  | 5543 | // slow comparison | 
|---|
|  | 5544 | xml_node ln = lhs.node(), rn = rhs.node(); | 
|---|
|  | 5545 |  | 
|---|
|  | 5546 | // compare attributes | 
|---|
|  | 5547 | if (lhs.attribute() && rhs.attribute()) | 
|---|
|  | 5548 | { | 
|---|
|  | 5549 | // shared parent | 
|---|
|  | 5550 | if (lhs.parent() == rhs.parent()) | 
|---|
|  | 5551 | { | 
|---|
|  | 5552 | // determine sibling order | 
|---|
|  | 5553 | for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute()) | 
|---|
|  | 5554 | if (a == rhs.attribute()) | 
|---|
|  | 5555 | return true; | 
|---|
|  | 5556 |  | 
|---|
|  | 5557 | return false; | 
|---|
|  | 5558 | } | 
|---|
|  | 5559 |  | 
|---|
|  | 5560 | // compare attribute parents | 
|---|
|  | 5561 | ln = lhs.parent(); | 
|---|
|  | 5562 | rn = rhs.parent(); | 
|---|
|  | 5563 | } | 
|---|
|  | 5564 | else if (lhs.attribute()) | 
|---|
|  | 5565 | { | 
|---|
|  | 5566 | // attributes go after the parent element | 
|---|
|  | 5567 | if (lhs.parent() == rhs.node()) return false; | 
|---|
|  | 5568 |  | 
|---|
|  | 5569 | ln = lhs.parent(); | 
|---|
|  | 5570 | } | 
|---|
|  | 5571 | else if (rhs.attribute()) | 
|---|
|  | 5572 | { | 
|---|
|  | 5573 | // attributes go after the parent element | 
|---|
|  | 5574 | if (rhs.parent() == lhs.node()) return true; | 
|---|
|  | 5575 |  | 
|---|
|  | 5576 | rn = rhs.parent(); | 
|---|
|  | 5577 | } | 
|---|
|  | 5578 |  | 
|---|
|  | 5579 | if (ln == rn) return false; | 
|---|
|  | 5580 |  | 
|---|
|  | 5581 | unsigned int lh = node_height(ln); | 
|---|
|  | 5582 | unsigned int rh = node_height(rn); | 
|---|
|  | 5583 |  | 
|---|
|  | 5584 | return node_is_before(ln, lh, rn, rh); | 
|---|
|  | 5585 | } | 
|---|
|  | 5586 | }; | 
|---|
|  | 5587 |  | 
|---|
|  | 5588 | struct duplicate_comparator | 
|---|
|  | 5589 | { | 
|---|
|  | 5590 | bool operator()(const xpath_node& lhs, const xpath_node& rhs) const | 
|---|
|  | 5591 | { | 
|---|
|  | 5592 | if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true; | 
|---|
|  | 5593 | else return rhs.attribute() ? false : lhs.node() < rhs.node(); | 
|---|
|  | 5594 | } | 
|---|
|  | 5595 | }; | 
|---|
|  | 5596 |  | 
|---|
// Produces a NaN value.
double gen_nan()
{
#if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
    // compile-time check: a negative array bound rejects builds where float is not 32 bits wide
    typedef char float_must_be_32_bits[sizeof(float) == sizeof(int32_t) ? 1 : -1];
    (void)sizeof(float_must_be_32_bits);

    // write the bit pattern of a single-precision quiet NaN and read it back as a float
    union { float f; int32_t i; } bits;
    bits.i = 0x7fc00000;

    return bits.f;
#else
    // fallback: 0/0 evaluated at run time (volatile keeps the division from being folded)
    const volatile double zero = 0.0;

    return zero / zero;
#endif
}
|---|
|  | 5609 |  | 
|---|
// Returns true when `value` is a NaN.
bool is_nan(double value)
{
#if defined(_MSC_VER) || defined(__BORLANDC__)
    return _isnan(value) != 0;
#elif defined(fpclassify) && defined(FP_NAN)
    return FP_NAN == fpclassify(value);
#else
    // fallback: NaN is the only value that compares unequal to itself
    const volatile double probe = value;

    return probe != probe;
#endif
}
|---|
|  | 5622 |  | 
|---|
|  | 5623 | const char_t* convert_number_to_string_special(double value) | 
|---|
|  | 5624 | { | 
|---|
|  | 5625 | #if defined(_MSC_VER) || defined(__BORLANDC__) | 
|---|
|  | 5626 | if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0; | 
|---|
|  | 5627 | if (_isnan(value)) return PUGIXML_TEXT("NaN"); | 
|---|
|  | 5628 | return PUGIXML_TEXT("-Infinity") + (value > 0); | 
|---|
|  | 5629 | #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO) | 
|---|
|  | 5630 | switch (fpclassify(value)) | 
|---|
|  | 5631 | { | 
|---|
|  | 5632 | case FP_NAN: | 
|---|
|  | 5633 | return PUGIXML_TEXT("NaN"); | 
|---|
|  | 5634 |  | 
|---|
|  | 5635 | case FP_INFINITE: | 
|---|
|  | 5636 | return PUGIXML_TEXT("-Infinity") + (value > 0); | 
|---|
|  | 5637 |  | 
|---|
|  | 5638 | case FP_ZERO: | 
|---|
|  | 5639 | return PUGIXML_TEXT("0"); | 
|---|
|  | 5640 |  | 
|---|
|  | 5641 | default: | 
|---|
|  | 5642 | return 0; | 
|---|
|  | 5643 | } | 
|---|
|  | 5644 | #else | 
|---|
|  | 5645 | // fallback | 
|---|
|  | 5646 | const volatile double v = value; | 
|---|
|  | 5647 |  | 
|---|
|  | 5648 | if (v == 0) return PUGIXML_TEXT("0"); | 
|---|
|  | 5649 | if (v != v) return PUGIXML_TEXT("NaN"); | 
|---|
|  | 5650 | if (v * 2 == v) return PUGIXML_TEXT("-Infinity") + (value > 0); | 
|---|
|  | 5651 | return 0; | 
|---|
|  | 5652 | #endif | 
|---|
|  | 5653 | } | 
|---|
|  | 5654 |  | 
|---|
|  | 5655 | bool convert_number_to_boolean(double value) | 
|---|
|  | 5656 | { | 
|---|
|  | 5657 | return (value != 0 && !is_nan(value)); | 
|---|
|  | 5658 | } | 
|---|
|  | 5659 |  | 
|---|
// Removes trailing '0' characters from [begin, end) by writing a terminator after the
// last character that is kept. Writes to *end when nothing is removed.
void truncate_zeros(char* begin, char* end)
{
    char* tail = end;

    while (tail != begin && *(tail - 1) == '0')
        --tail;

    *tail = 0;
}
|---|
|  | 5666 |  | 
|---|
|  | 5667 | // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent | 
|---|
|  | 5668 | #if defined(_MSC_VER) && _MSC_VER >= 1400 | 
|---|
|  | 5669 | void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent) | 
|---|
|  | 5670 | { | 
|---|
|  | 5671 | // get base values | 
|---|
|  | 5672 | int sign, exponent; | 
|---|
|  | 5673 | _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign); | 
|---|
|  | 5674 |  | 
|---|
|  | 5675 | // truncate redundant zeros | 
|---|
|  | 5676 | truncate_zeros(buffer, buffer + strlen(buffer)); | 
|---|
|  | 5677 |  | 
|---|
|  | 5678 | // fill results | 
|---|
|  | 5679 | *out_mantissa = buffer; | 
|---|
|  | 5680 | *out_exponent = exponent; | 
|---|
|  | 5681 | } | 
|---|
|  | 5682 | #else | 
|---|
|  | 5683 | void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent) | 
|---|
|  | 5684 | { | 
|---|
|  | 5685 | // get a scientific notation value with IEEE DBL_DIG decimals | 
|---|
|  | 5686 | sprintf(buffer, "%.*e", DBL_DIG, value); | 
|---|
|  | 5687 | assert(strlen(buffer) < buffer_size); | 
|---|
|  | 5688 | (void)!buffer_size; | 
|---|
|  | 5689 |  | 
|---|
|  | 5690 | // get the exponent (possibly negative) | 
|---|
|  | 5691 | char* exponent_string = strchr(buffer, 'e'); | 
|---|
|  | 5692 | assert(exponent_string); | 
|---|
|  | 5693 |  | 
|---|
|  | 5694 | int exponent = atoi(exponent_string + 1); | 
|---|
|  | 5695 |  | 
|---|
|  | 5696 | // extract mantissa string: skip sign | 
|---|
|  | 5697 | char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer; | 
|---|
|  | 5698 | assert(mantissa[0] != '0' && mantissa[1] == '.'); | 
|---|
|  | 5699 |  | 
|---|
|  | 5700 | // divide mantissa by 10 to eliminate integer part | 
|---|
|  | 5701 | mantissa[1] = mantissa[0]; | 
|---|
|  | 5702 | mantissa++; | 
|---|
|  | 5703 | exponent++; | 
|---|
|  | 5704 |  | 
|---|
|  | 5705 | // remove extra mantissa digits and zero-terminate mantissa | 
|---|
|  | 5706 | truncate_zeros(mantissa, exponent_string); | 
|---|
|  | 5707 |  | 
|---|
|  | 5708 | // fill results | 
|---|
|  | 5709 | *out_mantissa = mantissa; | 
|---|
|  | 5710 | *out_exponent = exponent; | 
|---|
|  | 5711 | } | 
|---|
|  | 5712 | #endif | 
|---|
|  | 5713 |  | 
|---|
|  | 5714 | xpath_string convert_number_to_string(double value, xpath_allocator* alloc) | 
|---|
|  | 5715 | { | 
|---|
|  | 5716 | // try special number conversion | 
|---|
|  | 5717 | const char_t* special = convert_number_to_string_special(value); | 
|---|
|  | 5718 | if (special) return xpath_string_const(special); | 
|---|
|  | 5719 |  | 
|---|
|  | 5720 | // get mantissa + exponent form | 
|---|
|  | 5721 | char mantissa_buffer[64]; | 
|---|
|  | 5722 |  | 
|---|
|  | 5723 | char* mantissa; | 
|---|
|  | 5724 | int exponent; | 
|---|
|  | 5725 | convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent); | 
|---|
|  | 5726 |  | 
|---|
|  | 5727 | // make the number! | 
|---|
|  | 5728 | char_t result[512]; | 
|---|
|  | 5729 | char_t* s = result; | 
|---|
|  | 5730 |  | 
|---|
|  | 5731 | // sign | 
|---|
|  | 5732 | if (value < 0) *s++ = '-'; | 
|---|
|  | 5733 |  | 
|---|
|  | 5734 | // integer part | 
|---|
|  | 5735 | if (exponent <= 0) | 
|---|
|  | 5736 | { | 
|---|
|  | 5737 | *s++ = '0'; | 
|---|
|  | 5738 | } | 
|---|
|  | 5739 | else | 
|---|
|  | 5740 | { | 
|---|
|  | 5741 | while (exponent > 0) | 
|---|
|  | 5742 | { | 
|---|
|  | 5743 | assert(*mantissa == 0 || (unsigned)(*mantissa - '0') <= 9); | 
|---|
|  | 5744 | *s++ = *mantissa ? *mantissa++ : '0'; | 
|---|
|  | 5745 | exponent--; | 
|---|
|  | 5746 | } | 
|---|
|  | 5747 | } | 
|---|
|  | 5748 |  | 
|---|
|  | 5749 | // fractional part | 
|---|
|  | 5750 | if (*mantissa) | 
|---|
|  | 5751 | { | 
|---|
|  | 5752 | // decimal point | 
|---|
|  | 5753 | *s++ = '.'; | 
|---|
|  | 5754 |  | 
|---|
|  | 5755 | // extra zeroes from negative exponent | 
|---|
|  | 5756 | while (exponent < 0) | 
|---|
|  | 5757 | { | 
|---|
|  | 5758 | *s++ = '0'; | 
|---|
|  | 5759 | exponent++; | 
|---|
|  | 5760 | } | 
|---|
|  | 5761 |  | 
|---|
|  | 5762 | // extra mantissa digits | 
|---|
|  | 5763 | while (*mantissa) | 
|---|
|  | 5764 | { | 
|---|
|  | 5765 | assert((unsigned)(*mantissa - '0') <= 9); | 
|---|
|  | 5766 | *s++ = *mantissa++; | 
|---|
|  | 5767 | } | 
|---|
|  | 5768 | } | 
|---|
|  | 5769 |  | 
|---|
|  | 5770 | // zero-terminate | 
|---|
|  | 5771 | assert(s < result + sizeof(result) / sizeof(result[0])); | 
|---|
|  | 5772 | *s = 0; | 
|---|
|  | 5773 |  | 
|---|
|  | 5774 | return xpath_string(result, alloc); | 
|---|
|  | 5775 | } | 
|---|
|  | 5776 |  | 
|---|
|  | 5777 | bool check_string_to_number_format(const char_t* string) | 
|---|
|  | 5778 | { | 
|---|
|  | 5779 | // parse leading whitespace | 
|---|
|  | 5780 | while (IS_CHARTYPE(*string, ct_space)) ++string; | 
|---|
|  | 5781 |  | 
|---|
|  | 5782 | // parse sign | 
|---|
|  | 5783 | if (*string == '-') ++string; | 
|---|
|  | 5784 |  | 
|---|
|  | 5785 | if (!*string) return false; | 
|---|
|  | 5786 |  | 
|---|
|  | 5787 | // if there is no integer part, there should be a decimal part with at least one digit | 
|---|
|  | 5788 | if (!IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !IS_CHARTYPEX(string[1], ctx_digit))) return false; | 
|---|
|  | 5789 |  | 
|---|
|  | 5790 | // parse integer part | 
|---|
|  | 5791 | while (IS_CHARTYPEX(*string, ctx_digit)) ++string; | 
|---|
|  | 5792 |  | 
|---|
|  | 5793 | // parse decimal part | 
|---|
|  | 5794 | if (*string == '.') | 
|---|
|  | 5795 | { | 
|---|
|  | 5796 | ++string; | 
|---|
|  | 5797 |  | 
|---|
|  | 5798 | while (IS_CHARTYPEX(*string, ctx_digit)) ++string; | 
|---|
|  | 5799 | } | 
|---|
|  | 5800 |  | 
|---|
|  | 5801 | // parse trailing whitespace | 
|---|
|  | 5802 | while (IS_CHARTYPE(*string, ct_space)) ++string; | 
|---|
|  | 5803 |  | 
|---|
|  | 5804 | return *string == 0; | 
|---|
|  | 5805 | } | 
|---|
|  | 5806 |  | 
|---|
|  | 5807 | double convert_string_to_number(const char_t* string) | 
|---|
|  | 5808 | { | 
|---|
|  | 5809 | // check string format | 
|---|
|  | 5810 | if (!check_string_to_number_format(string)) return gen_nan(); | 
|---|
|  | 5811 |  | 
|---|
|  | 5812 | // parse string | 
|---|
|  | 5813 | #ifdef PUGIXML_WCHAR_MODE | 
|---|
|  | 5814 | return wcstod(string, 0); | 
|---|
|  | 5815 | #else | 
|---|
|  | 5816 | return atof(string); | 
|---|
|  | 5817 | #endif | 
|---|
|  | 5818 | } | 
|---|
|  | 5819 |  | 
|---|
|  | 5820 | bool convert_string_to_number(const char_t* begin, const char_t* end, double* out_result) | 
|---|
|  | 5821 | { | 
|---|
|  | 5822 | char_t buffer[32]; | 
|---|
|  | 5823 |  | 
|---|
|  | 5824 | size_t length = static_cast<size_t>(end - begin); | 
|---|
|  | 5825 | char_t* scratch = buffer; | 
|---|
|  | 5826 |  | 
|---|
|  | 5827 | if (length >= sizeof(buffer) / sizeof(buffer[0])) | 
|---|
|  | 5828 | { | 
|---|
|  | 5829 | // need to make dummy on-heap copy | 
|---|
|  | 5830 | scratch = static_cast<char_t*>(global_allocate((length + 1) * sizeof(char_t))); | 
|---|
|  | 5831 | if (!scratch) return false; | 
|---|
|  | 5832 | } | 
|---|
|  | 5833 |  | 
|---|
|  | 5834 | // copy string to zero-terminated buffer and perform conversion | 
|---|
|  | 5835 | memcpy(scratch, begin, length * sizeof(char_t)); | 
|---|
|  | 5836 | scratch[length] = 0; | 
|---|
|  | 5837 |  | 
|---|
|  | 5838 | *out_result = convert_string_to_number(scratch); | 
|---|
|  | 5839 |  | 
|---|
|  | 5840 | // free dummy buffer | 
|---|
|  | 5841 | if (scratch != buffer) global_deallocate(scratch); | 
|---|
|  | 5842 |  | 
|---|
|  | 5843 | return true; | 
|---|
|  | 5844 | } | 
|---|
|  | 5845 |  | 
|---|
// Rounds value to the nearest integer; exact halves go towards positive infinity.
// NaN and infinities are returned unchanged, and a zero result is always +0 (also for -0 input).
// The fractional part is compared against 0.5 instead of evaluating floor(value + 0.5):
// that addition can itself round up (0.49999999999999994 + 0.5 == 1.0, and odd integers
// >= 2^52 move to the next even value), which made the result off by one.
double round_nearest(double value)
{
	double whole = floor(value);

	// adding +0.0 in the "no increment" case turns -0 into +0, exactly like floor(value + 0.5) did
	return whole + ((value - whole >= 0.5) ? 1.0 : 0.0);
}
|---|
|  | 5850 |  | 
|---|
// Same rounding as round_nearest (nearest integer, halves towards positive infinity), except
// that inputs in [-0.5, -0] produce -0 while +0 stays +0.
// Outside that interval the fractional part is compared against 0.5 instead of evaluating
// floor(value + 0.5), because the addition can round up on its own
// (0.49999999999999994 + 0.5 == 1.0; odd integers >= 2^52 move to the next even value).
double round_nearest_nzero(double value)
{
	// ceil preserves the sign of zero: [-0.5, -0] -> -0, +0 -> +0
	if (value >= -0.5 && value <= 0) return ceil(value);

	double whole = floor(value);

	return whole + ((value - whole >= 0.5) ? 1.0 : 0.0);
}
|---|
|  | 5857 |  | 
|---|
|  | 5858 | const char_t* qualified_name(const xpath_node& node) | 
|---|
|  | 5859 | { | 
|---|
|  | 5860 | return node.attribute() ? node.attribute().name() : node.node().name(); | 
|---|
|  | 5861 | } | 
|---|
|  | 5862 |  | 
|---|
|  | 5863 | const char_t* local_name(const xpath_node& node) | 
|---|
|  | 5864 | { | 
|---|
|  | 5865 | const char_t* name = qualified_name(node); | 
|---|
|  | 5866 | const char_t* p = find_char(name, ':'); | 
|---|
|  | 5867 |  | 
|---|
|  | 5868 | return p ? p + 1 : name; | 
|---|
|  | 5869 | } | 
|---|
|  | 5870 |  | 
|---|
|  | 5871 | struct namespace_uri_predicate | 
|---|
|  | 5872 | { | 
|---|
|  | 5873 | const char_t* prefix; | 
|---|
|  | 5874 | size_t prefix_length; | 
|---|
|  | 5875 |  | 
|---|
|  | 5876 | namespace_uri_predicate(const char_t* name) | 
|---|
|  | 5877 | { | 
|---|
|  | 5878 | const char_t* pos = find_char(name, ':'); | 
|---|
|  | 5879 |  | 
|---|
|  | 5880 | prefix = pos ? name : 0; | 
|---|
|  | 5881 | prefix_length = pos ? static_cast<size_t>(pos - name) : 0; | 
|---|
|  | 5882 | } | 
|---|
|  | 5883 |  | 
|---|
|  | 5884 | bool operator()(const xml_attribute& a) const | 
|---|
|  | 5885 | { | 
|---|
|  | 5886 | const char_t* name = a.name(); | 
|---|
|  | 5887 |  | 
|---|
|  | 5888 | if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false; | 
|---|
|  | 5889 |  | 
|---|
|  | 5890 | return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0; | 
|---|
|  | 5891 | } | 
|---|
|  | 5892 | }; | 
|---|
|  | 5893 |  | 
|---|
|  | 5894 | const char_t* namespace_uri(const xml_node& node) | 
|---|
|  | 5895 | { | 
|---|
|  | 5896 | namespace_uri_predicate pred = node.name(); | 
|---|
|  | 5897 |  | 
|---|
|  | 5898 | xml_node p = node; | 
|---|
|  | 5899 |  | 
|---|
|  | 5900 | while (p) | 
|---|
|  | 5901 | { | 
|---|
|  | 5902 | xml_attribute a = p.find_attribute(pred); | 
|---|
|  | 5903 |  | 
|---|
|  | 5904 | if (a) return a.value(); | 
|---|
|  | 5905 |  | 
|---|
|  | 5906 | p = p.parent(); | 
|---|
|  | 5907 | } | 
|---|
|  | 5908 |  | 
|---|
|  | 5909 | return PUGIXML_TEXT(""); | 
|---|
|  | 5910 | } | 
|---|
|  | 5911 |  | 
|---|
|  | 5912 | const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent) | 
|---|
|  | 5913 | { | 
|---|
|  | 5914 | namespace_uri_predicate pred = attr.name(); | 
|---|
|  | 5915 |  | 
|---|
|  | 5916 | // Default namespace does not apply to attributes | 
|---|
|  | 5917 | if (!pred.prefix) return PUGIXML_TEXT(""); | 
|---|
|  | 5918 |  | 
|---|
|  | 5919 | xml_node p = parent; | 
|---|
|  | 5920 |  | 
|---|
|  | 5921 | while (p) | 
|---|
|  | 5922 | { | 
|---|
|  | 5923 | xml_attribute a = p.find_attribute(pred); | 
|---|
|  | 5924 |  | 
|---|
|  | 5925 | if (a) return a.value(); | 
|---|
|  | 5926 |  | 
|---|
|  | 5927 | p = p.parent(); | 
|---|
|  | 5928 | } | 
|---|
|  | 5929 |  | 
|---|
|  | 5930 | return PUGIXML_TEXT(""); | 
|---|
|  | 5931 | } | 
|---|
|  | 5932 |  | 
|---|
|  | 5933 | const char_t* namespace_uri(const xpath_node& node) | 
|---|
|  | 5934 | { | 
|---|
|  | 5935 | return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node()); | 
|---|
|  | 5936 | } | 
|---|
|  | 5937 |  | 
|---|
|  | 5938 | void normalize_space(char_t* buffer) | 
|---|
|  | 5939 | { | 
|---|
|  | 5940 | char_t* write = buffer; | 
|---|
|  | 5941 |  | 
|---|
|  | 5942 | for (char_t* it = buffer; *it; ) | 
|---|
|  | 5943 | { | 
|---|
|  | 5944 | char_t ch = *it++; | 
|---|
|  | 5945 |  | 
|---|
|  | 5946 | if (IS_CHARTYPE(ch, ct_space)) | 
|---|
|  | 5947 | { | 
|---|
|  | 5948 | // replace whitespace sequence with single space | 
|---|
|  | 5949 | while (IS_CHARTYPE(*it, ct_space)) it++; | 
|---|
|  | 5950 |  | 
|---|
|  | 5951 | // avoid leading spaces | 
|---|
|  | 5952 | if (write != buffer) *write++ = ' '; | 
|---|
|  | 5953 | } | 
|---|
|  | 5954 | else *write++ = ch; | 
|---|
|  | 5955 | } | 
|---|
|  | 5956 |  | 
|---|
|  | 5957 | // remove trailing space | 
|---|
|  | 5958 | if (write != buffer && IS_CHARTYPE(write[-1], ct_space)) write--; | 
|---|
|  | 5959 |  | 
|---|
|  | 5960 | // zero-terminate | 
|---|
|  | 5961 | *write = 0; | 
|---|
|  | 5962 | } | 
|---|
|  | 5963 |  | 
|---|
|  | 5964 | void translate(char_t* buffer, const char_t* from, const char_t* to) | 
|---|
|  | 5965 | { | 
|---|
|  | 5966 | size_t to_length = strlength(to); | 
|---|
|  | 5967 |  | 
|---|
|  | 5968 | char_t* write = buffer; | 
|---|
|  | 5969 |  | 
|---|
|  | 5970 | while (*buffer) | 
|---|
|  | 5971 | { | 
|---|
|  | 5972 | DMC_VOLATILE char_t ch = *buffer++; | 
|---|
|  | 5973 |  | 
|---|
|  | 5974 | const char_t* pos = find_char(from, ch); | 
|---|
|  | 5975 |  | 
|---|
|  | 5976 | if (!pos) | 
|---|
|  | 5977 | *write++ = ch; // do not process | 
|---|
|  | 5978 | else if (static_cast<size_t>(pos - from) < to_length) | 
|---|
|  | 5979 | *write++ = to[pos - from]; // replace | 
|---|
|  | 5980 | } | 
|---|
|  | 5981 |  | 
|---|
|  | 5982 | // zero-terminate | 
|---|
|  | 5983 | *write = 0; | 
|---|
|  | 5984 | } | 
|---|
|  | 5985 |  | 
|---|
|  | 5986 | struct xpath_variable_boolean: xpath_variable | 
|---|
|  | 5987 | { | 
|---|
|  | 5988 | xpath_variable_boolean(): value(false) | 
|---|
|  | 5989 | { | 
|---|
|  | 5990 | } | 
|---|
|  | 5991 |  | 
|---|
|  | 5992 | bool value; | 
|---|
|  | 5993 | char_t name[1]; | 
|---|
|  | 5994 | }; | 
|---|
|  | 5995 |  | 
|---|
|  | 5996 | struct xpath_variable_number: xpath_variable | 
|---|
|  | 5997 | { | 
|---|
|  | 5998 | xpath_variable_number(): value(0) | 
|---|
|  | 5999 | { | 
|---|
|  | 6000 | } | 
|---|
|  | 6001 |  | 
|---|
|  | 6002 | double value; | 
|---|
|  | 6003 | char_t name[1]; | 
|---|
|  | 6004 | }; | 
|---|
|  | 6005 |  | 
|---|
|  | 6006 | struct xpath_variable_string: xpath_variable | 
|---|
|  | 6007 | { | 
|---|
|  | 6008 | xpath_variable_string(): value(0) | 
|---|
|  | 6009 | { | 
|---|
|  | 6010 | } | 
|---|
|  | 6011 |  | 
|---|
|  | 6012 | ~xpath_variable_string() | 
|---|
|  | 6013 | { | 
|---|
|  | 6014 | if (value) global_deallocate(value); | 
|---|
|  | 6015 | } | 
|---|
|  | 6016 |  | 
|---|
|  | 6017 | char_t* value; | 
|---|
|  | 6018 | char_t name[1]; | 
|---|
|  | 6019 | }; | 
|---|
|  | 6020 |  | 
|---|
|  | 6021 | struct xpath_variable_node_set: xpath_variable | 
|---|
|  | 6022 | { | 
|---|
|  | 6023 | xpath_node_set value; | 
|---|
|  | 6024 | char_t name[1]; | 
|---|
|  | 6025 | }; | 
|---|
|  | 6026 |  | 
|---|
// Default-constructed node set constant.
// NOTE(review): presumably handed out where a node-set reference is required but no real value exists - confirm against its users.
const xpath_node_set dummy_node_set;
|---|
|  | 6028 |  | 
|---|
|  | 6029 | unsigned int hash_string(const char_t* str) | 
|---|
|  | 6030 | { | 
|---|
|  | 6031 | // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time) | 
|---|
|  | 6032 | unsigned int result = 0; | 
|---|
|  | 6033 |  | 
|---|
|  | 6034 | while (*str) | 
|---|
|  | 6035 | { | 
|---|
|  | 6036 | result += static_cast<unsigned int>(*str++); | 
|---|
|  | 6037 | result += result << 10; | 
|---|
|  | 6038 | result ^= result >> 6; | 
|---|
|  | 6039 | } | 
|---|
|  | 6040 |  | 
|---|
|  | 6041 | result += result << 3; | 
|---|
|  | 6042 | result ^= result >> 11; | 
|---|
|  | 6043 | result += result << 15; | 
|---|
|  | 6044 |  | 
|---|
|  | 6045 | return result; | 
|---|
|  | 6046 | } | 
|---|
|  | 6047 |  | 
|---|
|  | 6048 | template <typename T> T* new_xpath_variable(const char_t* name) | 
|---|
|  | 6049 | { | 
|---|
|  | 6050 | size_t length = strlength(name); | 
|---|
|  | 6051 | if (length == 0) return 0; // empty variable names are invalid | 
|---|
|  | 6052 |  | 
|---|
|  | 6053 | // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters | 
|---|
|  | 6054 | void* memory = global_allocate(sizeof(T) + length * sizeof(char_t)); | 
|---|
|  | 6055 | if (!memory) return 0; | 
|---|
|  | 6056 |  | 
|---|
|  | 6057 | T* result = new (memory) T(); | 
|---|
|  | 6058 |  | 
|---|
|  | 6059 | memcpy(result->name, name, (length + 1) * sizeof(char_t)); | 
|---|
|  | 6060 |  | 
|---|
|  | 6061 | return result; | 
|---|
|  | 6062 | } | 
|---|
|  | 6063 |  | 
|---|
|  | 6064 | xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name) | 
|---|
|  | 6065 | { | 
|---|
|  | 6066 | switch (type) | 
|---|
|  | 6067 | { | 
|---|
|  | 6068 | case xpath_type_node_set: | 
|---|
|  | 6069 | return new_xpath_variable<xpath_variable_node_set>(name); | 
|---|
|  | 6070 |  | 
|---|
|  | 6071 | case xpath_type_number: | 
|---|
|  | 6072 | return new_xpath_variable<xpath_variable_number>(name); | 
|---|
|  | 6073 |  | 
|---|
|  | 6074 | case xpath_type_string: | 
|---|
|  | 6075 | return new_xpath_variable<xpath_variable_string>(name); | 
|---|
|  | 6076 |  | 
|---|
|  | 6077 | case xpath_type_boolean: | 
|---|
|  | 6078 | return new_xpath_variable<xpath_variable_boolean>(name); | 
|---|
|  | 6079 |  | 
|---|
|  | 6080 | default: | 
|---|
|  | 6081 | return 0; | 
|---|
|  | 6082 | } | 
|---|
|  | 6083 | } | 
|---|
|  | 6084 |  | 
|---|
|  | 6085 | template <typename T> void delete_xpath_variable(T* var) | 
|---|
|  | 6086 | { | 
|---|
|  | 6087 | var->~T(); | 
|---|
|  | 6088 | global_deallocate(var); | 
|---|
|  | 6089 | } | 
|---|
|  | 6090 |  | 
|---|
|  | 6091 | void delete_xpath_variable(xpath_value_type type, xpath_variable* var) | 
|---|
|  | 6092 | { | 
|---|
|  | 6093 | switch (type) | 
|---|
|  | 6094 | { | 
|---|
|  | 6095 | case xpath_type_node_set: | 
|---|
|  | 6096 | delete_xpath_variable(static_cast<xpath_variable_node_set*>(var)); | 
|---|
|  | 6097 | break; | 
|---|
|  | 6098 |  | 
|---|
|  | 6099 | case xpath_type_number: | 
|---|
|  | 6100 | delete_xpath_variable(static_cast<xpath_variable_number*>(var)); | 
|---|
|  | 6101 | break; | 
|---|
|  | 6102 |  | 
|---|
|  | 6103 | case xpath_type_string: | 
|---|
|  | 6104 | delete_xpath_variable(static_cast<xpath_variable_string*>(var)); | 
|---|
|  | 6105 | break; | 
|---|
|  | 6106 |  | 
|---|
|  | 6107 | case xpath_type_boolean: | 
|---|
|  | 6108 | delete_xpath_variable(static_cast<xpath_variable_boolean*>(var)); | 
|---|
|  | 6109 | break; | 
|---|
|  | 6110 |  | 
|---|
|  | 6111 | default: | 
|---|
|  | 6112 | assert(!"Invalid variable type"); | 
|---|
|  | 6113 | } | 
|---|
|  | 6114 | } | 
|---|
|  | 6115 |  | 
|---|
|  | 6116 | xpath_variable* get_variable(xpath_variable_set* set, const char_t* begin, const char_t* end) | 
|---|
|  | 6117 | { | 
|---|
|  | 6118 | char_t buffer[32]; | 
|---|
|  | 6119 |  | 
|---|
|  | 6120 | size_t length = static_cast<size_t>(end - begin); | 
|---|
|  | 6121 | char_t* scratch = buffer; | 
|---|
|  | 6122 |  | 
|---|
|  | 6123 | if (length >= sizeof(buffer) / sizeof(buffer[0])) | 
|---|
|  | 6124 | { | 
|---|
|  | 6125 | // need to make dummy on-heap copy | 
|---|
|  | 6126 | scratch = static_cast<char_t*>(global_allocate((length + 1) * sizeof(char_t))); | 
|---|
|  | 6127 | if (!scratch) return 0; | 
|---|
|  | 6128 | } | 
|---|
|  | 6129 |  | 
|---|
|  | 6130 | // copy string to zero-terminated buffer and perform lookup | 
|---|
|  | 6131 | memcpy(scratch, begin, length * sizeof(char_t)); | 
|---|
|  | 6132 | scratch[length] = 0; | 
|---|
|  | 6133 |  | 
|---|
|  | 6134 | xpath_variable* result = set->get(scratch); | 
|---|
|  | 6135 |  | 
|---|
|  | 6136 | // free dummy buffer | 
|---|
|  | 6137 | if (scratch != buffer) global_deallocate(scratch); | 
|---|
|  | 6138 |  | 
|---|
|  | 6139 | return result; | 
|---|
|  | 6140 | } | 
|---|
|  | 6141 | } | 
|---|
|  | 6142 |  | 
|---|
|  | 6143 | // Internal node set class | 
|---|
|  | 6144 | namespace | 
|---|
|  | 6145 | { | 
|---|
|  | 6146 | xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev) | 
|---|
|  | 6147 | { | 
|---|
|  | 6148 | xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; | 
|---|
|  | 6149 |  | 
|---|
|  | 6150 | if (type == xpath_node_set::type_unsorted) | 
|---|
|  | 6151 | { | 
|---|
|  | 6152 | sort(begin, end, document_order_comparator()); | 
|---|
|  | 6153 |  | 
|---|
|  | 6154 | type = xpath_node_set::type_sorted; | 
|---|
|  | 6155 | } | 
|---|
|  | 6156 |  | 
|---|
|  | 6157 | if (type != order) reverse(begin, end); | 
|---|
|  | 6158 |  | 
|---|
|  | 6159 | return order; | 
|---|
|  | 6160 | } | 
|---|
|  | 6161 |  | 
|---|
|  | 6162 | xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type) | 
|---|
|  | 6163 | { | 
|---|
|  | 6164 | if (begin == end) return xpath_node(); | 
|---|
|  | 6165 |  | 
|---|
|  | 6166 | switch (type) | 
|---|
|  | 6167 | { | 
|---|
|  | 6168 | case xpath_node_set::type_sorted: | 
|---|
|  | 6169 | return *begin; | 
|---|
|  | 6170 |  | 
|---|
|  | 6171 | case xpath_node_set::type_sorted_reverse: | 
|---|
|  | 6172 | return *(end - 1); | 
|---|
|  | 6173 |  | 
|---|
|  | 6174 | case xpath_node_set::type_unsorted: | 
|---|
|  | 6175 | return *min_element(begin, end, document_order_comparator()); | 
|---|
|  | 6176 |  | 
|---|
|  | 6177 | default: | 
|---|
|  | 6178 | assert(!"Invalid node set type"); | 
|---|
|  | 6179 | return xpath_node(); | 
|---|
|  | 6180 | } | 
|---|
|  | 6181 | } | 
|---|
|  | 6182 | class xpath_node_set_raw | 
|---|
|  | 6183 | { | 
|---|
|  | 6184 | xpath_node_set::type_t _type; | 
|---|
|  | 6185 |  | 
|---|
|  | 6186 | xpath_node* _begin; | 
|---|
|  | 6187 | xpath_node* _end; | 
|---|
|  | 6188 | xpath_node* _eos; | 
|---|
|  | 6189 |  | 
|---|
|  | 6190 | public: | 
|---|
|  | 6191 | xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0) | 
|---|
|  | 6192 | { | 
|---|
|  | 6193 | } | 
|---|
|  | 6194 |  | 
|---|
|  | 6195 | xpath_node* begin() const | 
|---|
|  | 6196 | { | 
|---|
|  | 6197 | return _begin; | 
|---|
|  | 6198 | } | 
|---|
|  | 6199 |  | 
|---|
|  | 6200 | xpath_node* end() const | 
|---|
|  | 6201 | { | 
|---|
|  | 6202 | return _end; | 
|---|
|  | 6203 | } | 
|---|
|  | 6204 |  | 
|---|
|  | 6205 | bool empty() const | 
|---|
|  | 6206 | { | 
|---|
|  | 6207 | return _begin == _end; | 
|---|
|  | 6208 | } | 
|---|
|  | 6209 |  | 
|---|
|  | 6210 | size_t size() const | 
|---|
|  | 6211 | { | 
|---|
|  | 6212 | return static_cast<size_t>(_end - _begin); | 
|---|
|  | 6213 | } | 
|---|
|  | 6214 |  | 
|---|
|  | 6215 | xpath_node first() const | 
|---|
|  | 6216 | { | 
|---|
|  | 6217 | return xpath_first(_begin, _end, _type); | 
|---|
|  | 6218 | } | 
|---|
|  | 6219 |  | 
|---|
|  | 6220 | void push_back(const xpath_node& node, xpath_allocator* alloc) | 
|---|
|  | 6221 | { | 
|---|
|  | 6222 | if (_end == _eos) | 
|---|
|  | 6223 | { | 
|---|
|  | 6224 | size_t capacity = static_cast<size_t>(_eos - _begin); | 
|---|
|  | 6225 |  | 
|---|
|  | 6226 | // get new capacity (1.5x rule) | 
|---|
|  | 6227 | size_t new_capacity = capacity + capacity / 2 + 1; | 
|---|
|  | 6228 |  | 
|---|
|  | 6229 | // reallocate the old array or allocate a new one | 
|---|
|  | 6230 | xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); | 
|---|
|  | 6231 | assert(data); | 
|---|
|  | 6232 |  | 
|---|
|  | 6233 | // finalize | 
|---|
|  | 6234 | _begin = data; | 
|---|
|  | 6235 | _end = data + capacity; | 
|---|
|  | 6236 | _eos = data + new_capacity; | 
|---|
|  | 6237 | } | 
|---|
|  | 6238 |  | 
|---|
|  | 6239 | *_end++ = node; | 
|---|
|  | 6240 | } | 
|---|
|  | 6241 |  | 
|---|
|  | 6242 | void append(const xpath_node* begin, const xpath_node* end, xpath_allocator* alloc) | 
|---|
|  | 6243 | { | 
|---|
|  | 6244 | size_t size = static_cast<size_t>(_end - _begin); | 
|---|
|  | 6245 | size_t capacity = static_cast<size_t>(_eos - _begin); | 
|---|
|  | 6246 | size_t count = static_cast<size_t>(end - begin); | 
|---|
|  | 6247 |  | 
|---|
|  | 6248 | if (size + count > capacity) | 
|---|
|  | 6249 | { | 
|---|
|  | 6250 | // reallocate the old array or allocate a new one | 
|---|
|  | 6251 | xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size + count) * sizeof(xpath_node))); | 
|---|
|  | 6252 | assert(data); | 
|---|
|  | 6253 |  | 
|---|
|  | 6254 | // finalize | 
|---|
|  | 6255 | _begin = data; | 
|---|
|  | 6256 | _end = data + size; | 
|---|
|  | 6257 | _eos = data + size + count; | 
|---|
|  | 6258 | } | 
|---|
|  | 6259 |  | 
|---|
|  | 6260 | memcpy(_end, begin, count * sizeof(xpath_node)); | 
|---|
|  | 6261 | _end += count; | 
|---|
|  | 6262 | } | 
|---|
|  | 6263 |  | 
|---|
|  | 6264 | void sort_do() | 
|---|
|  | 6265 | { | 
|---|
|  | 6266 | _type = xpath_sort(_begin, _end, _type, false); | 
|---|
|  | 6267 | } | 
|---|
|  | 6268 |  | 
|---|
|  | 6269 | void truncate(xpath_node* pos) | 
|---|
|  | 6270 | { | 
|---|
|  | 6271 | assert(_begin <= pos && pos <= _end); | 
|---|
|  | 6272 |  | 
|---|
|  | 6273 | _end = pos; | 
|---|
|  | 6274 | } | 
|---|
|  | 6275 |  | 
|---|
|  | 6276 | void remove_duplicates() | 
|---|
|  | 6277 | { | 
|---|
|  | 6278 | if (_type == xpath_node_set::type_unsorted) | 
|---|
|  | 6279 | sort(_begin, _end, duplicate_comparator()); | 
|---|
|  | 6280 |  | 
|---|
|  | 6281 | _end = unique(_begin, _end); | 
|---|
|  | 6282 | } | 
|---|
|  | 6283 |  | 
|---|
|  | 6284 | xpath_node_set::type_t type() const | 
|---|
|  | 6285 | { | 
|---|
|  | 6286 | return _type; | 
|---|
|  | 6287 | } | 
|---|
|  | 6288 |  | 
|---|
|  | 6289 | void set_type(xpath_node_set::type_t type) | 
|---|
|  | 6290 | { | 
|---|
|  | 6291 | _type = type; | 
|---|
|  | 6292 | } | 
|---|
|  | 6293 | }; | 
|---|
|  | 6294 | } | 
|---|
|  | 6295 |  | 
|---|
|  | 6296 | namespace | 
|---|
|  | 6297 | { | 
|---|
|  | 6298 | struct xpath_context | 
|---|
|  | 6299 | { | 
|---|
|  | 6300 | xpath_node n; | 
|---|
|  | 6301 | size_t position, size; | 
|---|
|  | 6302 |  | 
|---|
|  | 6303 | xpath_context(const xpath_node& n, size_t position, size_t size): n(n), position(position), size(size) | 
|---|
|  | 6304 | { | 
|---|
|  | 6305 | } | 
|---|
|  | 6306 | }; | 
|---|
|  | 6307 |  | 
|---|
// Token kinds produced by the XPath lexer
enum lexeme_t
{
	lex_none = 0,            // input that does not form a valid token
	lex_equal,               // =
	lex_not_equal,           // !=
	lex_less,                // <
	lex_greater,             // >
	lex_less_or_equal,       // <=
	lex_greater_or_equal,    // >=
	lex_plus,                // +
	lex_minus,               // -
	lex_multiply,            // *
	lex_union,               // |
	lex_var_ref,             // $name or $prefix:name
	lex_open_brace,          // (
	lex_close_brace,         // )
	lex_quoted_string,       // "..." or '...'
	lex_number,              // digits with an optional fractional part, or .digits
	lex_slash,               // /
	lex_double_slash,        // //
	lex_open_square_brace,   // [
	lex_close_square_brace,  // ]
	lex_string,              // name, prefix:name or prefix:*
	lex_comma,               // ,
	lex_axis_attribute,      // @
	lex_dot,                 // .
	lex_double_dot,          // ..
	lex_double_colon,        // ::
	lex_eof                  // end of the query string
};
|---|
|  | 6338 |  | 
|---|
|  | 6339 | struct xpath_lexer_string | 
|---|
|  | 6340 | { | 
|---|
|  | 6341 | const char_t* begin; | 
|---|
|  | 6342 | const char_t* end; | 
|---|
|  | 6343 |  | 
|---|
|  | 6344 | xpath_lexer_string(): begin(0), end(0) | 
|---|
|  | 6345 | { | 
|---|
|  | 6346 | } | 
|---|
|  | 6347 |  | 
|---|
|  | 6348 | bool operator==(const char_t* other) const | 
|---|
|  | 6349 | { | 
|---|
|  | 6350 | size_t length = static_cast<size_t>(end - begin); | 
|---|
|  | 6351 |  | 
|---|
|  | 6352 | return strequalrange(other, begin, length); | 
|---|
|  | 6353 | } | 
|---|
|  | 6354 | }; | 
|---|
|  | 6355 |  | 
|---|
|  | 6356 | class xpath_lexer | 
|---|
|  | 6357 | { | 
|---|
|  | 6358 | const char_t* _cur; | 
|---|
|  | 6359 | const char_t* _cur_lexeme_pos; | 
|---|
|  | 6360 | xpath_lexer_string _cur_lexeme_contents; | 
|---|
|  | 6361 |  | 
|---|
|  | 6362 | lexeme_t _cur_lexeme; | 
|---|
|  | 6363 |  | 
|---|
|  | 6364 | public: | 
|---|
|  | 6365 | explicit xpath_lexer(const char_t* query): _cur(query) | 
|---|
|  | 6366 | { | 
|---|
|  | 6367 | next(); | 
|---|
|  | 6368 | } | 
|---|
|  | 6369 |  | 
|---|
|  | 6370 | const char_t* state() const | 
|---|
|  | 6371 | { | 
|---|
|  | 6372 | return _cur; | 
|---|
|  | 6373 | } | 
|---|
|  | 6374 |  | 
|---|
|  | 6375 | void next() | 
|---|
|  | 6376 | { | 
|---|
|  | 6377 | const char_t* cur = _cur; | 
|---|
|  | 6378 |  | 
|---|
|  | 6379 | while (IS_CHARTYPE(*cur, ct_space)) ++cur; | 
|---|
|  | 6380 |  | 
|---|
|  | 6381 | // save lexeme position for error reporting | 
|---|
|  | 6382 | _cur_lexeme_pos = cur; | 
|---|
|  | 6383 |  | 
|---|
|  | 6384 | switch (*cur) | 
|---|
|  | 6385 | { | 
|---|
|  | 6386 | case 0: | 
|---|
|  | 6387 | _cur_lexeme = lex_eof; | 
|---|
|  | 6388 | break; | 
|---|
|  | 6389 |  | 
|---|
|  | 6390 | case '>': | 
|---|
|  | 6391 | if (*(cur+1) == '=') | 
|---|
|  | 6392 | { | 
|---|
|  | 6393 | cur += 2; | 
|---|
|  | 6394 | _cur_lexeme = lex_greater_or_equal; | 
|---|
|  | 6395 | } | 
|---|
|  | 6396 | else | 
|---|
|  | 6397 | { | 
|---|
|  | 6398 | cur += 1; | 
|---|
|  | 6399 | _cur_lexeme = lex_greater; | 
|---|
|  | 6400 | } | 
|---|
|  | 6401 | break; | 
|---|
|  | 6402 |  | 
|---|
|  | 6403 | case '<': | 
|---|
|  | 6404 | if (*(cur+1) == '=') | 
|---|
|  | 6405 | { | 
|---|
|  | 6406 | cur += 2; | 
|---|
|  | 6407 | _cur_lexeme = lex_less_or_equal; | 
|---|
|  | 6408 | } | 
|---|
|  | 6409 | else | 
|---|
|  | 6410 | { | 
|---|
|  | 6411 | cur += 1; | 
|---|
|  | 6412 | _cur_lexeme = lex_less; | 
|---|
|  | 6413 | } | 
|---|
|  | 6414 | break; | 
|---|
|  | 6415 |  | 
|---|
|  | 6416 | case '!': | 
|---|
|  | 6417 | if (*(cur+1) == '=') | 
|---|
|  | 6418 | { | 
|---|
|  | 6419 | cur += 2; | 
|---|
|  | 6420 | _cur_lexeme = lex_not_equal; | 
|---|
|  | 6421 | } | 
|---|
|  | 6422 | else | 
|---|
|  | 6423 | { | 
|---|
|  | 6424 | _cur_lexeme = lex_none; | 
|---|
|  | 6425 | } | 
|---|
|  | 6426 | break; | 
|---|
|  | 6427 |  | 
|---|
|  | 6428 | case '=': | 
|---|
|  | 6429 | cur += 1; | 
|---|
|  | 6430 | _cur_lexeme = lex_equal; | 
|---|
|  | 6431 |  | 
|---|
|  | 6432 | break; | 
|---|
|  | 6433 |  | 
|---|
|  | 6434 | case '+': | 
|---|
|  | 6435 | cur += 1; | 
|---|
|  | 6436 | _cur_lexeme = lex_plus; | 
|---|
|  | 6437 |  | 
|---|
|  | 6438 | break; | 
|---|
|  | 6439 |  | 
|---|
|  | 6440 | case '-': | 
|---|
|  | 6441 | cur += 1; | 
|---|
|  | 6442 | _cur_lexeme = lex_minus; | 
|---|
|  | 6443 |  | 
|---|
|  | 6444 | break; | 
|---|
|  | 6445 |  | 
|---|
|  | 6446 | case '*': | 
|---|
|  | 6447 | cur += 1; | 
|---|
|  | 6448 | _cur_lexeme = lex_multiply; | 
|---|
|  | 6449 |  | 
|---|
|  | 6450 | break; | 
|---|
|  | 6451 |  | 
|---|
|  | 6452 | case '|': | 
|---|
|  | 6453 | cur += 1; | 
|---|
|  | 6454 | _cur_lexeme = lex_union; | 
|---|
|  | 6455 |  | 
|---|
|  | 6456 | break; | 
|---|
|  | 6457 |  | 
|---|
|  | 6458 | case '$': | 
|---|
|  | 6459 | cur += 1; | 
|---|
|  | 6460 |  | 
|---|
|  | 6461 | if (IS_CHARTYPEX(*cur, ctx_start_symbol)) | 
|---|
|  | 6462 | { | 
|---|
|  | 6463 | _cur_lexeme_contents.begin = cur; | 
|---|
|  | 6464 |  | 
|---|
|  | 6465 | while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++; | 
|---|
|  | 6466 |  | 
|---|
|  | 6467 | if (cur[0] == ':' && IS_CHARTYPEX(cur[1], ctx_symbol)) // qname | 
|---|
|  | 6468 | { | 
|---|
|  | 6469 | cur++; // : | 
|---|
|  | 6470 |  | 
|---|
|  | 6471 | while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++; | 
|---|
|  | 6472 | } | 
|---|
|  | 6473 |  | 
|---|
|  | 6474 | _cur_lexeme_contents.end = cur; | 
|---|
|  | 6475 |  | 
|---|
|  | 6476 | _cur_lexeme = lex_var_ref; | 
|---|
|  | 6477 | } | 
|---|
|  | 6478 | else | 
|---|
|  | 6479 | { | 
|---|
|  | 6480 | _cur_lexeme = lex_none; | 
|---|
|  | 6481 | } | 
|---|
|  | 6482 |  | 
|---|
|  | 6483 | break; | 
|---|
|  | 6484 |  | 
|---|
|  | 6485 | case '(': | 
|---|
|  | 6486 | cur += 1; | 
|---|
|  | 6487 | _cur_lexeme = lex_open_brace; | 
|---|
|  | 6488 |  | 
|---|
|  | 6489 | break; | 
|---|
|  | 6490 |  | 
|---|
|  | 6491 | case ')': | 
|---|
|  | 6492 | cur += 1; | 
|---|
|  | 6493 | _cur_lexeme = lex_close_brace; | 
|---|
|  | 6494 |  | 
|---|
|  | 6495 | break; | 
|---|
|  | 6496 |  | 
|---|
|  | 6497 | case '[': | 
|---|
|  | 6498 | cur += 1; | 
|---|
|  | 6499 | _cur_lexeme = lex_open_square_brace; | 
|---|
|  | 6500 |  | 
|---|
|  | 6501 | break; | 
|---|
|  | 6502 |  | 
|---|
|  | 6503 | case ']': | 
|---|
|  | 6504 | cur += 1; | 
|---|
|  | 6505 | _cur_lexeme = lex_close_square_brace; | 
|---|
|  | 6506 |  | 
|---|
|  | 6507 | break; | 
|---|
|  | 6508 |  | 
|---|
|  | 6509 | case ',': | 
|---|
|  | 6510 | cur += 1; | 
|---|
|  | 6511 | _cur_lexeme = lex_comma; | 
|---|
|  | 6512 |  | 
|---|
|  | 6513 | break; | 
|---|
|  | 6514 |  | 
|---|
|  | 6515 | case '/': | 
|---|
|  | 6516 | if (*(cur+1) == '/') | 
|---|
|  | 6517 | { | 
|---|
|  | 6518 | cur += 2; | 
|---|
|  | 6519 | _cur_lexeme = lex_double_slash; | 
|---|
|  | 6520 | } | 
|---|
|  | 6521 | else | 
|---|
|  | 6522 | { | 
|---|
|  | 6523 | cur += 1; | 
|---|
|  | 6524 | _cur_lexeme = lex_slash; | 
|---|
|  | 6525 | } | 
|---|
|  | 6526 | break; | 
|---|
|  | 6527 |  | 
|---|
|  | 6528 | case '.': | 
|---|
|  | 6529 | if (*(cur+1) == '.') | 
|---|
|  | 6530 | { | 
|---|
|  | 6531 | cur += 2; | 
|---|
|  | 6532 | _cur_lexeme = lex_double_dot; | 
|---|
|  | 6533 | } | 
|---|
|  | 6534 | else if (IS_CHARTYPEX(*(cur+1), ctx_digit)) | 
|---|
|  | 6535 | { | 
|---|
|  | 6536 | _cur_lexeme_contents.begin = cur; // . | 
|---|
|  | 6537 |  | 
|---|
|  | 6538 | ++cur; | 
|---|
|  | 6539 |  | 
|---|
|  | 6540 | while (IS_CHARTYPEX(*cur, ctx_digit)) cur++; | 
|---|
|  | 6541 |  | 
|---|
|  | 6542 | _cur_lexeme_contents.end = cur; | 
|---|
|  | 6543 |  | 
|---|
|  | 6544 | _cur_lexeme = lex_number; | 
|---|
|  | 6545 | } | 
|---|
|  | 6546 | else | 
|---|
|  | 6547 | { | 
|---|
|  | 6548 | cur += 1; | 
|---|
|  | 6549 | _cur_lexeme = lex_dot; | 
|---|
|  | 6550 | } | 
|---|
|  | 6551 | break; | 
|---|
|  | 6552 |  | 
|---|
|  | 6553 | case '@': | 
|---|
|  | 6554 | cur += 1; | 
|---|
|  | 6555 | _cur_lexeme = lex_axis_attribute; | 
|---|
|  | 6556 |  | 
|---|
|  | 6557 | break; | 
|---|
|  | 6558 |  | 
|---|
|  | 6559 | case '"': | 
|---|
|  | 6560 | case '\'': | 
|---|
|  | 6561 | { | 
|---|
|  | 6562 | char_t terminator = *cur; | 
|---|
|  | 6563 |  | 
|---|
|  | 6564 | ++cur; | 
|---|
|  | 6565 |  | 
|---|
|  | 6566 | _cur_lexeme_contents.begin = cur; | 
|---|
|  | 6567 | while (*cur && *cur != terminator) cur++; | 
|---|
|  | 6568 | _cur_lexeme_contents.end = cur; | 
|---|
|  | 6569 |  | 
|---|
|  | 6570 | if (!*cur) | 
|---|
|  | 6571 | _cur_lexeme = lex_none; | 
|---|
|  | 6572 | else | 
|---|
|  | 6573 | { | 
|---|
|  | 6574 | cur += 1; | 
|---|
|  | 6575 | _cur_lexeme = lex_quoted_string; | 
|---|
|  | 6576 | } | 
|---|
|  | 6577 |  | 
|---|
|  | 6578 | break; | 
|---|
|  | 6579 | } | 
|---|
|  | 6580 |  | 
|---|
|  | 6581 | case ':': | 
|---|
|  | 6582 | if (*(cur+1) == ':') | 
|---|
|  | 6583 | { | 
|---|
|  | 6584 | cur += 2; | 
|---|
|  | 6585 | _cur_lexeme = lex_double_colon; | 
|---|
|  | 6586 | } | 
|---|
|  | 6587 | else | 
|---|
|  | 6588 | { | 
|---|
|  | 6589 | _cur_lexeme = lex_none; | 
|---|
|  | 6590 | } | 
|---|
|  | 6591 | break; | 
|---|
|  | 6592 |  | 
|---|
|  | 6593 | default: | 
|---|
|  | 6594 | if (IS_CHARTYPEX(*cur, ctx_digit)) | 
|---|
|  | 6595 | { | 
|---|
|  | 6596 | _cur_lexeme_contents.begin = cur; | 
|---|
|  | 6597 |  | 
|---|
|  | 6598 | while (IS_CHARTYPEX(*cur, ctx_digit)) cur++; | 
|---|
|  | 6599 |  | 
|---|
|  | 6600 | if (*cur == '.') | 
|---|
|  | 6601 | { | 
|---|
|  | 6602 | cur++; | 
|---|
|  | 6603 |  | 
|---|
|  | 6604 | while (IS_CHARTYPEX(*cur, ctx_digit)) cur++; | 
|---|
|  | 6605 | } | 
|---|
|  | 6606 |  | 
|---|
|  | 6607 | _cur_lexeme_contents.end = cur; | 
|---|
|  | 6608 |  | 
|---|
|  | 6609 | _cur_lexeme = lex_number; | 
|---|
|  | 6610 | } | 
|---|
|  | 6611 | else if (IS_CHARTYPEX(*cur, ctx_start_symbol)) | 
|---|
|  | 6612 | { | 
|---|
|  | 6613 | _cur_lexeme_contents.begin = cur; | 
|---|
|  | 6614 |  | 
|---|
|  | 6615 | while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++; | 
|---|
|  | 6616 |  | 
|---|
|  | 6617 | if (cur[0] == ':') | 
|---|
|  | 6618 | { | 
|---|
|  | 6619 | if (cur[1] == '*') // namespace test ncname:* | 
|---|
|  | 6620 | { | 
|---|
|  | 6621 | cur += 2; // :* | 
|---|
|  | 6622 | } | 
|---|
|  | 6623 | else if (IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname | 
|---|
|  | 6624 | { | 
|---|
|  | 6625 | cur++; // : | 
|---|
|  | 6626 |  | 
|---|
|  | 6627 | while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++; | 
|---|
|  | 6628 | } | 
|---|
|  | 6629 | } | 
|---|
|  | 6630 |  | 
|---|
|  | 6631 | _cur_lexeme_contents.end = cur; | 
|---|
|  | 6632 |  | 
|---|
|  | 6633 | _cur_lexeme = lex_string; | 
|---|
|  | 6634 | } | 
|---|
|  | 6635 | else | 
|---|
|  | 6636 | { | 
|---|
|  | 6637 | _cur_lexeme = lex_none; | 
|---|
|  | 6638 | } | 
|---|
|  | 6639 | } | 
|---|
|  | 6640 |  | 
|---|
|  | 6641 | _cur = cur; | 
|---|
|  | 6642 | } | 
|---|
|  | 6643 |  | 
|---|
|  | 6644 | lexeme_t current() const | 
|---|
|  | 6645 | { | 
|---|
|  | 6646 | return _cur_lexeme; | 
|---|
|  | 6647 | } | 
|---|
|  | 6648 |  | 
|---|
|  | 6649 | const char_t* current_pos() const | 
|---|
|  | 6650 | { | 
|---|
|  | 6651 | return _cur_lexeme_pos; | 
|---|
|  | 6652 | } | 
|---|
|  | 6653 |  | 
|---|
|  | 6654 | const xpath_lexer_string& contents() const | 
|---|
|  | 6655 | { | 
|---|
|  | 6656 | assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string); | 
|---|
|  | 6657 |  | 
|---|
|  | 6658 | return _cur_lexeme_contents; | 
|---|
|  | 6659 | } | 
|---|
|  | 6660 | }; | 
|---|
|  | 6661 |  | 
|---|
// Kinds of XPath AST nodes. The comment next to each enumerator sketches the expression the
// node stands for, written in terms of the node's operands (left, right, further arguments).
// Enumerator order defines the numeric values, so new entries must not be inserted casually.
enum ast_type_t
{
	// logical operators
	ast_op_or,  // left or right
	ast_op_and,  // left and right

	// equality and relational operators
	ast_op_equal,  // left = right
	ast_op_not_equal,  // left != right
	ast_op_less,  // left < right
	ast_op_greater,  // left > right
	ast_op_less_or_equal,  // left <= right
	ast_op_greater_or_equal,  // left >= right

	// arithmetic operators
	ast_op_add,  // left + right
	ast_op_subtract,  // left - right
	ast_op_multiply,  // left * right
	ast_op_divide,  // left div right
	ast_op_mod,  // left mod right
	ast_op_negate,  // -left (unary minus)

	// node set operators, predicates and filters
	ast_op_union,  // left | right
	ast_predicate,  // apply predicate to set; next points to next predicate
	ast_filter,  // select * from left where right
	ast_filter_posinv,  // select * from left where right; proximity position invariant

	// constants and variables
	ast_string_constant,  // string constant
	ast_number_constant,  // number constant
	ast_variable,  // variable

	// node set functions
	ast_func_last,  // last()
	ast_func_position,  // position()
	ast_func_count,  // count(left)
	ast_func_id,  // id(left)
	ast_func_local_name_0,  // local-name()
	ast_func_local_name_1,  // local-name(left)
	ast_func_namespace_uri_0,  // namespace-uri()
	ast_func_namespace_uri_1,  // namespace-uri(left)
	ast_func_name_0,  // name()
	ast_func_name_1,  // name(left)

	// string functions
	ast_func_string_0,  // string()
	ast_func_string_1,  // string(left)
	ast_func_concat,  // concat(left, right, siblings)
	ast_func_starts_with,  // starts-with(left, right)
	ast_func_contains,  // contains(left, right)
	ast_func_substring_before,  // substring-before(left, right)
	ast_func_substring_after,  // substring-after(left, right)
	ast_func_substring_2,  // substring(left, right)
	ast_func_substring_3,  // substring(left, right, third)
	ast_func_string_length_0,  // string-length()
	ast_func_string_length_1,  // string-length(left)
	ast_func_normalize_space_0,  // normalize-space()
	ast_func_normalize_space_1,  // normalize-space(left)
	ast_func_translate,  // translate(left, right, third)

	// boolean functions
	ast_func_boolean,  // boolean(left)
	ast_func_not,  // not(left)
	ast_func_true,  // true()
	ast_func_false,  // false()
	ast_func_lang,  // lang(left)

	// number functions
	ast_func_number_0,  // number()
	ast_func_number_1,  // number(left)
	ast_func_sum,  // sum(left)
	ast_func_floor,  // floor(left)
	ast_func_ceiling,  // ceiling(left)
	ast_func_round,  // round(left)

	// location steps
	ast_step,  // process set left with step
	ast_step_root  // select root node
};
|---|
|  | 6723 |  | 
|---|
// Axes of an XPath location step; the enumerator names mirror the XPath axis names.
// Enumerator order defines the numeric values.
enum axis_t
{
	axis_ancestor,  // ancestor::
	axis_ancestor_or_self,  // ancestor-or-self::
	axis_attribute,  // attribute::
	axis_child,  // child::
	axis_descendant,  // descendant::
	axis_descendant_or_self,  // descendant-or-self::
	axis_following,  // following::
	axis_following_sibling,  // following-sibling::
	axis_namespace,  // namespace::
	axis_parent,  // parent::
	axis_preceding,  // preceding::
	axis_preceding_sibling,  // preceding-sibling::
	axis_self  // self::
};
|---|
|  | 6740 |  | 
|---|
// Node tests that a location step can apply to candidate nodes (see the step_push overloads).
// Enumerator order defines the numeric values.
enum nodetest_t
{
	nodetest_none,  // no node test
	nodetest_name,  // name must equal the stored node test string
	nodetest_type_node,  // every candidate is accepted
	nodetest_type_comment,  // comment nodes only
	nodetest_type_pi,  // processing instruction nodes only
	nodetest_type_text,  // PCDATA and CDATA nodes only
	nodetest_pi,  // processing instruction nodes whose name equals the stored string
	nodetest_all,  // every element (or every attribute when testing attributes)
	nodetest_all_in_namespace  // name must start with the stored string
};
|---|
|  | 6753 |  | 
|---|
|  | 6754 | template <axis_t N> struct axis_to_type | 
|---|
|  | 6755 | { | 
|---|
|  | 6756 | static const axis_t axis; | 
|---|
|  | 6757 | }; | 
|---|
|  | 6758 |  | 
|---|
|  | 6759 | template <axis_t N> const axis_t axis_to_type<N>::axis = N; | 
|---|
|  | 6760 |  | 
|---|
|  | 6761 | class xpath_ast_node | 
|---|
|  | 6762 | { | 
|---|
|  | 6763 | private: | 
|---|
|  | 6764 | // node type | 
|---|
|  | 6765 | char _type; | 
|---|
|  | 6766 | char _rettype; | 
|---|
|  | 6767 |  | 
|---|
|  | 6768 | // for ast_step / ast_predicate | 
|---|
|  | 6769 | char _axis; | 
|---|
|  | 6770 | char _test; | 
|---|
|  | 6771 |  | 
|---|
|  | 6772 | // tree node structure | 
|---|
|  | 6773 | xpath_ast_node* _left; | 
|---|
|  | 6774 | xpath_ast_node* _right; | 
|---|
|  | 6775 | xpath_ast_node* _next; | 
|---|
|  | 6776 |  | 
|---|
|  | 6777 | union | 
|---|
|  | 6778 | { | 
|---|
|  | 6779 | // value for ast_string_constant | 
|---|
|  | 6780 | const char_t* string; | 
|---|
|  | 6781 | // value for ast_number_constant | 
|---|
|  | 6782 | double number; | 
|---|
|  | 6783 | // variable for ast_variable | 
|---|
|  | 6784 | xpath_variable* variable; | 
|---|
|  | 6785 | // node test for ast_step (node name/namespace/node type/pi target) | 
|---|
|  | 6786 | const char_t* nodetest; | 
|---|
|  | 6787 | } _data; | 
|---|
|  | 6788 |  | 
|---|
|  | 6789 | xpath_ast_node(const xpath_ast_node&); | 
|---|
|  | 6790 | xpath_ast_node& operator=(const xpath_ast_node&); | 
|---|
|  | 6791 |  | 
|---|
|  | 6792 | template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) | 
|---|
|  | 6793 | { | 
|---|
|  | 6794 | xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); | 
|---|
|  | 6795 |  | 
|---|
|  | 6796 | if (lt != xpath_type_node_set && rt != xpath_type_node_set) | 
|---|
|  | 6797 | { | 
|---|
|  | 6798 | if (lt == xpath_type_boolean || rt == xpath_type_boolean) | 
|---|
|  | 6799 | return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack)); | 
|---|
|  | 6800 | else if (lt == xpath_type_number || rt == xpath_type_number) | 
|---|
|  | 6801 | return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack)); | 
|---|
|  | 6802 | else if (lt == xpath_type_string || rt == xpath_type_string) | 
|---|
|  | 6803 | { | 
|---|
|  | 6804 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 6805 |  | 
|---|
|  | 6806 | xpath_string ls = lhs->eval_string(c, stack); | 
|---|
|  | 6807 | xpath_string rs = rhs->eval_string(c, stack); | 
|---|
|  | 6808 |  | 
|---|
|  | 6809 | return comp(ls, rs); | 
|---|
|  | 6810 | } | 
|---|
|  | 6811 | } | 
|---|
|  | 6812 | else if (lt == xpath_type_node_set && rt == xpath_type_node_set) | 
|---|
|  | 6813 | { | 
|---|
|  | 6814 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 6815 |  | 
|---|
|  | 6816 | xpath_node_set_raw ls = lhs->eval_node_set(c, stack); | 
|---|
|  | 6817 | xpath_node_set_raw rs = rhs->eval_node_set(c, stack); | 
|---|
|  | 6818 |  | 
|---|
|  | 6819 | for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) | 
|---|
|  | 6820 | for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) | 
|---|
|  | 6821 | { | 
|---|
|  | 6822 | xpath_allocator_capture cri(stack.result); | 
|---|
|  | 6823 |  | 
|---|
|  | 6824 | if (comp(string_value(*li, stack.result), string_value(*ri, stack.result))) | 
|---|
|  | 6825 | return true; | 
|---|
|  | 6826 | } | 
|---|
|  | 6827 |  | 
|---|
|  | 6828 | return false; | 
|---|
|  | 6829 | } | 
|---|
|  | 6830 | else | 
|---|
|  | 6831 | { | 
|---|
|  | 6832 | if (lt == xpath_type_node_set) | 
|---|
|  | 6833 | { | 
|---|
|  | 6834 | swap(lhs, rhs); | 
|---|
|  | 6835 | swap(lt, rt); | 
|---|
|  | 6836 | } | 
|---|
|  | 6837 |  | 
|---|
|  | 6838 | if (lt == xpath_type_boolean) | 
|---|
|  | 6839 | return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack)); | 
|---|
|  | 6840 | else if (lt == xpath_type_number) | 
|---|
|  | 6841 | { | 
|---|
|  | 6842 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 6843 |  | 
|---|
|  | 6844 | double l = lhs->eval_number(c, stack); | 
|---|
|  | 6845 | xpath_node_set_raw rs = rhs->eval_node_set(c, stack); | 
|---|
|  | 6846 |  | 
|---|
|  | 6847 | for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) | 
|---|
|  | 6848 | { | 
|---|
|  | 6849 | xpath_allocator_capture cri(stack.result); | 
|---|
|  | 6850 |  | 
|---|
|  | 6851 | if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) | 
|---|
|  | 6852 | return true; | 
|---|
|  | 6853 | } | 
|---|
|  | 6854 |  | 
|---|
|  | 6855 | return false; | 
|---|
|  | 6856 | } | 
|---|
|  | 6857 | else if (lt == xpath_type_string) | 
|---|
|  | 6858 | { | 
|---|
|  | 6859 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 6860 |  | 
|---|
|  | 6861 | xpath_string l = lhs->eval_string(c, stack); | 
|---|
|  | 6862 | xpath_node_set_raw rs = rhs->eval_node_set(c, stack); | 
|---|
|  | 6863 |  | 
|---|
|  | 6864 | for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) | 
|---|
|  | 6865 | { | 
|---|
|  | 6866 | xpath_allocator_capture cri(stack.result); | 
|---|
|  | 6867 |  | 
|---|
|  | 6868 | if (comp(l, string_value(*ri, stack.result))) | 
|---|
|  | 6869 | return true; | 
|---|
|  | 6870 | } | 
|---|
|  | 6871 |  | 
|---|
|  | 6872 | return false; | 
|---|
|  | 6873 | } | 
|---|
|  | 6874 | } | 
|---|
|  | 6875 |  | 
|---|
|  | 6876 | assert(!"Wrong types"); | 
|---|
|  | 6877 | return false; | 
|---|
|  | 6878 | } | 
|---|
|  | 6879 |  | 
|---|
|  | 6880 | template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) | 
|---|
|  | 6881 | { | 
|---|
|  | 6882 | xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); | 
|---|
|  | 6883 |  | 
|---|
|  | 6884 | if (lt != xpath_type_node_set && rt != xpath_type_node_set) | 
|---|
|  | 6885 | return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack)); | 
|---|
|  | 6886 | else if (lt == xpath_type_node_set && rt == xpath_type_node_set) | 
|---|
|  | 6887 | { | 
|---|
|  | 6888 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 6889 |  | 
|---|
|  | 6890 | xpath_node_set_raw ls = lhs->eval_node_set(c, stack); | 
|---|
|  | 6891 | xpath_node_set_raw rs = rhs->eval_node_set(c, stack); | 
|---|
|  | 6892 |  | 
|---|
|  | 6893 | for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) | 
|---|
|  | 6894 | { | 
|---|
|  | 6895 | xpath_allocator_capture cri(stack.result); | 
|---|
|  | 6896 |  | 
|---|
|  | 6897 | double l = convert_string_to_number(string_value(*li, stack.result).c_str()); | 
|---|
|  | 6898 |  | 
|---|
|  | 6899 | for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) | 
|---|
|  | 6900 | { | 
|---|
|  | 6901 | xpath_allocator_capture crii(stack.result); | 
|---|
|  | 6902 |  | 
|---|
|  | 6903 | if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) | 
|---|
|  | 6904 | return true; | 
|---|
|  | 6905 | } | 
|---|
|  | 6906 | } | 
|---|
|  | 6907 |  | 
|---|
|  | 6908 | return false; | 
|---|
|  | 6909 | } | 
|---|
|  | 6910 | else if (lt != xpath_type_node_set && rt == xpath_type_node_set) | 
|---|
|  | 6911 | { | 
|---|
|  | 6912 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 6913 |  | 
|---|
|  | 6914 | double l = lhs->eval_number(c, stack); | 
|---|
|  | 6915 | xpath_node_set_raw rs = rhs->eval_node_set(c, stack); | 
|---|
|  | 6916 |  | 
|---|
|  | 6917 | for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) | 
|---|
|  | 6918 | { | 
|---|
|  | 6919 | xpath_allocator_capture cri(stack.result); | 
|---|
|  | 6920 |  | 
|---|
|  | 6921 | if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) | 
|---|
|  | 6922 | return true; | 
|---|
|  | 6923 | } | 
|---|
|  | 6924 |  | 
|---|
|  | 6925 | return false; | 
|---|
|  | 6926 | } | 
|---|
|  | 6927 | else if (lt == xpath_type_node_set && rt != xpath_type_node_set) | 
|---|
|  | 6928 | { | 
|---|
|  | 6929 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 6930 |  | 
|---|
|  | 6931 | xpath_node_set_raw ls = lhs->eval_node_set(c, stack); | 
|---|
|  | 6932 | double r = rhs->eval_number(c, stack); | 
|---|
|  | 6933 |  | 
|---|
|  | 6934 | for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) | 
|---|
|  | 6935 | { | 
|---|
|  | 6936 | xpath_allocator_capture cri(stack.result); | 
|---|
|  | 6937 |  | 
|---|
|  | 6938 | if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r)) | 
|---|
|  | 6939 | return true; | 
|---|
|  | 6940 | } | 
|---|
|  | 6941 |  | 
|---|
|  | 6942 | return false; | 
|---|
|  | 6943 | } | 
|---|
|  | 6944 | else | 
|---|
|  | 6945 | { | 
|---|
|  | 6946 | assert(!"Wrong types"); | 
|---|
|  | 6947 | return false; | 
|---|
|  | 6948 | } | 
|---|
|  | 6949 | } | 
|---|
|  | 6950 |  | 
|---|
|  | 6951 | void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) | 
|---|
|  | 6952 | { | 
|---|
|  | 6953 | assert(ns.size() >= first); | 
|---|
|  | 6954 |  | 
|---|
|  | 6955 | size_t i = 1; | 
|---|
|  | 6956 | size_t size = ns.size() - first; | 
|---|
|  | 6957 |  | 
|---|
|  | 6958 | xpath_node* last = ns.begin() + first; | 
|---|
|  | 6959 |  | 
|---|
|  | 6960 | // remove_if... or well, sort of | 
|---|
|  | 6961 | for (xpath_node* it = last; it != ns.end(); ++it, ++i) | 
|---|
|  | 6962 | { | 
|---|
|  | 6963 | xpath_context c(*it, i, size); | 
|---|
|  | 6964 |  | 
|---|
|  | 6965 | if (expr->rettype() == xpath_type_number) | 
|---|
|  | 6966 | { | 
|---|
|  | 6967 | if (expr->eval_number(c, stack) == i) | 
|---|
|  | 6968 | *last++ = *it; | 
|---|
|  | 6969 | } | 
|---|
|  | 6970 | else if (expr->eval_boolean(c, stack)) | 
|---|
|  | 6971 | *last++ = *it; | 
|---|
|  | 6972 | } | 
|---|
|  | 6973 |  | 
|---|
|  | 6974 | ns.truncate(last); | 
|---|
|  | 6975 | } | 
|---|
|  | 6976 |  | 
|---|
|  | 6977 | void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack) | 
|---|
|  | 6978 | { | 
|---|
|  | 6979 | if (ns.size() == first) return; | 
|---|
|  | 6980 |  | 
|---|
|  | 6981 | for (xpath_ast_node* pred = _right; pred; pred = pred->_next) | 
|---|
|  | 6982 | { | 
|---|
|  | 6983 | apply_predicate(ns, first, pred->_left, stack); | 
|---|
|  | 6984 | } | 
|---|
|  | 6985 | } | 
|---|
|  | 6986 |  | 
|---|
|  | 6987 | void step_push(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& parent, xpath_allocator* alloc) | 
|---|
|  | 6988 | { | 
|---|
|  | 6989 | if (!a) return; | 
|---|
|  | 6990 |  | 
|---|
|  | 6991 | const char_t* name = a.name(); | 
|---|
|  | 6992 |  | 
|---|
|  | 6993 | // There are no attribute nodes corresponding to attributes that declare namespaces | 
|---|
|  | 6994 | // That is, "xmlns:..." or "xmlns" | 
|---|
|  | 6995 | if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return; | 
|---|
|  | 6996 |  | 
|---|
|  | 6997 | switch (_test) | 
|---|
|  | 6998 | { | 
|---|
|  | 6999 | case nodetest_name: | 
|---|
|  | 7000 | if (strequal(name, _data.nodetest)) ns.push_back(xpath_node(a, parent), alloc); | 
|---|
|  | 7001 | break; | 
|---|
|  | 7002 |  | 
|---|
|  | 7003 | case nodetest_type_node: | 
|---|
|  | 7004 | case nodetest_all: | 
|---|
|  | 7005 | ns.push_back(xpath_node(a, parent), alloc); | 
|---|
|  | 7006 | break; | 
|---|
|  | 7007 |  | 
|---|
|  | 7008 | case nodetest_all_in_namespace: | 
|---|
|  | 7009 | if (starts_with(name, _data.nodetest)) | 
|---|
|  | 7010 | ns.push_back(xpath_node(a, parent), alloc); | 
|---|
|  | 7011 | break; | 
|---|
|  | 7012 |  | 
|---|
|  | 7013 | default: | 
|---|
|  | 7014 | ; | 
|---|
|  | 7015 | } | 
|---|
|  | 7016 | } | 
|---|
|  | 7017 |  | 
|---|
|  | 7018 | void step_push(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc) | 
|---|
|  | 7019 | { | 
|---|
|  | 7020 | if (!n) return; | 
|---|
|  | 7021 |  | 
|---|
|  | 7022 | switch (_test) | 
|---|
|  | 7023 | { | 
|---|
|  | 7024 | case nodetest_name: | 
|---|
|  | 7025 | if (n.type() == node_element && strequal(n.name(), _data.nodetest)) ns.push_back(n, alloc); | 
|---|
|  | 7026 | break; | 
|---|
|  | 7027 |  | 
|---|
|  | 7028 | case nodetest_type_node: | 
|---|
|  | 7029 | ns.push_back(n, alloc); | 
|---|
|  | 7030 | break; | 
|---|
|  | 7031 |  | 
|---|
|  | 7032 | case nodetest_type_comment: | 
|---|
|  | 7033 | if (n.type() == node_comment) | 
|---|
|  | 7034 | ns.push_back(n, alloc); | 
|---|
|  | 7035 | break; | 
|---|
|  | 7036 |  | 
|---|
|  | 7037 | case nodetest_type_text: | 
|---|
|  | 7038 | if (n.type() == node_pcdata || n.type() == node_cdata) | 
|---|
|  | 7039 | ns.push_back(n, alloc); | 
|---|
|  | 7040 | break; | 
|---|
|  | 7041 |  | 
|---|
|  | 7042 | case nodetest_type_pi: | 
|---|
|  | 7043 | if (n.type() == node_pi) | 
|---|
|  | 7044 | ns.push_back(n, alloc); | 
|---|
|  | 7045 | break; | 
|---|
|  | 7046 |  | 
|---|
|  | 7047 | case nodetest_pi: | 
|---|
|  | 7048 | if (n.type() == node_pi && strequal(n.name(), _data.nodetest)) | 
|---|
|  | 7049 | ns.push_back(n, alloc); | 
|---|
|  | 7050 | break; | 
|---|
|  | 7051 |  | 
|---|
|  | 7052 | case nodetest_all: | 
|---|
|  | 7053 | if (n.type() == node_element) | 
|---|
|  | 7054 | ns.push_back(n, alloc); | 
|---|
|  | 7055 | break; | 
|---|
|  | 7056 |  | 
|---|
|  | 7057 | case nodetest_all_in_namespace: | 
|---|
|  | 7058 | if (n.type() == node_element && starts_with(n.name(), _data.nodetest)) | 
|---|
|  | 7059 | ns.push_back(n, alloc); | 
|---|
|  | 7060 | break; | 
|---|
|  | 7061 |  | 
|---|
|  | 7062 | default: | 
|---|
|  | 7063 | assert(!"Unknown axis"); | 
|---|
|  | 7064 | } | 
|---|
|  | 7065 | } | 
|---|
|  | 7066 |  | 
|---|
|  | 7067 | template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc, T) | 
|---|
|  | 7068 | { | 
|---|
|  | 7069 | const axis_t axis = T::axis; | 
|---|
|  | 7070 |  | 
|---|
|  | 7071 | switch (axis) | 
|---|
|  | 7072 | { | 
|---|
|  | 7073 | case axis_attribute: | 
|---|
|  | 7074 | { | 
|---|
|  | 7075 | for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute()) | 
|---|
|  | 7076 | step_push(ns, a, n, alloc); | 
|---|
|  | 7077 |  | 
|---|
|  | 7078 | break; | 
|---|
|  | 7079 | } | 
|---|
|  | 7080 |  | 
|---|
|  | 7081 | case axis_child: | 
|---|
|  | 7082 | { | 
|---|
|  | 7083 | for (xml_node c = n.first_child(); c; c = c.next_sibling()) | 
|---|
|  | 7084 | step_push(ns, c, alloc); | 
|---|
|  | 7085 |  | 
|---|
|  | 7086 | break; | 
|---|
|  | 7087 | } | 
|---|
|  | 7088 |  | 
|---|
|  | 7089 | case axis_descendant: | 
|---|
|  | 7090 | case axis_descendant_or_self: | 
|---|
|  | 7091 | { | 
|---|
|  | 7092 | if (axis == axis_descendant_or_self) | 
|---|
|  | 7093 | step_push(ns, n, alloc); | 
|---|
|  | 7094 |  | 
|---|
|  | 7095 | xml_node cur = n.first_child(); | 
|---|
|  | 7096 |  | 
|---|
|  | 7097 | while (cur && cur != n) | 
|---|
|  | 7098 | { | 
|---|
|  | 7099 | step_push(ns, cur, alloc); | 
|---|
|  | 7100 |  | 
|---|
|  | 7101 | if (cur.first_child()) | 
|---|
|  | 7102 | cur = cur.first_child(); | 
|---|
|  | 7103 | else if (cur.next_sibling()) | 
|---|
|  | 7104 | cur = cur.next_sibling(); | 
|---|
|  | 7105 | else | 
|---|
|  | 7106 | { | 
|---|
|  | 7107 | while (!cur.next_sibling() && cur != n) | 
|---|
|  | 7108 | cur = cur.parent(); | 
|---|
|  | 7109 |  | 
|---|
|  | 7110 | if (cur != n) cur = cur.next_sibling(); | 
|---|
|  | 7111 | } | 
|---|
|  | 7112 | } | 
|---|
|  | 7113 |  | 
|---|
|  | 7114 | break; | 
|---|
|  | 7115 | } | 
|---|
|  | 7116 |  | 
|---|
|  | 7117 | case axis_following_sibling: | 
|---|
|  | 7118 | { | 
|---|
|  | 7119 | for (xml_node c = n.next_sibling(); c; c = c.next_sibling()) | 
|---|
|  | 7120 | step_push(ns, c, alloc); | 
|---|
|  | 7121 |  | 
|---|
|  | 7122 | break; | 
|---|
|  | 7123 | } | 
|---|
|  | 7124 |  | 
|---|
|  | 7125 | case axis_preceding_sibling: | 
|---|
|  | 7126 | { | 
|---|
|  | 7127 | for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling()) | 
|---|
|  | 7128 | step_push(ns, c, alloc); | 
|---|
|  | 7129 |  | 
|---|
|  | 7130 | break; | 
|---|
|  | 7131 | } | 
|---|
|  | 7132 |  | 
|---|
|  | 7133 | case axis_following: | 
|---|
|  | 7134 | { | 
|---|
|  | 7135 | xml_node cur = n; | 
|---|
|  | 7136 |  | 
|---|
|  | 7137 | // exit from this node so that we don't include descendants | 
|---|
|  | 7138 | while (cur && !cur.next_sibling()) cur = cur.parent(); | 
|---|
|  | 7139 | cur = cur.next_sibling(); | 
|---|
|  | 7140 |  | 
|---|
|  | 7141 | for (;;) | 
|---|
|  | 7142 | { | 
|---|
|  | 7143 | step_push(ns, cur, alloc); | 
|---|
|  | 7144 |  | 
|---|
|  | 7145 | if (cur.first_child()) | 
|---|
|  | 7146 | cur = cur.first_child(); | 
|---|
|  | 7147 | else if (cur.next_sibling()) | 
|---|
|  | 7148 | cur = cur.next_sibling(); | 
|---|
|  | 7149 | else | 
|---|
|  | 7150 | { | 
|---|
|  | 7151 | while (cur && !cur.next_sibling()) cur = cur.parent(); | 
|---|
|  | 7152 | cur = cur.next_sibling(); | 
|---|
|  | 7153 |  | 
|---|
|  | 7154 | if (!cur) break; | 
|---|
|  | 7155 | } | 
|---|
|  | 7156 | } | 
|---|
|  | 7157 |  | 
|---|
|  | 7158 | break; | 
|---|
|  | 7159 | } | 
|---|
|  | 7160 |  | 
|---|
|  | 7161 | case axis_preceding: | 
|---|
|  | 7162 | { | 
|---|
|  | 7163 | xml_node cur = n; | 
|---|
|  | 7164 |  | 
|---|
|  | 7165 | while (cur && !cur.previous_sibling()) cur = cur.parent(); | 
|---|
|  | 7166 | cur = cur.previous_sibling(); | 
|---|
|  | 7167 |  | 
|---|
|  | 7168 | for (;;) | 
|---|
|  | 7169 | { | 
|---|
|  | 7170 | if (cur.last_child()) | 
|---|
|  | 7171 | cur = cur.last_child(); | 
|---|
|  | 7172 | else | 
|---|
|  | 7173 | { | 
|---|
|  | 7174 | // leaf node, can't be ancestor | 
|---|
|  | 7175 | step_push(ns, cur, alloc); | 
|---|
|  | 7176 |  | 
|---|
|  | 7177 | if (cur.previous_sibling()) | 
|---|
|  | 7178 | cur = cur.previous_sibling(); | 
|---|
|  | 7179 | else | 
|---|
|  | 7180 | { | 
|---|
|  | 7181 | do | 
|---|
|  | 7182 | { | 
|---|
|  | 7183 | cur = cur.parent(); | 
|---|
|  | 7184 | if (!cur) break; | 
|---|
|  | 7185 |  | 
|---|
|  | 7186 | if (!node_is_ancestor(cur, n)) step_push(ns, cur, alloc); | 
|---|
|  | 7187 | } | 
|---|
|  | 7188 | while (!cur.previous_sibling()); | 
|---|
|  | 7189 |  | 
|---|
|  | 7190 | cur = cur.previous_sibling(); | 
|---|
|  | 7191 |  | 
|---|
|  | 7192 | if (!cur) break; | 
|---|
|  | 7193 | } | 
|---|
|  | 7194 | } | 
|---|
|  | 7195 | } | 
|---|
|  | 7196 |  | 
|---|
|  | 7197 | break; | 
|---|
|  | 7198 | } | 
|---|
|  | 7199 |  | 
|---|
|  | 7200 | case axis_ancestor: | 
|---|
|  | 7201 | case axis_ancestor_or_self: | 
|---|
|  | 7202 | { | 
|---|
|  | 7203 | if (axis == axis_ancestor_or_self) | 
|---|
|  | 7204 | step_push(ns, n, alloc); | 
|---|
|  | 7205 |  | 
|---|
|  | 7206 | xml_node cur = n.parent(); | 
|---|
|  | 7207 |  | 
|---|
|  | 7208 | while (cur) | 
|---|
|  | 7209 | { | 
|---|
|  | 7210 | step_push(ns, cur, alloc); | 
|---|
|  | 7211 |  | 
|---|
|  | 7212 | cur = cur.parent(); | 
|---|
|  | 7213 | } | 
|---|
|  | 7214 |  | 
|---|
|  | 7215 | break; | 
|---|
|  | 7216 | } | 
|---|
|  | 7217 |  | 
|---|
|  | 7218 | case axis_self: | 
|---|
|  | 7219 | { | 
|---|
|  | 7220 | step_push(ns, n, alloc); | 
|---|
|  | 7221 |  | 
|---|
|  | 7222 | break; | 
|---|
|  | 7223 | } | 
|---|
|  | 7224 |  | 
|---|
|  | 7225 | case axis_parent: | 
|---|
|  | 7226 | { | 
|---|
|  | 7227 | if (n.parent()) step_push(ns, n.parent(), alloc); | 
|---|
|  | 7228 |  | 
|---|
|  | 7229 | break; | 
|---|
|  | 7230 | } | 
|---|
|  | 7231 |  | 
|---|
|  | 7232 | default: | 
|---|
|  | 7233 | assert(!"Unimplemented axis"); | 
|---|
|  | 7234 | } | 
|---|
|  | 7235 | } | 
|---|
|  | 7236 |  | 
|---|
|  | 7237 | template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& p, xpath_allocator* alloc, T v) | 
|---|
|  | 7238 | { | 
|---|
|  | 7239 | const axis_t axis = T::axis; | 
|---|
|  | 7240 |  | 
|---|
|  | 7241 | switch (axis) | 
|---|
|  | 7242 | { | 
|---|
|  | 7243 | case axis_ancestor: | 
|---|
|  | 7244 | case axis_ancestor_or_self: | 
|---|
|  | 7245 | { | 
|---|
|  | 7246 | if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test | 
|---|
|  | 7247 | step_push(ns, a, p, alloc); | 
|---|
|  | 7248 |  | 
|---|
|  | 7249 | xml_node cur = p; | 
|---|
|  | 7250 |  | 
|---|
|  | 7251 | while (cur) | 
|---|
|  | 7252 | { | 
|---|
|  | 7253 | step_push(ns, cur, alloc); | 
|---|
|  | 7254 |  | 
|---|
|  | 7255 | cur = cur.parent(); | 
|---|
|  | 7256 | } | 
|---|
|  | 7257 |  | 
|---|
|  | 7258 | break; | 
|---|
|  | 7259 | } | 
|---|
|  | 7260 |  | 
|---|
|  | 7261 | case axis_descendant_or_self: | 
|---|
|  | 7262 | case axis_self: | 
|---|
|  | 7263 | { | 
|---|
|  | 7264 | if (_test == nodetest_type_node) // reject attributes based on principal node type test | 
|---|
|  | 7265 | step_push(ns, a, p, alloc); | 
|---|
|  | 7266 |  | 
|---|
|  | 7267 | break; | 
|---|
|  | 7268 | } | 
|---|
|  | 7269 |  | 
|---|
|  | 7270 | case axis_following: | 
|---|
|  | 7271 | { | 
|---|
|  | 7272 | xml_node cur = p; | 
|---|
|  | 7273 |  | 
|---|
|  | 7274 | for (;;) | 
|---|
|  | 7275 | { | 
|---|
|  | 7276 | if (cur.first_child()) | 
|---|
|  | 7277 | cur = cur.first_child(); | 
|---|
|  | 7278 | else if (cur.next_sibling()) | 
|---|
|  | 7279 | cur = cur.next_sibling(); | 
|---|
|  | 7280 | else | 
|---|
|  | 7281 | { | 
|---|
|  | 7282 | while (cur && !cur.next_sibling()) cur = cur.parent(); | 
|---|
|  | 7283 | cur = cur.next_sibling(); | 
|---|
|  | 7284 |  | 
|---|
|  | 7285 | if (!cur) break; | 
|---|
|  | 7286 | } | 
|---|
|  | 7287 |  | 
|---|
|  | 7288 | step_push(ns, cur, alloc); | 
|---|
|  | 7289 | } | 
|---|
|  | 7290 |  | 
|---|
|  | 7291 | break; | 
|---|
|  | 7292 | } | 
|---|
|  | 7293 |  | 
|---|
|  | 7294 | case axis_parent: | 
|---|
|  | 7295 | { | 
|---|
|  | 7296 | step_push(ns, p, alloc); | 
|---|
|  | 7297 |  | 
|---|
|  | 7298 | break; | 
|---|
|  | 7299 | } | 
|---|
|  | 7300 |  | 
|---|
|  | 7301 | case axis_preceding: | 
|---|
|  | 7302 | { | 
|---|
|  | 7303 | // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding | 
|---|
|  | 7304 | step_fill(ns, p, alloc, v); | 
|---|
|  | 7305 | break; | 
|---|
|  | 7306 | } | 
|---|
|  | 7307 |  | 
|---|
|  | 7308 | default: | 
|---|
|  | 7309 | assert(!"Unimplemented axis"); | 
|---|
|  | 7310 | } | 
|---|
|  | 7311 | } | 
|---|
|  | 7312 |  | 
|---|
|  | 7313 | template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, T v) | 
|---|
|  | 7314 | { | 
|---|
|  | 7315 | const axis_t axis = T::axis; | 
|---|
|  | 7316 | bool attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); | 
|---|
|  | 7317 |  | 
|---|
|  | 7318 | xpath_node_set_raw ns; | 
|---|
|  | 7319 | ns.set_type((axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling) ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted); | 
|---|
|  | 7320 |  | 
|---|
|  | 7321 | if (_left) | 
|---|
|  | 7322 | { | 
|---|
|  | 7323 | xpath_node_set_raw s = _left->eval_node_set(c, stack); | 
|---|
|  | 7324 |  | 
|---|
|  | 7325 | // self axis preserves the original order | 
|---|
|  | 7326 | if (axis == axis_self) ns.set_type(s.type()); | 
|---|
|  | 7327 |  | 
|---|
|  | 7328 | for (const xpath_node* it = s.begin(); it != s.end(); ++it) | 
|---|
|  | 7329 | { | 
|---|
|  | 7330 | size_t size = ns.size(); | 
|---|
|  | 7331 |  | 
|---|
|  | 7332 | // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes | 
|---|
|  | 7333 | if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted); | 
|---|
|  | 7334 |  | 
|---|
|  | 7335 | if (it->node()) | 
|---|
|  | 7336 | step_fill(ns, it->node(), stack.result, v); | 
|---|
|  | 7337 | else if (attributes) | 
|---|
|  | 7338 | step_fill(ns, it->attribute(), it->parent(), stack.result, v); | 
|---|
|  | 7339 |  | 
|---|
|  | 7340 | apply_predicates(ns, size, stack); | 
|---|
|  | 7341 | } | 
|---|
|  | 7342 | } | 
|---|
|  | 7343 | else | 
|---|
|  | 7344 | { | 
|---|
|  | 7345 | if (c.n.node()) | 
|---|
|  | 7346 | step_fill(ns, c.n.node(), stack.result, v); | 
|---|
|  | 7347 | else if (attributes) | 
|---|
|  | 7348 | step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, v); | 
|---|
|  | 7349 |  | 
|---|
|  | 7350 | apply_predicates(ns, 0, stack); | 
|---|
|  | 7351 | } | 
|---|
|  | 7352 |  | 
|---|
|  | 7353 | // child, attribute and self axes always generate unique set of nodes | 
|---|
|  | 7354 | // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice | 
|---|
|  | 7355 | if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted) | 
|---|
|  | 7356 | ns.remove_duplicates(); | 
|---|
|  | 7357 |  | 
|---|
|  | 7358 | return ns; | 
|---|
|  | 7359 | } | 
|---|
|  | 7360 |  | 
|---|
|  | 7361 | public: | 
|---|
|  | 7362 | xpath_ast_node(ast_type_t type, xpath_value_type rettype, const char_t* value): | 
|---|
|  | 7363 | _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(0), _right(0), _next(0) | 
|---|
|  | 7364 | { | 
|---|
|  | 7365 | assert(type == ast_string_constant); | 
|---|
|  | 7366 | _data.string = value; | 
|---|
|  | 7367 | } | 
|---|
|  | 7368 |  | 
|---|
|  | 7369 | xpath_ast_node(ast_type_t type, xpath_value_type rettype, double value): | 
|---|
|  | 7370 | _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(0), _right(0), _next(0) | 
|---|
|  | 7371 | { | 
|---|
|  | 7372 | assert(type == ast_number_constant); | 
|---|
|  | 7373 | _data.number = value; | 
|---|
|  | 7374 | } | 
|---|
|  | 7375 |  | 
|---|
|  | 7376 | xpath_ast_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value): | 
|---|
|  | 7377 | _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(0), _right(0), _next(0) | 
|---|
|  | 7378 | { | 
|---|
|  | 7379 | assert(type == ast_variable); | 
|---|
|  | 7380 | _data.variable = value; | 
|---|
|  | 7381 | } | 
|---|
|  | 7382 |  | 
|---|
|  | 7383 | xpath_ast_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0): | 
|---|
|  | 7384 | _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(left), _right(right), _next(0) | 
|---|
|  | 7385 | { | 
|---|
|  | 7386 | } | 
|---|
|  | 7387 |  | 
|---|
|  | 7388 | xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents): | 
|---|
|  | 7389 | _type((char)type), _rettype(xpath_type_node_set), _axis((char)axis), _test((char)test), _left(left), _right(0), _next(0) | 
|---|
|  | 7390 | { | 
|---|
|  | 7391 | _data.nodetest = contents; | 
|---|
|  | 7392 | } | 
|---|
|  | 7393 |  | 
|---|
|  | 7394 | void set_next(xpath_ast_node* value) | 
|---|
|  | 7395 | { | 
|---|
|  | 7396 | _next = value; | 
|---|
|  | 7397 | } | 
|---|
|  | 7398 |  | 
|---|
|  | 7399 | void set_right(xpath_ast_node* value) | 
|---|
|  | 7400 | { | 
|---|
|  | 7401 | _right = value; | 
|---|
|  | 7402 | } | 
|---|
|  | 7403 |  | 
|---|
|  | 7404 | bool eval_boolean(const xpath_context& c, const xpath_stack& stack) | 
|---|
|  | 7405 | { | 
|---|
|  | 7406 | switch (_type) | 
|---|
|  | 7407 | { | 
|---|
|  | 7408 | case ast_op_or: | 
|---|
|  | 7409 | return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack); | 
|---|
|  | 7410 |  | 
|---|
|  | 7411 | case ast_op_and: | 
|---|
|  | 7412 | return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack); | 
|---|
|  | 7413 |  | 
|---|
|  | 7414 | case ast_op_equal: | 
|---|
|  | 7415 | return compare_eq(_left, _right, c, stack, equal_to()); | 
|---|
|  | 7416 |  | 
|---|
|  | 7417 | case ast_op_not_equal: | 
|---|
|  | 7418 | return compare_eq(_left, _right, c, stack, not_equal_to()); | 
|---|
|  | 7419 |  | 
|---|
|  | 7420 | case ast_op_less: | 
|---|
|  | 7421 | return compare_rel(_left, _right, c, stack, less()); | 
|---|
|  | 7422 |  | 
|---|
|  | 7423 | case ast_op_greater: | 
|---|
|  | 7424 | return compare_rel(_right, _left, c, stack, less()); | 
|---|
|  | 7425 |  | 
|---|
|  | 7426 | case ast_op_less_or_equal: | 
|---|
|  | 7427 | return compare_rel(_left, _right, c, stack, less_equal()); | 
|---|
|  | 7428 |  | 
|---|
|  | 7429 | case ast_op_greater_or_equal: | 
|---|
|  | 7430 | return compare_rel(_right, _left, c, stack, less_equal()); | 
|---|
|  | 7431 |  | 
|---|
|  | 7432 | case ast_func_starts_with: | 
|---|
|  | 7433 | { | 
|---|
|  | 7434 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 7435 |  | 
|---|
|  | 7436 | xpath_string lr = _left->eval_string(c, stack); | 
|---|
|  | 7437 | xpath_string rr = _right->eval_string(c, stack); | 
|---|
|  | 7438 |  | 
|---|
|  | 7439 | return starts_with(lr.c_str(), rr.c_str()); | 
|---|
|  | 7440 | } | 
|---|
|  | 7441 |  | 
|---|
|  | 7442 | case ast_func_contains: | 
|---|
|  | 7443 | { | 
|---|
|  | 7444 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 7445 |  | 
|---|
|  | 7446 | xpath_string lr = _left->eval_string(c, stack); | 
|---|
|  | 7447 | xpath_string rr = _right->eval_string(c, stack); | 
|---|
|  | 7448 |  | 
|---|
|  | 7449 | return find_substring(lr.c_str(), rr.c_str()) != 0; | 
|---|
|  | 7450 | } | 
|---|
|  | 7451 |  | 
|---|
|  | 7452 | case ast_func_boolean: | 
|---|
|  | 7453 | return _left->eval_boolean(c, stack); | 
|---|
|  | 7454 |  | 
|---|
|  | 7455 | case ast_func_not: | 
|---|
|  | 7456 | return !_left->eval_boolean(c, stack); | 
|---|
|  | 7457 |  | 
|---|
|  | 7458 | case ast_func_true: | 
|---|
|  | 7459 | return true; | 
|---|
|  | 7460 |  | 
|---|
|  | 7461 | case ast_func_false: | 
|---|
|  | 7462 | return false; | 
|---|
|  | 7463 |  | 
|---|
|  | 7464 | case ast_func_lang: | 
|---|
|  | 7465 | { | 
|---|
|  | 7466 | if (c.n.attribute()) return false; | 
|---|
|  | 7467 |  | 
|---|
|  | 7468 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 7469 |  | 
|---|
|  | 7470 | xpath_string lang = _left->eval_string(c, stack); | 
|---|
|  | 7471 |  | 
|---|
|  | 7472 | for (xml_node n = c.n.node(); n; n = n.parent()) | 
|---|
|  | 7473 | { | 
|---|
|  | 7474 | xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang")); | 
|---|
|  | 7475 |  | 
|---|
|  | 7476 | if (a) | 
|---|
|  | 7477 | { | 
|---|
|  | 7478 | const char_t* value = a.value(); | 
|---|
|  | 7479 |  | 
|---|
|  | 7480 | // strnicmp / strncasecmp is not portable | 
|---|
|  | 7481 | for (const char_t* lit = lang.c_str(); *lit; ++lit) | 
|---|
|  | 7482 | { | 
|---|
|  | 7483 | if (tolower_ascii(*lit) != tolower_ascii(*value)) return false; | 
|---|
|  | 7484 | ++value; | 
|---|
|  | 7485 | } | 
|---|
|  | 7486 |  | 
|---|
|  | 7487 | return *value == 0 || *value == '-'; | 
|---|
|  | 7488 | } | 
|---|
|  | 7489 | } | 
|---|
|  | 7490 |  | 
|---|
|  | 7491 | return false; | 
|---|
|  | 7492 | } | 
|---|
|  | 7493 |  | 
|---|
|  | 7494 | case ast_variable: | 
|---|
|  | 7495 | { | 
|---|
|  | 7496 | assert(_rettype == _data.variable->type()); | 
|---|
|  | 7497 |  | 
|---|
|  | 7498 | if (_rettype == xpath_type_boolean) | 
|---|
|  | 7499 | return _data.variable->get_boolean(); | 
|---|
|  | 7500 |  | 
|---|
|  | 7501 | // fallthrough to type conversion | 
|---|
|  | 7502 | } | 
|---|
|  | 7503 |  | 
|---|
|  | 7504 | default: | 
|---|
|  | 7505 | { | 
|---|
|  | 7506 | switch (_rettype) | 
|---|
|  | 7507 | { | 
|---|
|  | 7508 | case xpath_type_number: | 
|---|
|  | 7509 | return convert_number_to_boolean(eval_number(c, stack)); | 
|---|
|  | 7510 |  | 
|---|
|  | 7511 | case xpath_type_string: | 
|---|
|  | 7512 | { | 
|---|
|  | 7513 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 7514 |  | 
|---|
|  | 7515 | return !eval_string(c, stack).empty(); | 
|---|
|  | 7516 | } | 
|---|
|  | 7517 |  | 
|---|
|  | 7518 | case xpath_type_node_set: | 
|---|
|  | 7519 | { | 
|---|
|  | 7520 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 7521 |  | 
|---|
|  | 7522 | return !eval_node_set(c, stack).empty(); | 
|---|
|  | 7523 | } | 
|---|
|  | 7524 |  | 
|---|
|  | 7525 | default: | 
|---|
|  | 7526 | assert(!"Wrong expression for return type boolean"); | 
|---|
|  | 7527 | return false; | 
|---|
|  | 7528 | } | 
|---|
|  | 7529 | } | 
|---|
|  | 7530 | } | 
|---|
|  | 7531 | } | 
|---|
|  | 7532 |  | 
|---|
|  | 7533 | double eval_number(const xpath_context& c, const xpath_stack& stack) | 
|---|
|  | 7534 | { | 
|---|
|  | 7535 | switch (_type) | 
|---|
|  | 7536 | { | 
|---|
|  | 7537 | case ast_op_add: | 
|---|
|  | 7538 | return _left->eval_number(c, stack) + _right->eval_number(c, stack); | 
|---|
|  | 7539 |  | 
|---|
|  | 7540 | case ast_op_subtract: | 
|---|
|  | 7541 | return _left->eval_number(c, stack) - _right->eval_number(c, stack); | 
|---|
|  | 7542 |  | 
|---|
|  | 7543 | case ast_op_multiply: | 
|---|
|  | 7544 | return _left->eval_number(c, stack) * _right->eval_number(c, stack); | 
|---|
|  | 7545 |  | 
|---|
|  | 7546 | case ast_op_divide: | 
|---|
|  | 7547 | return _left->eval_number(c, stack) / _right->eval_number(c, stack); | 
|---|
|  | 7548 |  | 
|---|
|  | 7549 | case ast_op_mod: | 
|---|
|  | 7550 | return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack)); | 
|---|
|  | 7551 |  | 
|---|
|  | 7552 | case ast_op_negate: | 
|---|
|  | 7553 | return -_left->eval_number(c, stack); | 
|---|
|  | 7554 |  | 
|---|
|  | 7555 | case ast_number_constant: | 
|---|
|  | 7556 | return _data.number; | 
|---|
|  | 7557 |  | 
|---|
|  | 7558 | case ast_func_last: | 
|---|
|  | 7559 | return (double)c.size; | 
|---|
|  | 7560 |  | 
|---|
|  | 7561 | case ast_func_position: | 
|---|
|  | 7562 | return (double)c.position; | 
|---|
|  | 7563 |  | 
|---|
|  | 7564 | case ast_func_count: | 
|---|
|  | 7565 | { | 
|---|
|  | 7566 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 7567 |  | 
|---|
|  | 7568 | return (double)_left->eval_node_set(c, stack).size(); | 
|---|
|  | 7569 | } | 
|---|
|  | 7570 |  | 
|---|
|  | 7571 | case ast_func_string_length_0: | 
|---|
|  | 7572 | { | 
|---|
|  | 7573 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 7574 |  | 
|---|
|  | 7575 | return (double)string_value(c.n, stack.result).length(); | 
|---|
|  | 7576 | } | 
|---|
|  | 7577 |  | 
|---|
|  | 7578 | case ast_func_string_length_1: | 
|---|
|  | 7579 | { | 
|---|
|  | 7580 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 7581 |  | 
|---|
|  | 7582 | return (double)_left->eval_string(c, stack).length(); | 
|---|
|  | 7583 | } | 
|---|
|  | 7584 |  | 
|---|
|  | 7585 | case ast_func_number_0: | 
|---|
|  | 7586 | { | 
|---|
|  | 7587 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 7588 |  | 
|---|
|  | 7589 | return convert_string_to_number(string_value(c.n, stack.result).c_str()); | 
|---|
|  | 7590 | } | 
|---|
|  | 7591 |  | 
|---|
|  | 7592 | case ast_func_number_1: | 
|---|
|  | 7593 | return _left->eval_number(c, stack); | 
|---|
|  | 7594 |  | 
|---|
|  | 7595 | case ast_func_sum: | 
|---|
|  | 7596 | { | 
|---|
|  | 7597 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 7598 |  | 
|---|
|  | 7599 | double r = 0; | 
|---|
|  | 7600 |  | 
|---|
|  | 7601 | xpath_node_set_raw ns = _left->eval_node_set(c, stack); | 
|---|
|  | 7602 |  | 
|---|
|  | 7603 | for (const xpath_node* it = ns.begin(); it != ns.end(); ++it) | 
|---|
|  | 7604 | { | 
|---|
|  | 7605 | xpath_allocator_capture cri(stack.result); | 
|---|
|  | 7606 |  | 
|---|
|  | 7607 | r += convert_string_to_number(string_value(*it, stack.result).c_str()); | 
|---|
|  | 7608 | } | 
|---|
|  | 7609 |  | 
|---|
|  | 7610 | return r; | 
|---|
|  | 7611 | } | 
|---|
|  | 7612 |  | 
|---|
|  | 7613 | case ast_func_floor: | 
|---|
|  | 7614 | { | 
|---|
|  | 7615 | double r = _left->eval_number(c, stack); | 
|---|
|  | 7616 |  | 
|---|
|  | 7617 | return r == r ? floor(r) : r; | 
|---|
|  | 7618 | } | 
|---|
|  | 7619 |  | 
|---|
|  | 7620 | case ast_func_ceiling: | 
|---|
|  | 7621 | { | 
|---|
|  | 7622 | double r = _left->eval_number(c, stack); | 
|---|
|  | 7623 |  | 
|---|
|  | 7624 | return r == r ? ceil(r) : r; | 
|---|
|  | 7625 | } | 
|---|
|  | 7626 |  | 
|---|
|  | 7627 | case ast_func_round: | 
|---|
|  | 7628 | return round_nearest_nzero(_left->eval_number(c, stack)); | 
|---|
|  | 7629 |  | 
|---|
|  | 7630 | case ast_variable: | 
|---|
|  | 7631 | { | 
|---|
|  | 7632 | assert(_rettype == _data.variable->type()); | 
|---|
|  | 7633 |  | 
|---|
|  | 7634 | if (_rettype == xpath_type_number) | 
|---|
|  | 7635 | return _data.variable->get_number(); | 
|---|
|  | 7636 |  | 
|---|
|  | 7637 | // fallthrough to type conversion | 
|---|
|  | 7638 | } | 
|---|
|  | 7639 |  | 
|---|
|  | 7640 | default: | 
|---|
|  | 7641 | { | 
|---|
|  | 7642 | switch (_rettype) | 
|---|
|  | 7643 | { | 
|---|
|  | 7644 | case xpath_type_boolean: | 
|---|
|  | 7645 | return eval_boolean(c, stack) ? 1 : 0; | 
|---|
|  | 7646 |  | 
|---|
|  | 7647 | case xpath_type_string: | 
|---|
|  | 7648 | { | 
|---|
|  | 7649 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 7650 |  | 
|---|
|  | 7651 | return convert_string_to_number(eval_string(c, stack).c_str()); | 
|---|
|  | 7652 | } | 
|---|
|  | 7653 |  | 
|---|
|  | 7654 | case xpath_type_node_set: | 
|---|
|  | 7655 | { | 
|---|
|  | 7656 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 7657 |  | 
|---|
|  | 7658 | return convert_string_to_number(eval_string(c, stack).c_str()); | 
|---|
|  | 7659 | } | 
|---|
|  | 7660 |  | 
|---|
|  | 7661 | default: | 
|---|
|  | 7662 | assert(!"Wrong expression for return type number"); | 
|---|
|  | 7663 | return 0; | 
|---|
|  | 7664 | } | 
|---|
|  | 7665 |  | 
|---|
|  | 7666 | } | 
|---|
|  | 7667 | } | 
|---|
|  | 7668 | } | 
|---|
|  | 7669 |  | 
|---|
|  | 7670 | xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack) | 
|---|
|  | 7671 | { | 
|---|
|  | 7672 | assert(_type == ast_func_concat); | 
|---|
|  | 7673 |  | 
|---|
|  | 7674 | xpath_allocator_capture ct(stack.temp); | 
|---|
|  | 7675 |  | 
|---|
|  | 7676 | // count the string number | 
|---|
|  | 7677 | size_t count = 1; | 
|---|
|  | 7678 | for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++; | 
|---|
|  | 7679 |  | 
|---|
|  | 7680 | // gather all strings | 
|---|
|  | 7681 | xpath_string static_buffer[4]; | 
|---|
|  | 7682 | xpath_string* buffer = static_buffer; | 
|---|
|  | 7683 |  | 
|---|
|  | 7684 | // allocate on-heap for large concats | 
|---|
|  | 7685 | if (count > sizeof(static_buffer) / sizeof(static_buffer[0])) | 
|---|
|  | 7686 | { | 
|---|
|  | 7687 | buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string))); | 
|---|
|  | 7688 | assert(buffer); | 
|---|
|  | 7689 | } | 
|---|
|  | 7690 |  | 
|---|
|  | 7691 | // evaluate all strings to temporary stack | 
|---|
|  | 7692 | xpath_stack swapped_stack = {stack.temp, stack.result}; | 
|---|
|  | 7693 |  | 
|---|
|  | 7694 | buffer[0] = _left->eval_string(c, swapped_stack); | 
|---|
|  | 7695 |  | 
|---|
|  | 7696 | size_t pos = 1; | 
|---|
|  | 7697 | for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack); | 
|---|
|  | 7698 | assert(pos == count); | 
|---|
|  | 7699 |  | 
|---|
|  | 7700 | // get total length | 
|---|
|  | 7701 | size_t length = 0; | 
|---|
|  | 7702 | for (size_t i = 0; i < count; ++i) length += buffer[i].length(); | 
|---|
|  | 7703 |  | 
|---|
|  | 7704 | // create final string | 
|---|
|  | 7705 | char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t))); | 
|---|
|  | 7706 | assert(result); | 
|---|
|  | 7707 |  | 
|---|
|  | 7708 | char_t* ri = result; | 
|---|
|  | 7709 |  | 
|---|
|  | 7710 | for (size_t j = 0; j < count; ++j) | 
|---|
|  | 7711 | for (const char_t* bi = buffer[j].c_str(); *bi; ++bi) | 
|---|
|  | 7712 | *ri++ = *bi; | 
|---|
|  | 7713 |  | 
|---|
|  | 7714 | *ri = 0; | 
|---|
|  | 7715 |  | 
|---|
|  | 7716 | return xpath_string(result, true); | 
|---|
|  | 7717 | } | 
|---|
|  | 7718 |  | 
|---|
|  | 7719 | xpath_string eval_string(const xpath_context& c, const xpath_stack& stack) | 
|---|
|  | 7720 | { | 
|---|
|  | 7721 | switch (_type) | 
|---|
|  | 7722 | { | 
|---|
|  | 7723 | case ast_string_constant: | 
|---|
|  | 7724 | return xpath_string_const(_data.string); | 
|---|
|  | 7725 |  | 
|---|
|  | 7726 | case ast_func_local_name_0: | 
|---|
|  | 7727 | { | 
|---|
|  | 7728 | xpath_node na = c.n; | 
|---|
|  | 7729 |  | 
|---|
|  | 7730 | return xpath_string_const(local_name(na)); | 
|---|
|  | 7731 | } | 
|---|
|  | 7732 |  | 
|---|
|  | 7733 | case ast_func_local_name_1: | 
|---|
|  | 7734 | { | 
|---|
|  | 7735 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 7736 |  | 
|---|
|  | 7737 | xpath_node_set_raw ns = _left->eval_node_set(c, stack); | 
|---|
|  | 7738 | xpath_node na = ns.first(); | 
|---|
|  | 7739 |  | 
|---|
|  | 7740 | return xpath_string_const(local_name(na)); | 
|---|
|  | 7741 | } | 
|---|
|  | 7742 |  | 
|---|
|  | 7743 | case ast_func_name_0: | 
|---|
|  | 7744 | { | 
|---|
|  | 7745 | xpath_node na = c.n; | 
|---|
|  | 7746 |  | 
|---|
|  | 7747 | return xpath_string_const(qualified_name(na)); | 
|---|
|  | 7748 | } | 
|---|
|  | 7749 |  | 
|---|
|  | 7750 | case ast_func_name_1: | 
|---|
|  | 7751 | { | 
|---|
|  | 7752 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 7753 |  | 
|---|
|  | 7754 | xpath_node_set_raw ns = _left->eval_node_set(c, stack); | 
|---|
|  | 7755 | xpath_node na = ns.first(); | 
|---|
|  | 7756 |  | 
|---|
|  | 7757 | return xpath_string_const(qualified_name(na)); | 
|---|
|  | 7758 | } | 
|---|
|  | 7759 |  | 
|---|
|  | 7760 | case ast_func_namespace_uri_0: | 
|---|
|  | 7761 | { | 
|---|
|  | 7762 | xpath_node na = c.n; | 
|---|
|  | 7763 |  | 
|---|
|  | 7764 | return xpath_string_const(namespace_uri(na)); | 
|---|
|  | 7765 | } | 
|---|
|  | 7766 |  | 
|---|
|  | 7767 | case ast_func_namespace_uri_1: | 
|---|
|  | 7768 | { | 
|---|
|  | 7769 | xpath_allocator_capture cr(stack.result); | 
|---|
|  | 7770 |  | 
|---|
|  | 7771 | xpath_node_set_raw ns = _left->eval_node_set(c, stack); | 
|---|
|  | 7772 | xpath_node na = ns.first(); | 
|---|
|  | 7773 |  | 
|---|
|  | 7774 | return xpath_string_const(namespace_uri(na)); | 
|---|
|  | 7775 | } | 
|---|
|  | 7776 |  | 
|---|
|  | 7777 | case ast_func_string_0: | 
|---|
|  | 7778 | return string_value(c.n, stack.result); | 
|---|
|  | 7779 |  | 
|---|
|  | 7780 | case ast_func_string_1: | 
|---|
|  | 7781 | return _left->eval_string(c, stack); | 
|---|
|  | 7782 |  | 
|---|
|  | 7783 | case ast_func_concat: | 
|---|
|  | 7784 | return eval_string_concat(c, stack); | 
|---|
|  | 7785 |  | 
|---|
|  | 7786 | case ast_func_substring_before: | 
|---|
|  | 7787 | { | 
|---|
|  | 7788 | xpath_allocator_capture cr(stack.temp); | 
|---|
|  | 7789 |  | 
|---|
|  | 7790 | xpath_stack swapped_stack = {stack.temp, stack.result}; | 
|---|
|  | 7791 |  | 
|---|
|  | 7792 | xpath_string s = _left->eval_string(c, swapped_stack); | 
|---|
|  | 7793 | xpath_string p = _right->eval_string(c, swapped_stack); | 
|---|
|  | 7794 |  | 
|---|
|  | 7795 | const char_t* pos = find_substring(s.c_str(), p.c_str()); | 
|---|
|  | 7796 |  | 
|---|
|  | 7797 | return pos ? xpath_string(s.c_str(), pos, stack.result) : xpath_string(); | 
|---|
|  | 7798 | } | 
|---|
|  | 7799 |  | 
|---|
|  | 7800 | case ast_func_substring_after: | 
|---|
|  | 7801 | { | 
|---|
|  | 7802 | xpath_allocator_capture cr(stack.temp); | 
|---|
|  | 7803 |  | 
|---|
|  | 7804 | xpath_stack swapped_stack = {stack.temp, stack.result}; | 
|---|
|  | 7805 |  | 
|---|
|  | 7806 | xpath_string s = _left->eval_string(c, swapped_stack); | 
|---|
|  | 7807 | xpath_string p = _right->eval_string(c, swapped_stack); | 
|---|
|  | 7808 |  | 
|---|
|  | 7809 | const char_t* pos = find_substring(s.c_str(), p.c_str()); | 
|---|
|  | 7810 | if (!pos) return xpath_string(); | 
|---|
|  | 7811 |  | 
|---|
|  | 7812 | const char_t* result = pos + p.length(); | 
|---|
|  | 7813 |  | 
|---|
|  | 7814 | return s.uses_heap() ? xpath_string(result, stack.result) : xpath_string_const(result); | 
|---|
|  | 7815 | } | 
|---|
|  | 7816 |  | 
|---|
|  | 7817 | case ast_func_substring_2: | 
|---|
|  | 7818 | { | 
|---|
|  | 7819 | xpath_allocator_capture cr(stack.temp); | 
|---|
|  | 7820 |  | 
|---|
|  | 7821 | xpath_stack swapped_stack = {stack.temp, stack.result}; | 
|---|
|  | 7822 |  | 
|---|
|  | 7823 | xpath_string s = _left->eval_string(c, swapped_stack); | 
|---|
|  | 7824 | size_t s_length = s.length(); | 
|---|
|  | 7825 |  | 
|---|
|  | 7826 | double first = round_nearest(_right->eval_number(c, stack)); | 
|---|
|  | 7827 |  | 
|---|
|  | 7828 | if (is_nan(first)) return xpath_string(); // NaN | 
|---|
|  | 7829 | else if (first >= s_length + 1) return xpath_string(); | 
|---|
|  | 7830 |  | 
|---|
|  | 7831 | size_t pos = first < 1 ? 1 : (size_t)first; | 
|---|
|  | 7832 | assert(1 <= pos && pos <= s_length + 1); | 
|---|
|  | 7833 |  | 
|---|
|  | 7834 | const char_t* rbegin = s.c_str() + (pos - 1); | 
|---|
|  | 7835 |  | 
|---|
|  | 7836 | return s.uses_heap() ? xpath_string(rbegin, stack.result) : xpath_string_const(rbegin); | 
|---|
|  | 7837 | } | 
|---|
|  | 7838 |  | 
|---|
|  | 7839 | case ast_func_substring_3: | 
|---|
|  | 7840 | { | 
|---|
|  | 7841 | xpath_allocator_capture cr(stack.temp); | 
|---|
|  | 7842 |  | 
|---|
|  | 7843 | xpath_stack swapped_stack = {stack.temp, stack.result}; | 
|---|
|  | 7844 |  | 
|---|
|  | 7845 | xpath_string s = _left->eval_string(c, swapped_stack); | 
|---|
|  | 7846 | size_t s_length = s.length(); | 
|---|
|  | 7847 |  | 
|---|
|  | 7848 | double first = round_nearest(_right->eval_number(c, stack)); | 
|---|
|  | 7849 | double last = first + round_nearest(_right->_next->eval_number(c, stack)); | 
|---|
|  | 7850 |  | 
|---|
|  | 7851 | if (is_nan(first) || is_nan(last)) return xpath_string(); | 
|---|
|  | 7852 | else if (first >= s_length + 1) return xpath_string(); | 
|---|
|  | 7853 | else if (first >= last) return xpath_string(); | 
|---|
|  | 7854 | else if (last < 1) return xpath_string(); | 
|---|
|  | 7855 |  | 
|---|
|  | 7856 | size_t pos = first < 1 ? 1 : (size_t)first; | 
|---|
|  | 7857 | size_t end = last >= s_length + 1 ? s_length + 1 : (size_t)last; | 
|---|
|  | 7858 |  | 
|---|
|  | 7859 | assert(1 <= pos && pos <= end && end <= s_length + 1); | 
|---|
|  | 7860 | const char_t* rbegin = s.c_str() + (pos - 1); | 
|---|
|  | 7861 | const char_t* rend = s.c_str() + (end - 1); | 
|---|
|  | 7862 |  | 
|---|
|  | 7863 | return (end == s_length + 1 && !s.uses_heap()) ? xpath_string_const(rbegin) : xpath_string(rbegin, rend, stack.result); | 
|---|
|  | 7864 | } | 
|---|
|  | 7865 |  | 
|---|
|  | 7866 | case ast_func_normalize_space_0: | 
|---|
|  | 7867 | { | 
|---|
|  | 7868 | xpath_string s = string_value(c.n, stack.result); | 
|---|
|  | 7869 |  | 
|---|
|  | 7870 | normalize_space(s.data(stack.result)); | 
|---|
|  | 7871 |  | 
|---|
|  | 7872 | return s; | 
|---|
|  | 7873 | } | 
|---|
|  | 7874 |  | 
|---|
|  | 7875 | case ast_func_normalize_space_1: | 
|---|
|  | 7876 | { | 
|---|
|  | 7877 | xpath_string s = _left->eval_string(c, stack); | 
|---|
|  | 7878 |  | 
|---|
|  | 7879 | normalize_space(s.data(stack.result)); | 
|---|
|  | 7880 |  | 
|---|
|  | 7881 | return s; | 
|---|
|  | 7882 | } | 
|---|
|  | 7883 |  | 
|---|
|  | 7884 | case ast_func_translate: | 
|---|
|  | 7885 | { | 
|---|
|  | 7886 | xpath_allocator_capture cr(stack.temp); | 
|---|
|  | 7887 |  | 
|---|
|  | 7888 | xpath_stack swapped_stack = {stack.temp, stack.result}; | 
|---|
|  | 7889 |  | 
|---|
|  | 7890 | xpath_string s = _left->eval_string(c, stack); | 
|---|
|  | 7891 | xpath_string from = _right->eval_string(c, swapped_stack); | 
|---|
|  | 7892 | xpath_string to = _right->_next->eval_string(c, swapped_stack); | 
|---|
|  | 7893 |  | 
|---|
|  | 7894 | translate(s.data(stack.result), from.c_str(), to.c_str()); | 
|---|
|  | 7895 |  | 
|---|
|  | 7896 | return s; | 
|---|
|  | 7897 | } | 
|---|
|  | 7898 |  | 
|---|
|  | 7899 | case ast_variable: | 
|---|
|  | 7900 | { | 
|---|
|  | 7901 | assert(_rettype == _data.variable->type()); | 
|---|
|  | 7902 |  | 
|---|
|  | 7903 | if (_rettype == xpath_type_string) | 
|---|
|  | 7904 | return xpath_string_const(_data.variable->get_string()); | 
|---|
|  | 7905 |  | 
|---|
|  | 7906 | // fallthrough to type conversion | 
|---|
|  | 7907 | } | 
|---|
|  | 7908 |  | 
|---|
|  | 7909 | default: | 
|---|
|  | 7910 | { | 
|---|
|  | 7911 | switch (_rettype) | 
|---|
|  | 7912 | { | 
|---|
|  | 7913 | case xpath_type_boolean: | 
|---|
|  | 7914 | return xpath_string_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); | 
|---|
|  | 7915 |  | 
|---|
|  | 7916 | case xpath_type_number: | 
|---|
|  | 7917 | return convert_number_to_string(eval_number(c, stack), stack.result); | 
|---|
|  | 7918 |  | 
|---|
|  | 7919 | case xpath_type_node_set: | 
|---|
|  | 7920 | { | 
|---|
|  | 7921 | xpath_allocator_capture cr(stack.temp); | 
|---|
|  | 7922 |  | 
|---|
|  | 7923 | xpath_stack swapped_stack = {stack.temp, stack.result}; | 
|---|
|  | 7924 |  | 
|---|
|  | 7925 | xpath_node_set_raw ns = eval_node_set(c, swapped_stack); | 
|---|
|  | 7926 | return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result); | 
|---|
|  | 7927 | } | 
|---|
|  | 7928 |  | 
|---|
|  | 7929 | default: | 
|---|
|  | 7930 | assert(!"Wrong expression for return type string"); | 
|---|
|  | 7931 | return xpath_string(); | 
|---|
|  | 7932 | } | 
|---|
|  | 7933 | } | 
|---|
|  | 7934 | } | 
|---|
|  | 7935 | } | 
|---|
|  | 7936 |  | 
|---|
|  | 7937 | xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack) | 
|---|
|  | 7938 | { | 
|---|
|  | 7939 | switch (_type) | 
|---|
|  | 7940 | { | 
|---|
|  | 7941 | case ast_op_union: | 
|---|
|  | 7942 | { | 
|---|
|  | 7943 | xpath_allocator_capture cr(stack.temp); | 
|---|
|  | 7944 |  | 
|---|
|  | 7945 | xpath_stack swapped_stack = {stack.temp, stack.result}; | 
|---|
|  | 7946 |  | 
|---|
|  | 7947 | xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack); | 
|---|
|  | 7948 | xpath_node_set_raw rs = _right->eval_node_set(c, stack); | 
|---|
|  | 7949 |  | 
|---|
|  | 7950 | // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother | 
|---|
|  | 7951 | rs.set_type(xpath_node_set::type_unsorted); | 
|---|
|  | 7952 |  | 
|---|
|  | 7953 | rs.append(ls.begin(), ls.end(), stack.result); | 
|---|
|  | 7954 | rs.remove_duplicates(); | 
|---|
|  | 7955 |  | 
|---|
|  | 7956 | return rs; | 
|---|
|  | 7957 | } | 
|---|
|  | 7958 |  | 
|---|
|  | 7959 | case ast_filter: | 
|---|
|  | 7960 | case ast_filter_posinv: | 
|---|
|  | 7961 | { | 
|---|
|  | 7962 | xpath_node_set_raw set = _left->eval_node_set(c, stack); | 
|---|
|  | 7963 |  | 
|---|
|  | 7964 | // either expression is a number or it contains position() call; sort by document order | 
|---|
|  | 7965 | if (_type == ast_filter) set.sort_do(); | 
|---|
|  | 7966 |  | 
|---|
|  | 7967 | apply_predicate(set, 0, _right, stack); | 
|---|
|  | 7968 |  | 
|---|
|  | 7969 | return set; | 
|---|
|  | 7970 | } | 
|---|
|  | 7971 |  | 
|---|
|  | 7972 | case ast_func_id: | 
|---|
|  | 7973 | return xpath_node_set_raw(); | 
|---|
|  | 7974 |  | 
|---|
|  | 7975 | case ast_step: | 
|---|
|  | 7976 | { | 
|---|
|  | 7977 | switch (_axis) | 
|---|
|  | 7978 | { | 
|---|
|  | 7979 | case axis_ancestor: | 
|---|
|  | 7980 | return step_do(c, stack, axis_to_type<axis_ancestor>()); | 
|---|
|  | 7981 |  | 
|---|
|  | 7982 | case axis_ancestor_or_self: | 
|---|
|  | 7983 | return step_do(c, stack, axis_to_type<axis_ancestor_or_self>()); | 
|---|
|  | 7984 |  | 
|---|
|  | 7985 | case axis_attribute: | 
|---|
|  | 7986 | return step_do(c, stack, axis_to_type<axis_attribute>()); | 
|---|
|  | 7987 |  | 
|---|
|  | 7988 | case axis_child: | 
|---|
|  | 7989 | return step_do(c, stack, axis_to_type<axis_child>()); | 
|---|
|  | 7990 |  | 
|---|
|  | 7991 | case axis_descendant: | 
|---|
|  | 7992 | return step_do(c, stack, axis_to_type<axis_descendant>()); | 
|---|
|  | 7993 |  | 
|---|
|  | 7994 | case axis_descendant_or_self: | 
|---|
|  | 7995 | return step_do(c, stack, axis_to_type<axis_descendant_or_self>()); | 
|---|
|  | 7996 |  | 
|---|
|  | 7997 | case axis_following: | 
|---|
|  | 7998 | return step_do(c, stack, axis_to_type<axis_following>()); | 
|---|
|  | 7999 |  | 
|---|
|  | 8000 | case axis_following_sibling: | 
|---|
|  | 8001 | return step_do(c, stack, axis_to_type<axis_following_sibling>()); | 
|---|
|  | 8002 |  | 
|---|
|  | 8003 | case axis_namespace: | 
|---|
|  | 8004 | // namespaced axis is not supported | 
|---|
|  | 8005 | return xpath_node_set_raw(); | 
|---|
|  | 8006 |  | 
|---|
|  | 8007 | case axis_parent: | 
|---|
|  | 8008 | return step_do(c, stack, axis_to_type<axis_parent>()); | 
|---|
|  | 8009 |  | 
|---|
|  | 8010 | case axis_preceding: | 
|---|
|  | 8011 | return step_do(c, stack, axis_to_type<axis_preceding>()); | 
|---|
|  | 8012 |  | 
|---|
|  | 8013 | case axis_preceding_sibling: | 
|---|
|  | 8014 | return step_do(c, stack, axis_to_type<axis_preceding_sibling>()); | 
|---|
|  | 8015 |  | 
|---|
|  | 8016 | case axis_self: | 
|---|
|  | 8017 | return step_do(c, stack, axis_to_type<axis_self>()); | 
|---|
|  | 8018 | } | 
|---|
|  | 8019 | } | 
|---|
|  | 8020 |  | 
|---|
|  | 8021 | case ast_step_root: | 
|---|
|  | 8022 | { | 
|---|
|  | 8023 | assert(!_right); // root step can't have any predicates | 
|---|
|  | 8024 |  | 
|---|
|  | 8025 | xpath_node_set_raw ns; | 
|---|
|  | 8026 |  | 
|---|
|  | 8027 | ns.set_type(xpath_node_set::type_sorted); | 
|---|
|  | 8028 |  | 
|---|
|  | 8029 | if (c.n.node()) ns.push_back(c.n.node().root(), stack.result); | 
|---|
|  | 8030 | else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result); | 
|---|
|  | 8031 |  | 
|---|
|  | 8032 | return ns; | 
|---|
|  | 8033 | } | 
|---|
|  | 8034 |  | 
|---|
|  | 8035 | case ast_variable: | 
|---|
|  | 8036 | { | 
|---|
|  | 8037 | assert(_rettype == _data.variable->type()); | 
|---|
|  | 8038 |  | 
|---|
|  | 8039 | if (_rettype == xpath_type_node_set) | 
|---|
|  | 8040 | { | 
|---|
|  | 8041 | const xpath_node_set& s = _data.variable->get_node_set(); | 
|---|
|  | 8042 |  | 
|---|
|  | 8043 | xpath_node_set_raw ns; | 
|---|
|  | 8044 |  | 
|---|
|  | 8045 | ns.set_type(s.type()); | 
|---|
|  | 8046 | ns.append(s.begin(), s.end(), stack.result); | 
|---|
|  | 8047 |  | 
|---|
|  | 8048 | return ns; | 
|---|
|  | 8049 | } | 
|---|
|  | 8050 |  | 
|---|
|  | 8051 | // fallthrough to type conversion | 
|---|
|  | 8052 | } | 
|---|
|  | 8053 |  | 
|---|
|  | 8054 | default: | 
|---|
|  | 8055 | assert(!"Wrong expression for return type node set"); | 
|---|
|  | 8056 | return xpath_node_set_raw(); | 
|---|
|  | 8057 | } | 
|---|
|  | 8058 | } | 
|---|
|  | 8059 |  | 
|---|
|  | 8060 | bool is_posinv() | 
|---|
|  | 8061 | { | 
|---|
|  | 8062 | switch (_type) | 
|---|
|  | 8063 | { | 
|---|
|  | 8064 | case ast_func_position: | 
|---|
|  | 8065 | return false; | 
|---|
|  | 8066 |  | 
|---|
|  | 8067 | case ast_string_constant: | 
|---|
|  | 8068 | case ast_number_constant: | 
|---|
|  | 8069 | case ast_variable: | 
|---|
|  | 8070 | return true; | 
|---|
|  | 8071 |  | 
|---|
|  | 8072 | case ast_step: | 
|---|
|  | 8073 | case ast_step_root: | 
|---|
|  | 8074 | return true; | 
|---|
|  | 8075 |  | 
|---|
|  | 8076 | case ast_predicate: | 
|---|
|  | 8077 | case ast_filter: | 
|---|
|  | 8078 | case ast_filter_posinv: | 
|---|
|  | 8079 | return true; | 
|---|
|  | 8080 |  | 
|---|
|  | 8081 | default: | 
|---|
|  | 8082 | if (_left && !_left->is_posinv()) return false; | 
|---|
|  | 8083 |  | 
|---|
|  | 8084 | for (xpath_ast_node* n = _right; n; n = n->_next) | 
|---|
|  | 8085 | if (!n->is_posinv()) return false; | 
|---|
|  | 8086 |  | 
|---|
|  | 8087 | return true; | 
|---|
|  | 8088 | } | 
|---|
|  | 8089 | } | 
|---|
|  | 8090 |  | 
|---|
|  | 8091 | xpath_value_type rettype() const | 
|---|
|  | 8092 | { | 
|---|
|  | 8093 | return static_cast<xpath_value_type>(_rettype); | 
|---|
|  | 8094 | } | 
|---|
|  | 8095 | }; | 
|---|
|  | 8096 |  | 
|---|
|  | 8097 | struct xpath_parser | 
|---|
|  | 8098 | { | 
|---|
|  | 8099 | xpath_allocator* _alloc; | 
|---|
|  | 8100 | xpath_lexer _lexer; | 
|---|
|  | 8101 |  | 
|---|
|  | 8102 | const char_t* _query; | 
|---|
|  | 8103 | xpath_variable_set* _variables; | 
|---|
|  | 8104 |  | 
|---|
|  | 8105 | xpath_parse_result* _result; | 
|---|
|  | 8106 |  | 
|---|
|  | 8107 | #ifdef PUGIXML_NO_EXCEPTIONS | 
|---|
|  | 8108 | jmp_buf _error_handler; | 
|---|
|  | 8109 | #endif | 
|---|
|  | 8110 |  | 
|---|
|  | 8111 | void throw_error(const char* message) | 
|---|
|  | 8112 | { | 
|---|
|  | 8113 | _result->error = message; | 
|---|
|  | 8114 | _result->offset = _lexer.current_pos() - _query; | 
|---|
|  | 8115 |  | 
|---|
|  | 8116 | #ifdef PUGIXML_NO_EXCEPTIONS | 
|---|
|  | 8117 | longjmp(_error_handler, 1); | 
|---|
|  | 8118 | #else | 
|---|
|  | 8119 | throw xpath_exception(*_result); | 
|---|
|  | 8120 | #endif | 
|---|
|  | 8121 | } | 
|---|
|  | 8122 |  | 
|---|
// Reports an allocation failure; this function does not return.
// With exceptions enabled it throws std::bad_alloc; otherwise it goes through
// throw_error with an "Out of memory" message.
void throw_error_oom()
{
#ifndef PUGIXML_NO_EXCEPTIONS
    throw std::bad_alloc();
#else
    throw_error("Out of memory");
#endif
}
|---|
|  | 8131 |  | 
|---|
|  | 8132 | void* alloc_node() | 
|---|
|  | 8133 | { | 
|---|
|  | 8134 | void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node)); | 
|---|
|  | 8135 |  | 
|---|
|  | 8136 | if (!result) throw_error_oom(); | 
|---|
|  | 8137 |  | 
|---|
|  | 8138 | return result; | 
|---|
|  | 8139 | } | 
|---|
|  | 8140 |  | 
|---|
|  | 8141 | const char_t* alloc_string(const xpath_lexer_string& value) | 
|---|
|  | 8142 | { | 
|---|
|  | 8143 | if (value.begin) | 
|---|
|  | 8144 | { | 
|---|
|  | 8145 | size_t length = static_cast<size_t>(value.end - value.begin); | 
|---|
|  | 8146 |  | 
|---|
|  | 8147 | char_t* c = static_cast<char_t*>(_alloc->allocate_nothrow((length + 1) * sizeof(char_t))); | 
|---|
|  | 8148 | if (!c) throw_error_oom(); | 
|---|
|  | 8149 |  | 
|---|
|  | 8150 | memcpy(c, value.begin, length * sizeof(char_t)); | 
|---|
|  | 8151 | c[length] = 0; | 
|---|
|  | 8152 |  | 
|---|
|  | 8153 | return c; | 
|---|
|  | 8154 | } | 
|---|
|  | 8155 | else return 0; | 
|---|
|  | 8156 | } | 
|---|
|  | 8157 |  | 
|---|
|  | 8158 | xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2]) | 
|---|
|  | 8159 | { | 
|---|
|  | 8160 | assert(argc <= 1); | 
|---|
|  | 8161 |  | 
|---|
|  | 8162 | if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); | 
|---|
|  | 8163 |  | 
|---|
|  | 8164 | return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]); | 
|---|
|  | 8165 | } | 
|---|
|  | 8166 |  | 
|---|
|  | 8167 | xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) | 
|---|
|  | 8168 | { | 
|---|
|  | 8169 | switch (name.begin[0]) | 
|---|
|  | 8170 | { | 
|---|
|  | 8171 | case 'b': | 
|---|
|  | 8172 | if (name == PUGIXML_TEXT("boolean") && argc == 1) | 
|---|
|  | 8173 | return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]); | 
|---|
|  | 8174 |  | 
|---|
|  | 8175 | break; | 
|---|
|  | 8176 |  | 
|---|
|  | 8177 | case 'c': | 
|---|
|  | 8178 | if (name == PUGIXML_TEXT("count") && argc == 1) | 
|---|
|  | 8179 | { | 
|---|
|  | 8180 | if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); | 
|---|
|  | 8181 | return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]); | 
|---|
|  | 8182 | } | 
|---|
|  | 8183 | else if (name == PUGIXML_TEXT("contains") && argc == 2) | 
|---|
|  | 8184 | return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_string, args[0], args[1]); | 
|---|
|  | 8185 | else if (name == PUGIXML_TEXT("concat") && argc >= 2) | 
|---|
|  | 8186 | return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]); | 
|---|
|  | 8187 | else if (name == PUGIXML_TEXT("ceiling") && argc == 1) | 
|---|
|  | 8188 | return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]); | 
|---|
|  | 8189 |  | 
|---|
|  | 8190 | break; | 
|---|
|  | 8191 |  | 
|---|
|  | 8192 | case 'f': | 
|---|
|  | 8193 | if (name == PUGIXML_TEXT("false") && argc == 0) | 
|---|
|  | 8194 | return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean); | 
|---|
|  | 8195 | else if (name == PUGIXML_TEXT("floor") && argc == 1) | 
|---|
|  | 8196 | return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]); | 
|---|
|  | 8197 |  | 
|---|
|  | 8198 | break; | 
|---|
|  | 8199 |  | 
|---|
|  | 8200 | case 'i': | 
|---|
|  | 8201 | if (name == PUGIXML_TEXT("id") && argc == 1) | 
|---|
|  | 8202 | return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]); | 
|---|
|  | 8203 |  | 
|---|
|  | 8204 | break; | 
|---|
|  | 8205 |  | 
|---|
|  | 8206 | case 'l': | 
|---|
|  | 8207 | if (name == PUGIXML_TEXT("last") && argc == 0) | 
|---|
|  | 8208 | return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number); | 
|---|
|  | 8209 | else if (name == PUGIXML_TEXT("lang") && argc == 1) | 
|---|
|  | 8210 | return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]); | 
|---|
|  | 8211 | else if (name == PUGIXML_TEXT("local-name") && argc <= 1) | 
|---|
|  | 8212 | return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args); | 
|---|
|  | 8213 |  | 
|---|
|  | 8214 | break; | 
|---|
|  | 8215 |  | 
|---|
|  | 8216 | case 'n': | 
|---|
|  | 8217 | if (name == PUGIXML_TEXT("name") && argc <= 1) | 
|---|
|  | 8218 | return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args); | 
|---|
|  | 8219 | else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) | 
|---|
|  | 8220 | return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args); | 
|---|
|  | 8221 | else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) | 
|---|
|  | 8222 | return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); | 
|---|
|  | 8223 | else if (name == PUGIXML_TEXT("not") && argc == 1) | 
|---|
|  | 8224 | return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]); | 
|---|
|  | 8225 | else if (name == PUGIXML_TEXT("number") && argc <= 1) | 
|---|
|  | 8226 | return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); | 
|---|
|  | 8227 |  | 
|---|
|  | 8228 | break; | 
|---|
|  | 8229 |  | 
|---|
|  | 8230 | case 'p': | 
|---|
|  | 8231 | if (name == PUGIXML_TEXT("position") && argc == 0) | 
|---|
|  | 8232 | return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number); | 
|---|
|  | 8233 |  | 
|---|
|  | 8234 | break; | 
|---|
|  | 8235 |  | 
|---|
|  | 8236 | case 'r': | 
|---|
|  | 8237 | if (name == PUGIXML_TEXT("round") && argc == 1) | 
|---|
|  | 8238 | return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]); | 
|---|
|  | 8239 |  | 
|---|
|  | 8240 | break; | 
|---|
|  | 8241 |  | 
|---|
|  | 8242 | case 's': | 
|---|
|  | 8243 | if (name == PUGIXML_TEXT("string") && argc <= 1) | 
|---|
|  | 8244 | return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); | 
|---|
|  | 8245 | else if (name == PUGIXML_TEXT("string-length") && argc <= 1) | 
|---|
|  | 8246 | return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_string, args[0]); | 
|---|
|  | 8247 | else if (name == PUGIXML_TEXT("starts-with") && argc == 2) | 
|---|
|  | 8248 | return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); | 
|---|
|  | 8249 | else if (name == PUGIXML_TEXT("substring-before") && argc == 2) | 
|---|
|  | 8250 | return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); | 
|---|
|  | 8251 | else if (name == PUGIXML_TEXT("substring-after") && argc == 2) | 
|---|
|  | 8252 | return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); | 
|---|
|  | 8253 | else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) | 
|---|
|  | 8254 | return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); | 
|---|
|  | 8255 | else if (name == PUGIXML_TEXT("sum") && argc == 1) | 
|---|
|  | 8256 | { | 
|---|
|  | 8257 | if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); | 
|---|
|  | 8258 | return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]); | 
|---|
|  | 8259 | } | 
|---|
|  | 8260 |  | 
|---|
|  | 8261 | break; | 
|---|
|  | 8262 |  | 
|---|
|  | 8263 | case 't': | 
|---|
|  | 8264 | if (name == PUGIXML_TEXT("translate") && argc == 3) | 
|---|
|  | 8265 | return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]); | 
|---|
|  | 8266 | else if (name == PUGIXML_TEXT("true") && argc == 0) | 
|---|
|  | 8267 | return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean); | 
|---|
|  | 8268 |  | 
|---|
|  | 8269 | break; | 
|---|
|  | 8270 | } | 
|---|
|  | 8271 |  | 
|---|
|  | 8272 | throw_error("Unrecognized function or wrong parameter count"); | 
|---|
|  | 8273 |  | 
|---|
|  | 8274 | return 0; | 
|---|
|  | 8275 | } | 
|---|
|  | 8276 |  | 
|---|
|  | 8277 | axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) | 
|---|
|  | 8278 | { | 
|---|
|  | 8279 | specified = true; | 
|---|
|  | 8280 |  | 
|---|
|  | 8281 | switch (name.begin[0]) | 
|---|
|  | 8282 | { | 
|---|
|  | 8283 | case 'a': | 
|---|
|  | 8284 | if (name == PUGIXML_TEXT("ancestor")) | 
|---|
|  | 8285 | return axis_ancestor; | 
|---|
|  | 8286 | else if (name == PUGIXML_TEXT("ancestor-or-self")) | 
|---|
|  | 8287 | return axis_ancestor_or_self; | 
|---|
|  | 8288 | else if (name == PUGIXML_TEXT("attribute")) | 
|---|
|  | 8289 | return axis_attribute; | 
|---|
|  | 8290 |  | 
|---|
|  | 8291 | break; | 
|---|
|  | 8292 |  | 
|---|
|  | 8293 | case 'c': | 
|---|
|  | 8294 | if (name == PUGIXML_TEXT("child")) | 
|---|
|  | 8295 | return axis_child; | 
|---|
|  | 8296 |  | 
|---|
|  | 8297 | break; | 
|---|
|  | 8298 |  | 
|---|
|  | 8299 | case 'd': | 
|---|
|  | 8300 | if (name == PUGIXML_TEXT("descendant")) | 
|---|
|  | 8301 | return axis_descendant; | 
|---|
|  | 8302 | else if (name == PUGIXML_TEXT("descendant-or-self")) | 
|---|
|  | 8303 | return axis_descendant_or_self; | 
|---|
|  | 8304 |  | 
|---|
|  | 8305 | break; | 
|---|
|  | 8306 |  | 
|---|
|  | 8307 | case 'f': | 
|---|
|  | 8308 | if (name == PUGIXML_TEXT("following")) | 
|---|
|  | 8309 | return axis_following; | 
|---|
|  | 8310 | else if (name == PUGIXML_TEXT("following-sibling")) | 
|---|
|  | 8311 | return axis_following_sibling; | 
|---|
|  | 8312 |  | 
|---|
|  | 8313 | break; | 
|---|
|  | 8314 |  | 
|---|
|  | 8315 | case 'n': | 
|---|
|  | 8316 | if (name == PUGIXML_TEXT("namespace")) | 
|---|
|  | 8317 | return axis_namespace; | 
|---|
|  | 8318 |  | 
|---|
|  | 8319 | break; | 
|---|
|  | 8320 |  | 
|---|
|  | 8321 | case 'p': | 
|---|
|  | 8322 | if (name == PUGIXML_TEXT("parent")) | 
|---|
|  | 8323 | return axis_parent; | 
|---|
|  | 8324 | else if (name == PUGIXML_TEXT("preceding")) | 
|---|
|  | 8325 | return axis_preceding; | 
|---|
|  | 8326 | else if (name == PUGIXML_TEXT("preceding-sibling")) | 
|---|
|  | 8327 | return axis_preceding_sibling; | 
|---|
|  | 8328 |  | 
|---|
|  | 8329 | break; | 
|---|
|  | 8330 |  | 
|---|
|  | 8331 | case 's': | 
|---|
|  | 8332 | if (name == PUGIXML_TEXT("self")) | 
|---|
|  | 8333 | return axis_self; | 
|---|
|  | 8334 |  | 
|---|
|  | 8335 | break; | 
|---|
|  | 8336 | } | 
|---|
|  | 8337 |  | 
|---|
|  | 8338 | specified = false; | 
|---|
|  | 8339 | return axis_child; | 
|---|
|  | 8340 | } | 
|---|
|  | 8341 |  | 
|---|
|  | 8342 | nodetest_t parse_node_test_type(const xpath_lexer_string& name) | 
|---|
|  | 8343 | { | 
|---|
|  | 8344 | switch (name.begin[0]) | 
|---|
|  | 8345 | { | 
|---|
|  | 8346 | case 'c': | 
|---|
|  | 8347 | if (name == PUGIXML_TEXT("comment")) | 
|---|
|  | 8348 | return nodetest_type_comment; | 
|---|
|  | 8349 |  | 
|---|
|  | 8350 | break; | 
|---|
|  | 8351 |  | 
|---|
|  | 8352 | case 'n': | 
|---|
|  | 8353 | if (name == PUGIXML_TEXT("node")) | 
|---|
|  | 8354 | return nodetest_type_node; | 
|---|
|  | 8355 |  | 
|---|
|  | 8356 | break; | 
|---|
|  | 8357 |  | 
|---|
|  | 8358 | case 'p': | 
|---|
|  | 8359 | if (name == PUGIXML_TEXT("processing-instruction")) | 
|---|
|  | 8360 | return nodetest_type_pi; | 
|---|
|  | 8361 |  | 
|---|
|  | 8362 | break; | 
|---|
|  | 8363 |  | 
|---|
|  | 8364 | case 't': | 
|---|
|  | 8365 | if (name == PUGIXML_TEXT("text")) | 
|---|
|  | 8366 | return nodetest_type_text; | 
|---|
|  | 8367 |  | 
|---|
|  | 8368 | break; | 
|---|
|  | 8369 | } | 
|---|
|  | 8370 |  | 
|---|
|  | 8371 | return nodetest_none; | 
|---|
|  | 8372 | } | 
|---|
|  | 8373 |  | 
|---|
|  | 8374 | // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall | 
|---|
|  | 8375 | xpath_ast_node* parse_primary_expression() | 
|---|
|  | 8376 | { | 
|---|
|  | 8377 | switch (_lexer.current()) | 
|---|
|  | 8378 | { | 
|---|
|  | 8379 | case lex_var_ref: | 
|---|
|  | 8380 | { | 
|---|
|  | 8381 | xpath_lexer_string name = _lexer.contents(); | 
|---|
|  | 8382 |  | 
|---|
|  | 8383 | if (!_variables) | 
|---|
|  | 8384 | throw_error("Unknown variable: variable set is not provided"); | 
|---|
|  | 8385 |  | 
|---|
|  | 8386 | xpath_variable* var = get_variable(_variables, name.begin, name.end); | 
|---|
|  | 8387 |  | 
|---|
|  | 8388 | if (!var) | 
|---|
|  | 8389 | throw_error("Unknown variable: variable set does not contain the given name"); | 
|---|
|  | 8390 |  | 
|---|
|  | 8391 | _lexer.next(); | 
|---|
|  | 8392 |  | 
|---|
|  | 8393 | return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var); | 
|---|
|  | 8394 | } | 
|---|
|  | 8395 |  | 
|---|
|  | 8396 | case lex_open_brace: | 
|---|
|  | 8397 | { | 
|---|
|  | 8398 | _lexer.next(); | 
|---|
|  | 8399 |  | 
|---|
|  | 8400 | xpath_ast_node* n = parse_expression(); | 
|---|
|  | 8401 |  | 
|---|
|  | 8402 | if (_lexer.current() != lex_close_brace) | 
|---|
|  | 8403 | throw_error("Unmatched braces"); | 
|---|
|  | 8404 |  | 
|---|
|  | 8405 | _lexer.next(); | 
|---|
|  | 8406 |  | 
|---|
|  | 8407 | return n; | 
|---|
|  | 8408 | } | 
|---|
|  | 8409 |  | 
|---|
|  | 8410 | case lex_quoted_string: | 
|---|
|  | 8411 | { | 
|---|
|  | 8412 | const char_t* value = alloc_string(_lexer.contents()); | 
|---|
|  | 8413 |  | 
|---|
|  | 8414 | xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value); | 
|---|
|  | 8415 | _lexer.next(); | 
|---|
|  | 8416 |  | 
|---|
|  | 8417 | return n; | 
|---|
|  | 8418 | } | 
|---|
|  | 8419 |  | 
|---|
|  | 8420 | case lex_number: | 
|---|
|  | 8421 | { | 
|---|
|  | 8422 | double value = 0; | 
|---|
|  | 8423 |  | 
|---|
|  | 8424 | if (!convert_string_to_number(_lexer.contents().begin, _lexer.contents().end, &value)) | 
|---|
|  | 8425 | throw_error_oom(); | 
|---|
|  | 8426 |  | 
|---|
|  | 8427 | xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value); | 
|---|
|  | 8428 | _lexer.next(); | 
|---|
|  | 8429 |  | 
|---|
|  | 8430 | return n; | 
|---|
|  | 8431 | } | 
|---|
|  | 8432 |  | 
|---|
|  | 8433 | case lex_string: | 
|---|
|  | 8434 | { | 
|---|
|  | 8435 | xpath_ast_node* args[2] = {0}; | 
|---|
|  | 8436 | size_t argc = 0; | 
|---|
|  | 8437 |  | 
|---|
|  | 8438 | xpath_lexer_string function = _lexer.contents(); | 
|---|
|  | 8439 | _lexer.next(); | 
|---|
|  | 8440 |  | 
|---|
|  | 8441 | xpath_ast_node* last_arg = 0; | 
|---|
|  | 8442 |  | 
|---|
|  | 8443 | if (_lexer.current() != lex_open_brace) | 
|---|
|  | 8444 | throw_error("Unrecognized function call"); | 
|---|
|  | 8445 | _lexer.next(); | 
|---|
|  | 8446 |  | 
|---|
|  | 8447 | if (_lexer.current() != lex_close_brace) | 
|---|
|  | 8448 | args[argc++] = parse_expression(); | 
|---|
|  | 8449 |  | 
|---|
|  | 8450 | while (_lexer.current() != lex_close_brace) | 
|---|
|  | 8451 | { | 
|---|
|  | 8452 | if (_lexer.current() != lex_comma) | 
|---|
|  | 8453 | throw_error("No comma between function arguments"); | 
|---|
|  | 8454 | _lexer.next(); | 
|---|
|  | 8455 |  | 
|---|
|  | 8456 | xpath_ast_node* n = parse_expression(); | 
|---|
|  | 8457 |  | 
|---|
|  | 8458 | if (argc < 2) args[argc] = n; | 
|---|
|  | 8459 | else last_arg->set_next(n); | 
|---|
|  | 8460 |  | 
|---|
|  | 8461 | argc++; | 
|---|
|  | 8462 | last_arg = n; | 
|---|
|  | 8463 | } | 
|---|
|  | 8464 |  | 
|---|
|  | 8465 | _lexer.next(); | 
|---|
|  | 8466 |  | 
|---|
|  | 8467 | return parse_function(function, argc, args); | 
|---|
|  | 8468 | } | 
|---|
|  | 8469 |  | 
|---|
|  | 8470 | default: | 
|---|
|  | 8471 | throw_error("Unrecognizable primary expression"); | 
|---|
|  | 8472 |  | 
|---|
|  | 8473 | return 0; | 
|---|
|  | 8474 | } | 
|---|
|  | 8475 | } | 
|---|
|  | 8476 |  | 
|---|
|  | 8477 | // FilterExpr ::= PrimaryExpr | FilterExpr Predicate | 
|---|
|  | 8478 | // Predicate ::= '[' PredicateExpr ']' | 
|---|
|  | 8479 | // PredicateExpr ::= Expr | 
|---|
|  | 8480 | xpath_ast_node* parse_filter_expression() | 
|---|
|  | 8481 | { | 
|---|
|  | 8482 | xpath_ast_node* n = parse_primary_expression(); | 
|---|
|  | 8483 |  | 
|---|
|  | 8484 | while (_lexer.current() == lex_open_square_brace) | 
|---|
|  | 8485 | { | 
|---|
|  | 8486 | _lexer.next(); | 
|---|
|  | 8487 |  | 
|---|
|  | 8488 | xpath_ast_node* expr = parse_expression(); | 
|---|
|  | 8489 |  | 
|---|
|  | 8490 | if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set"); | 
|---|
|  | 8491 |  | 
|---|
|  | 8492 | bool posinv = expr->rettype() != xpath_type_number && expr->is_posinv(); | 
|---|
|  | 8493 |  | 
|---|
|  | 8494 | n = new (alloc_node()) xpath_ast_node(posinv ? ast_filter_posinv : ast_filter, xpath_type_node_set, n, expr); | 
|---|
|  | 8495 |  | 
|---|
|  | 8496 | if (_lexer.current() != lex_close_square_brace) | 
|---|
|  | 8497 | throw_error("Unmatched square brace"); | 
|---|
|  | 8498 |  | 
|---|
|  | 8499 | _lexer.next(); | 
|---|
|  | 8500 | } | 
|---|
|  | 8501 |  | 
|---|
|  | 8502 | return n; | 
|---|
|  | 8503 | } | 
|---|
|  | 8504 |  | 
|---|
|  | 8505 | // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep | 
|---|
|  | 8506 | // AxisSpecifier ::= AxisName '::' | '@'? | 
|---|
|  | 8507 | // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')' | 
|---|
|  | 8508 | // NameTest ::= '*' | NCName ':' '*' | QName | 
|---|
|  | 8509 | // AbbreviatedStep ::= '.' | '..' | 
|---|
|  | 8510 | xpath_ast_node* parse_step(xpath_ast_node* set) | 
|---|
|  | 8511 | { | 
|---|
|  | 8512 | if (set && set->rettype() != xpath_type_node_set) | 
|---|
|  | 8513 | throw_error("Step has to be applied to node set"); | 
|---|
|  | 8514 |  | 
|---|
|  | 8515 | bool axis_specified = false; | 
|---|
|  | 8516 | axis_t axis = axis_child; // implied child axis | 
|---|
|  | 8517 |  | 
|---|
|  | 8518 | if (_lexer.current() == lex_axis_attribute) | 
|---|
|  | 8519 | { | 
|---|
|  | 8520 | axis = axis_attribute; | 
|---|
|  | 8521 | axis_specified = true; | 
|---|
|  | 8522 |  | 
|---|
|  | 8523 | _lexer.next(); | 
|---|
|  | 8524 | } | 
|---|
|  | 8525 | else if (_lexer.current() == lex_dot) | 
|---|
|  | 8526 | { | 
|---|
|  | 8527 | _lexer.next(); | 
|---|
|  | 8528 |  | 
|---|
|  | 8529 | return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0); | 
|---|
|  | 8530 | } | 
|---|
|  | 8531 | else if (_lexer.current() == lex_double_dot) | 
|---|
|  | 8532 | { | 
|---|
|  | 8533 | _lexer.next(); | 
|---|
|  | 8534 |  | 
|---|
|  | 8535 | return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0); | 
|---|
|  | 8536 | } | 
|---|
|  | 8537 |  | 
|---|
|  | 8538 | nodetest_t nt_type = nodetest_none; | 
|---|
|  | 8539 | xpath_lexer_string nt_name; | 
|---|
|  | 8540 |  | 
|---|
|  | 8541 | if (_lexer.current() == lex_string) | 
|---|
|  | 8542 | { | 
|---|
|  | 8543 | // node name test | 
|---|
|  | 8544 | nt_name = _lexer.contents(); | 
|---|
|  | 8545 | _lexer.next(); | 
|---|
|  | 8546 |  | 
|---|
|  | 8547 | // was it an axis name? | 
|---|
|  | 8548 | if (_lexer.current() == lex_double_colon) | 
|---|
|  | 8549 | { | 
|---|
|  | 8550 | // parse axis name | 
|---|
|  | 8551 | if (axis_specified) throw_error("Two axis specifiers in one step"); | 
|---|
|  | 8552 |  | 
|---|
|  | 8553 | axis = parse_axis_name(nt_name, axis_specified); | 
|---|
|  | 8554 |  | 
|---|
|  | 8555 | if (!axis_specified) throw_error("Unknown axis"); | 
|---|
|  | 8556 |  | 
|---|
|  | 8557 | // read actual node test | 
|---|
|  | 8558 | _lexer.next(); | 
|---|
|  | 8559 |  | 
|---|
|  | 8560 | if (_lexer.current() == lex_multiply) | 
|---|
|  | 8561 | { | 
|---|
|  | 8562 | nt_type = nodetest_all; | 
|---|
|  | 8563 | nt_name = xpath_lexer_string(); | 
|---|
|  | 8564 | _lexer.next(); | 
|---|
|  | 8565 | } | 
|---|
|  | 8566 | else if (_lexer.current() == lex_string) | 
|---|
|  | 8567 | { | 
|---|
|  | 8568 | nt_name = _lexer.contents(); | 
|---|
|  | 8569 | _lexer.next(); | 
|---|
|  | 8570 | } | 
|---|
|  | 8571 | else throw_error("Unrecognized node test"); | 
|---|
|  | 8572 | } | 
|---|
|  | 8573 |  | 
|---|
|  | 8574 | if (nt_type == nodetest_none) | 
|---|
|  | 8575 | { | 
|---|
|  | 8576 | // node type test or processing-instruction | 
|---|
|  | 8577 | if (_lexer.current() == lex_open_brace) | 
|---|
|  | 8578 | { | 
|---|
|  | 8579 | _lexer.next(); | 
|---|
|  | 8580 |  | 
|---|
|  | 8581 | if (_lexer.current() == lex_close_brace) | 
|---|
|  | 8582 | { | 
|---|
|  | 8583 | _lexer.next(); | 
|---|
|  | 8584 |  | 
|---|
|  | 8585 | nt_type = parse_node_test_type(nt_name); | 
|---|
|  | 8586 |  | 
|---|
|  | 8587 | if (nt_type == nodetest_none) throw_error("Unrecognized node type"); | 
|---|
|  | 8588 |  | 
|---|
|  | 8589 | nt_name = xpath_lexer_string(); | 
|---|
|  | 8590 | } | 
|---|
|  | 8591 | else if (nt_name == PUGIXML_TEXT("processing-instruction")) | 
|---|
|  | 8592 | { | 
|---|
|  | 8593 | if (_lexer.current() != lex_quoted_string) | 
|---|
|  | 8594 | throw_error("Only literals are allowed as arguments to processing-instruction()"); | 
|---|
|  | 8595 |  | 
|---|
|  | 8596 | nt_type = nodetest_pi; | 
|---|
|  | 8597 | nt_name = _lexer.contents(); | 
|---|
|  | 8598 | _lexer.next(); | 
|---|
|  | 8599 |  | 
|---|
|  | 8600 | if (_lexer.current() != lex_close_brace) | 
|---|
|  | 8601 | throw_error("Unmatched brace near processing-instruction()"); | 
|---|
|  | 8602 | _lexer.next(); | 
|---|
|  | 8603 | } | 
|---|
|  | 8604 | else | 
|---|
|  | 8605 | throw_error("Unmatched brace near node type test"); | 
|---|
|  | 8606 |  | 
|---|
|  | 8607 | } | 
|---|
|  | 8608 | // QName or NCName:* | 
|---|
|  | 8609 | else | 
|---|
|  | 8610 | { | 
|---|
|  | 8611 | if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:* | 
|---|
|  | 8612 | { | 
|---|
|  | 8613 | nt_name.end--; // erase * | 
|---|
|  | 8614 |  | 
|---|
|  | 8615 | nt_type = nodetest_all_in_namespace; | 
|---|
|  | 8616 | } | 
|---|
|  | 8617 | else nt_type = nodetest_name; | 
|---|
|  | 8618 | } | 
|---|
|  | 8619 | } | 
|---|
|  | 8620 | } | 
|---|
|  | 8621 | else if (_lexer.current() == lex_multiply) | 
|---|
|  | 8622 | { | 
|---|
|  | 8623 | nt_type = nodetest_all; | 
|---|
|  | 8624 | _lexer.next(); | 
|---|
|  | 8625 | } | 
|---|
|  | 8626 | else throw_error("Unrecognized node test"); | 
|---|
|  | 8627 |  | 
|---|
|  | 8628 | xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name)); | 
|---|
|  | 8629 |  | 
|---|
|  | 8630 | xpath_ast_node* last = 0; | 
|---|
|  | 8631 |  | 
|---|
|  | 8632 | while (_lexer.current() == lex_open_square_brace) | 
|---|
|  | 8633 | { | 
|---|
|  | 8634 | _lexer.next(); | 
|---|
|  | 8635 |  | 
|---|
|  | 8636 | xpath_ast_node* expr = parse_expression(); | 
|---|
|  | 8637 |  | 
|---|
|  | 8638 | xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr); | 
|---|
|  | 8639 |  | 
|---|
|  | 8640 | if (_lexer.current() != lex_close_square_brace) | 
|---|
|  | 8641 | throw_error("Unmatched square brace"); | 
|---|
|  | 8642 | _lexer.next(); | 
|---|
|  | 8643 |  | 
|---|
|  | 8644 | if (last) last->set_next(pred); | 
|---|
|  | 8645 | else n->set_right(pred); | 
|---|
|  | 8646 |  | 
|---|
|  | 8647 | last = pred; | 
|---|
|  | 8648 | } | 
|---|
|  | 8649 |  | 
|---|
|  | 8650 | return n; | 
|---|
|  | 8651 | } | 
|---|
|  | 8652 |  | 
|---|
|  | 8653 | // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step | 
|---|
|  | 8654 | xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) | 
|---|
|  | 8655 | { | 
|---|
|  | 8656 | xpath_ast_node* n = parse_step(set); | 
|---|
|  | 8657 |  | 
|---|
|  | 8658 | while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) | 
|---|
|  | 8659 | { | 
|---|
|  | 8660 | lexeme_t l = _lexer.current(); | 
|---|
|  | 8661 | _lexer.next(); | 
|---|
|  | 8662 |  | 
|---|
|  | 8663 | if (l == lex_double_slash) | 
|---|
|  | 8664 | n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); | 
|---|
|  | 8665 |  | 
|---|
|  | 8666 | n = parse_step(n); | 
|---|
|  | 8667 | } | 
|---|
|  | 8668 |  | 
|---|
|  | 8669 | return n; | 
|---|
|  | 8670 | } | 
|---|
|  | 8671 |  | 
|---|
|  | 8672 | // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath | 
|---|
|  | 8673 | // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath | 
|---|
|  | 8674 | xpath_ast_node* parse_location_path() | 
|---|
|  | 8675 | { | 
|---|
|  | 8676 | if (_lexer.current() == lex_slash) | 
|---|
|  | 8677 | { | 
|---|
|  | 8678 | _lexer.next(); | 
|---|
|  | 8679 |  | 
|---|
|  | 8680 | xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); | 
|---|
|  | 8681 |  | 
|---|
|  | 8682 | // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path | 
|---|
|  | 8683 | lexeme_t l = _lexer.current(); | 
|---|
|  | 8684 |  | 
|---|
|  | 8685 | if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply) | 
|---|
|  | 8686 | return parse_relative_location_path(n); | 
|---|
|  | 8687 | else | 
|---|
|  | 8688 | return n; | 
|---|
|  | 8689 | } | 
|---|
|  | 8690 | else if (_lexer.current() == lex_double_slash) | 
|---|
|  | 8691 | { | 
|---|
|  | 8692 | _lexer.next(); | 
|---|
|  | 8693 |  | 
|---|
|  | 8694 | xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); | 
|---|
|  | 8695 | n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); | 
|---|
|  | 8696 |  | 
|---|
|  | 8697 | return parse_relative_location_path(n); | 
|---|
|  | 8698 | } | 
|---|
|  | 8699 |  | 
|---|
|  | 8700 | // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1 | 
|---|
|  | 8701 | return parse_relative_location_path(0); | 
|---|
|  | 8702 | } | 
|---|
|  | 8703 |  | 
|---|
|  | 8704 | // PathExpr ::= LocationPath | 
|---|
|  | 8705 | //                          | FilterExpr | 
|---|
|  | 8706 | //                          | FilterExpr '/' RelativeLocationPath | 
|---|
|  | 8707 | //                          | FilterExpr '//' RelativeLocationPath | 
|---|
|  | 8708 | xpath_ast_node* parse_path_expression() | 
|---|
|  | 8709 | { | 
|---|
|  | 8710 | // Clarification. | 
|---|
|  | 8711 | // PathExpr begins with either LocationPath or FilterExpr. | 
|---|
|  | 8712 | // FilterExpr begins with PrimaryExpr | 
|---|
|  | 8713 | // PrimaryExpr begins with '$' in case of it being a variable reference, | 
|---|
|  | 8714 | // '(' in case of it being an expression, string literal, number constant or | 
|---|
|  | 8715 | // function call. | 
|---|
|  | 8716 |  | 
|---|
|  | 8717 | if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace || | 
|---|
|  | 8718 | _lexer.current() == lex_quoted_string || _lexer.current() == lex_number || | 
|---|
|  | 8719 | _lexer.current() == lex_string) | 
|---|
|  | 8720 | { | 
|---|
|  | 8721 | if (_lexer.current() == lex_string) | 
|---|
|  | 8722 | { | 
|---|
|  | 8723 | // This is either a function call, or not - if not, we shall proceed with location path | 
|---|
|  | 8724 | const char_t* state = _lexer.state(); | 
|---|
|  | 8725 |  | 
|---|
|  | 8726 | while (IS_CHARTYPE(*state, ct_space)) ++state; | 
|---|
|  | 8727 |  | 
|---|
|  | 8728 | if (*state != '(') return parse_location_path(); | 
|---|
|  | 8729 |  | 
|---|
|  | 8730 | // This looks like a function call; however this still can be a node-test. Check it. | 
|---|
|  | 8731 | if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path(); | 
|---|
|  | 8732 | } | 
|---|
|  | 8733 |  | 
|---|
|  | 8734 | xpath_ast_node* n = parse_filter_expression(); | 
|---|
|  | 8735 |  | 
|---|
|  | 8736 | if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) | 
|---|
|  | 8737 | { | 
|---|
|  | 8738 | lexeme_t l = _lexer.current(); | 
|---|
|  | 8739 | _lexer.next(); | 
|---|
|  | 8740 |  | 
|---|
|  | 8741 | if (l == lex_double_slash) | 
|---|
|  | 8742 | { | 
|---|
|  | 8743 | if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set"); | 
|---|
|  | 8744 |  | 
|---|
|  | 8745 | n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); | 
|---|
|  | 8746 | } | 
|---|
|  | 8747 |  | 
|---|
|  | 8748 | // select from location path | 
|---|
|  | 8749 | return parse_relative_location_path(n); | 
|---|
|  | 8750 | } | 
|---|
|  | 8751 |  | 
|---|
|  | 8752 | return n; | 
|---|
|  | 8753 | } | 
|---|
|  | 8754 | else return parse_location_path(); | 
|---|
|  | 8755 | } | 
|---|
|  | 8756 |  | 
|---|
|  | 8757 | // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr | 
|---|
|  | 8758 | xpath_ast_node* parse_union_expression() | 
|---|
|  | 8759 | { | 
|---|
|  | 8760 | xpath_ast_node* n = parse_path_expression(); | 
|---|
|  | 8761 |  | 
|---|
|  | 8762 | while (_lexer.current() == lex_union) | 
|---|
|  | 8763 | { | 
|---|
|  | 8764 | _lexer.next(); | 
|---|
|  | 8765 |  | 
|---|
|  | 8766 | xpath_ast_node* expr = parse_union_expression(); | 
|---|
|  | 8767 |  | 
|---|
|  | 8768 | if (n->rettype() != xpath_type_node_set || expr->rettype() != xpath_type_node_set) | 
|---|
|  | 8769 | throw_error("Union operator has to be applied to node sets"); | 
|---|
|  | 8770 |  | 
|---|
|  | 8771 | n = new (alloc_node()) xpath_ast_node(ast_op_union, xpath_type_node_set, n, expr); | 
|---|
|  | 8772 | } | 
|---|
|  | 8773 |  | 
|---|
|  | 8774 | return n; | 
|---|
|  | 8775 | } | 
|---|
|  | 8776 |  | 
|---|
|  | 8777 | // UnaryExpr ::= UnionExpr | '-' UnaryExpr | 
|---|
|  | 8778 | xpath_ast_node* parse_unary_expression() | 
|---|
|  | 8779 | { | 
|---|
|  | 8780 | if (_lexer.current() == lex_minus) | 
|---|
|  | 8781 | { | 
|---|
|  | 8782 | _lexer.next(); | 
|---|
|  | 8783 |  | 
|---|
|  | 8784 | xpath_ast_node* expr = parse_unary_expression(); | 
|---|
|  | 8785 |  | 
|---|
|  | 8786 | return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr); | 
|---|
|  | 8787 | } | 
|---|
|  | 8788 | else return parse_union_expression(); | 
|---|
|  | 8789 | } | 
|---|
|  | 8790 |  | 
|---|
|  | 8791 | // MultiplicativeExpr ::= UnaryExpr | 
|---|
|  | 8792 | //                                            | MultiplicativeExpr '*' UnaryExpr | 
|---|
|  | 8793 | //                                            | MultiplicativeExpr 'div' UnaryExpr | 
|---|
|  | 8794 | //                                            | MultiplicativeExpr 'mod' UnaryExpr | 
|---|
|  | 8795 | xpath_ast_node* parse_multiplicative_expression() | 
|---|
|  | 8796 | { | 
|---|
|  | 8797 | xpath_ast_node* n = parse_unary_expression(); | 
|---|
|  | 8798 |  | 
|---|
|  | 8799 | while (_lexer.current() == lex_multiply || (_lexer.current() == lex_string && | 
|---|
|  | 8800 | (_lexer.contents() == PUGIXML_TEXT("mod") || _lexer.contents() == PUGIXML_TEXT("div")))) | 
|---|
|  | 8801 | { | 
|---|
|  | 8802 | ast_type_t op = _lexer.current() == lex_multiply ? ast_op_multiply : | 
|---|
|  | 8803 | _lexer.contents().begin[0] == 'd' ? ast_op_divide : ast_op_mod; | 
|---|
|  | 8804 | _lexer.next(); | 
|---|
|  | 8805 |  | 
|---|
|  | 8806 | xpath_ast_node* expr = parse_unary_expression(); | 
|---|
|  | 8807 |  | 
|---|
|  | 8808 | n = new (alloc_node()) xpath_ast_node(op, xpath_type_number, n, expr); | 
|---|
|  | 8809 | } | 
|---|
|  | 8810 |  | 
|---|
|  | 8811 | return n; | 
|---|
|  | 8812 | } | 
|---|
|  | 8813 |  | 
|---|
|  | 8814 | // AdditiveExpr ::= MultiplicativeExpr | 
|---|
|  | 8815 | //                                  | AdditiveExpr '+' MultiplicativeExpr | 
|---|
|  | 8816 | //                                  | AdditiveExpr '-' MultiplicativeExpr | 
|---|
|  | 8817 | xpath_ast_node* parse_additive_expression() | 
|---|
|  | 8818 | { | 
|---|
|  | 8819 | xpath_ast_node* n = parse_multiplicative_expression(); | 
|---|
|  | 8820 |  | 
|---|
|  | 8821 | while (_lexer.current() == lex_plus || _lexer.current() == lex_minus) | 
|---|
|  | 8822 | { | 
|---|
|  | 8823 | lexeme_t l = _lexer.current(); | 
|---|
|  | 8824 |  | 
|---|
|  | 8825 | _lexer.next(); | 
|---|
|  | 8826 |  | 
|---|
|  | 8827 | xpath_ast_node* expr = parse_multiplicative_expression(); | 
|---|
|  | 8828 |  | 
|---|
|  | 8829 | n = new (alloc_node()) xpath_ast_node(l == lex_plus ? ast_op_add : ast_op_subtract, xpath_type_number, n, expr); | 
|---|
|  | 8830 | } | 
|---|
|  | 8831 |  | 
|---|
|  | 8832 | return n; | 
|---|
|  | 8833 | } | 
|---|
|  | 8834 |  | 
|---|
|  | 8835 | // RelationalExpr ::= AdditiveExpr | 
|---|
|  | 8836 | //                                    | RelationalExpr '<' AdditiveExpr | 
|---|
|  | 8837 | //                                    | RelationalExpr '>' AdditiveExpr | 
|---|
|  | 8838 | //                                    | RelationalExpr '<=' AdditiveExpr | 
|---|
|  | 8839 | //                                    | RelationalExpr '>=' AdditiveExpr | 
|---|
|  | 8840 | xpath_ast_node* parse_relational_expression() | 
|---|
|  | 8841 | { | 
|---|
|  | 8842 | xpath_ast_node* n = parse_additive_expression(); | 
|---|
|  | 8843 |  | 
|---|
|  | 8844 | while (_lexer.current() == lex_less || _lexer.current() == lex_less_or_equal || | 
|---|
|  | 8845 | _lexer.current() == lex_greater || _lexer.current() == lex_greater_or_equal) | 
|---|
|  | 8846 | { | 
|---|
|  | 8847 | lexeme_t l = _lexer.current(); | 
|---|
|  | 8848 | _lexer.next(); | 
|---|
|  | 8849 |  | 
|---|
|  | 8850 | xpath_ast_node* expr = parse_additive_expression(); | 
|---|
|  | 8851 |  | 
|---|
|  | 8852 | n = new (alloc_node()) xpath_ast_node(l == lex_less ? ast_op_less : l == lex_greater ? ast_op_greater : | 
|---|
|  | 8853 | l == lex_less_or_equal ? ast_op_less_or_equal : ast_op_greater_or_equal, xpath_type_boolean, n, expr); | 
|---|
|  | 8854 | } | 
|---|
|  | 8855 |  | 
|---|
|  | 8856 | return n; | 
|---|
|  | 8857 | } | 
|---|
|  | 8858 |  | 
|---|
|  | 8859 | // EqualityExpr ::= RelationalExpr | 
|---|
|  | 8860 | //                                  | EqualityExpr '=' RelationalExpr | 
|---|
|  | 8861 | //                                  | EqualityExpr '!=' RelationalExpr | 
|---|
|  | 8862 | xpath_ast_node* parse_equality_expression() | 
|---|
|  | 8863 | { | 
|---|
|  | 8864 | xpath_ast_node* n = parse_relational_expression(); | 
|---|
|  | 8865 |  | 
|---|
|  | 8866 | while (_lexer.current() == lex_equal || _lexer.current() == lex_not_equal) | 
|---|
|  | 8867 | { | 
|---|
|  | 8868 | lexeme_t l = _lexer.current(); | 
|---|
|  | 8869 |  | 
|---|
|  | 8870 | _lexer.next(); | 
|---|
|  | 8871 |  | 
|---|
|  | 8872 | xpath_ast_node* expr = parse_relational_expression(); | 
|---|
|  | 8873 |  | 
|---|
|  | 8874 | n = new (alloc_node()) xpath_ast_node(l == lex_equal ? ast_op_equal : ast_op_not_equal, xpath_type_boolean, n, expr); | 
|---|
|  | 8875 | } | 
|---|
|  | 8876 |  | 
|---|
|  | 8877 | return n; | 
|---|
|  | 8878 | } | 
|---|
|  | 8879 |  | 
|---|
|  | 8880 | // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr | 
|---|
|  | 8881 | xpath_ast_node* parse_and_expression() | 
|---|
|  | 8882 | { | 
|---|
|  | 8883 | xpath_ast_node* n = parse_equality_expression(); | 
|---|
|  | 8884 |  | 
|---|
|  | 8885 | while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("and")) | 
|---|
|  | 8886 | { | 
|---|
|  | 8887 | _lexer.next(); | 
|---|
|  | 8888 |  | 
|---|
|  | 8889 | xpath_ast_node* expr = parse_equality_expression(); | 
|---|
|  | 8890 |  | 
|---|
|  | 8891 | n = new (alloc_node()) xpath_ast_node(ast_op_and, xpath_type_boolean, n, expr); | 
|---|
|  | 8892 | } | 
|---|
|  | 8893 |  | 
|---|
|  | 8894 | return n; | 
|---|
|  | 8895 | } | 
|---|
|  | 8896 |  | 
|---|
|  | 8897 | // OrExpr ::= AndExpr | OrExpr 'or' AndExpr | 
|---|
|  | 8898 | xpath_ast_node* parse_or_expression() | 
|---|
|  | 8899 | { | 
|---|
|  | 8900 | xpath_ast_node* n = parse_and_expression(); | 
|---|
|  | 8901 |  | 
|---|
|  | 8902 | while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("or")) | 
|---|
|  | 8903 | { | 
|---|
|  | 8904 | _lexer.next(); | 
|---|
|  | 8905 |  | 
|---|
|  | 8906 | xpath_ast_node* expr = parse_and_expression(); | 
|---|
|  | 8907 |  | 
|---|
|  | 8908 | n = new (alloc_node()) xpath_ast_node(ast_op_or, xpath_type_boolean, n, expr); | 
|---|
|  | 8909 | } | 
|---|
|  | 8910 |  | 
|---|
|  | 8911 | return n; | 
|---|
|  | 8912 | } | 
|---|
|  | 8913 |  | 
|---|
|  | 8914 | // Expr ::= OrExpr | 
|---|
|  | 8915 | xpath_ast_node* parse_expression() | 
|---|
|  | 8916 | { | 
|---|
|  | 8917 | return parse_or_expression(); | 
|---|
|  | 8918 | } | 
|---|
|  | 8919 |  | 
|---|
|  | 8920 | xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result) | 
|---|
|  | 8921 | { | 
|---|
|  | 8922 | } | 
|---|
|  | 8923 |  | 
|---|
|  | 8924 | xpath_ast_node* parse() | 
|---|
|  | 8925 | { | 
|---|
|  | 8926 | xpath_ast_node* result = parse_expression(); | 
|---|
|  | 8927 |  | 
|---|
|  | 8928 | if (_lexer.current() != lex_eof) | 
|---|
|  | 8929 | { | 
|---|
|  | 8930 | // there are still unparsed tokens left, error | 
|---|
|  | 8931 | throw_error("Incorrect query"); | 
|---|
|  | 8932 | } | 
|---|
|  | 8933 |  | 
|---|
|  | 8934 | return result; | 
|---|
|  | 8935 | } | 
|---|
|  | 8936 |  | 
|---|
|  | 8937 | static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result) | 
|---|
|  | 8938 | { | 
|---|
|  | 8939 | xpath_parser parser(query, variables, alloc, result); | 
|---|
|  | 8940 |  | 
|---|
|  | 8941 | #ifdef PUGIXML_NO_EXCEPTIONS | 
|---|
|  | 8942 | int error = setjmp(parser._error_handler); | 
|---|
|  | 8943 |  | 
|---|
|  | 8944 | return (error == 0) ? parser.parse() : 0; | 
|---|
|  | 8945 | #else | 
|---|
|  | 8946 | return parser.parse(); | 
|---|
|  | 8947 | #endif | 
|---|
|  | 8948 | } | 
|---|
|  | 8949 | }; | 
|---|
|  | 8950 |  | 
|---|
|  | 8951 | struct xpath_query_impl | 
|---|
|  | 8952 | { | 
|---|
|  | 8953 | static xpath_query_impl* create() | 
|---|
|  | 8954 | { | 
|---|
|  | 8955 | void* memory = global_allocate(sizeof(xpath_query_impl)); | 
|---|
|  | 8956 |  | 
|---|
|  | 8957 | return new (memory) xpath_query_impl(); | 
|---|
|  | 8958 | } | 
|---|
|  | 8959 |  | 
|---|
|  | 8960 | static void destroy(void* ptr) | 
|---|
|  | 8961 | { | 
|---|
|  | 8962 | if (!ptr) return; | 
|---|
|  | 8963 |  | 
|---|
|  | 8964 | // free all allocated pages | 
|---|
|  | 8965 | static_cast<xpath_query_impl*>(ptr)->alloc.release(); | 
|---|
|  | 8966 |  | 
|---|
|  | 8967 | // free allocator memory (with the first page) | 
|---|
|  | 8968 | global_deallocate(ptr); | 
|---|
|  | 8969 | } | 
|---|
|  | 8970 |  | 
|---|
|  | 8971 | xpath_query_impl(): root(0), alloc(&block) | 
|---|
|  | 8972 | { | 
|---|
|  | 8973 | block.next = 0; | 
|---|
|  | 8974 | } | 
|---|
|  | 8975 |  | 
|---|
|  | 8976 | xpath_ast_node* root; | 
|---|
|  | 8977 | xpath_allocator alloc; | 
|---|
|  | 8978 | xpath_memory_block block; | 
|---|
|  | 8979 | }; | 
|---|
|  | 8980 |  | 
|---|
|  | 8981 | xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd) | 
|---|
|  | 8982 | { | 
|---|
|  | 8983 | if (!impl) return xpath_string(); | 
|---|
|  | 8984 |  | 
|---|
|  | 8985 | #ifdef PUGIXML_NO_EXCEPTIONS | 
|---|
|  | 8986 | if (setjmp(sd.error_handler)) return xpath_string(); | 
|---|
|  | 8987 | #endif | 
|---|
|  | 8988 |  | 
|---|
|  | 8989 | xpath_context c(n, 1, 1); | 
|---|
|  | 8990 |  | 
|---|
|  | 8991 | return impl->root->eval_string(c, sd.stack); | 
|---|
|  | 8992 | } | 
|---|
|  | 8993 | } | 
|---|
|  | 8994 |  | 
|---|
|  | 8995 | namespace pugi | 
|---|
|  | 8996 | { | 
|---|
|  | 8997 | #ifndef PUGIXML_NO_EXCEPTIONS | 
|---|
|  | 8998 | xpath_exception::xpath_exception(const xpath_parse_result& result): _result(result) | 
|---|
|  | 8999 | { | 
|---|
|  | 9000 | assert(result.error); | 
|---|
|  | 9001 | } | 
|---|
|  | 9002 |  | 
|---|
|  | 9003 | const char* xpath_exception::what() const throw() | 
|---|
|  | 9004 | { | 
|---|
|  | 9005 | return _result.error; | 
|---|
|  | 9006 | } | 
|---|
|  | 9007 |  | 
|---|
|  | 9008 | const xpath_parse_result& xpath_exception::result() const | 
|---|
|  | 9009 | { | 
|---|
|  | 9010 | return _result; | 
|---|
|  | 9011 | } | 
|---|
|  | 9012 | #endif | 
|---|
|  | 9013 |  | 
|---|
|  | 9014 | xpath_node::xpath_node() | 
|---|
|  | 9015 | { | 
|---|
|  | 9016 | } | 
|---|
|  | 9017 |  | 
|---|
|  | 9018 | xpath_node::xpath_node(const xml_node& node): _node(node) | 
|---|
|  | 9019 | { | 
|---|
|  | 9020 | } | 
|---|
|  | 9021 |  | 
|---|
|  | 9022 | xpath_node::xpath_node(const xml_attribute& attribute, const xml_node& parent): _node(attribute ? parent : xml_node()), _attribute(attribute) | 
|---|
|  | 9023 | { | 
|---|
|  | 9024 | } | 
|---|
|  | 9025 |  | 
|---|
|  | 9026 | xml_node xpath_node::node() const | 
|---|
|  | 9027 | { | 
|---|
|  | 9028 | return _attribute ? xml_node() : _node; | 
|---|
|  | 9029 | } | 
|---|
|  | 9030 |  | 
|---|
|  | 9031 | xml_attribute xpath_node::attribute() const | 
|---|
|  | 9032 | { | 
|---|
|  | 9033 | return _attribute; | 
|---|
|  | 9034 | } | 
|---|
|  | 9035 |  | 
|---|
|  | 9036 | xml_node xpath_node::parent() const | 
|---|
|  | 9037 | { | 
|---|
|  | 9038 | return _attribute ? _node : _node.parent(); | 
|---|
|  | 9039 | } | 
|---|
|  | 9040 |  | 
|---|
|  | 9041 | xpath_node::operator xpath_node::unspecified_bool_type() const | 
|---|
|  | 9042 | { | 
|---|
|  | 9043 | return (_node || _attribute) ? &xpath_node::_node : 0; | 
|---|
|  | 9044 | } | 
|---|
|  | 9045 |  | 
|---|
|  | 9046 | bool xpath_node::operator!() const | 
|---|
|  | 9047 | { | 
|---|
|  | 9048 | return !(_node || _attribute); | 
|---|
|  | 9049 | } | 
|---|
|  | 9050 |  | 
|---|
|  | 9051 | bool xpath_node::operator==(const xpath_node& n) const | 
|---|
|  | 9052 | { | 
|---|
|  | 9053 | return _node == n._node && _attribute == n._attribute; | 
|---|
|  | 9054 | } | 
|---|
|  | 9055 |  | 
|---|
|  | 9056 | bool xpath_node::operator!=(const xpath_node& n) const | 
|---|
|  | 9057 | { | 
|---|
|  | 9058 | return _node != n._node || _attribute != n._attribute; | 
|---|
|  | 9059 | } | 
|---|
|  | 9060 |  | 
|---|
|  | 9061 | #ifdef __BORLANDC__ | 
|---|
|  | 9062 | bool operator&&(const xpath_node& lhs, bool rhs) | 
|---|
|  | 9063 | { | 
|---|
|  | 9064 | return (bool)lhs && rhs; | 
|---|
|  | 9065 | } | 
|---|
|  | 9066 |  | 
|---|
|  | 9067 | bool operator||(const xpath_node& lhs, bool rhs) | 
|---|
|  | 9068 | { | 
|---|
|  | 9069 | return (bool)lhs || rhs; | 
|---|
|  | 9070 | } | 
|---|
|  | 9071 | #endif | 
|---|
|  | 9072 |  | 
|---|
|  | 9073 | void xpath_node_set::_assign(const_iterator begin, const_iterator end) | 
|---|
|  | 9074 | { | 
|---|
|  | 9075 | assert(begin <= end); | 
|---|
|  | 9076 |  | 
|---|
|  | 9077 | size_t size = static_cast<size_t>(end - begin); | 
|---|
|  | 9078 |  | 
|---|
|  | 9079 | if (size <= 1) | 
|---|
|  | 9080 | { | 
|---|
|  | 9081 | // deallocate old buffer | 
|---|
|  | 9082 | if (_begin != &_storage) global_deallocate(_begin); | 
|---|
|  | 9083 |  | 
|---|
|  | 9084 | // use internal buffer | 
|---|
|  | 9085 | if (begin != end) _storage = *begin; | 
|---|
|  | 9086 |  | 
|---|
|  | 9087 | _begin = &_storage; | 
|---|
|  | 9088 | _end = &_storage + size; | 
|---|
|  | 9089 | } | 
|---|
|  | 9090 | else | 
|---|
|  | 9091 | { | 
|---|
|  | 9092 | // make heap copy | 
|---|
|  | 9093 | xpath_node* storage = static_cast<xpath_node*>(global_allocate(size * sizeof(xpath_node))); | 
|---|
|  | 9094 |  | 
|---|
|  | 9095 | if (!storage) | 
|---|
|  | 9096 | { | 
|---|
|  | 9097 | #ifdef PUGIXML_NO_EXCEPTIONS | 
|---|
|  | 9098 | return; | 
|---|
|  | 9099 | #else | 
|---|
|  | 9100 | throw std::bad_alloc(); | 
|---|
|  | 9101 | #endif | 
|---|
|  | 9102 | } | 
|---|
|  | 9103 |  | 
|---|
|  | 9104 | memcpy(storage, begin, size * sizeof(xpath_node)); | 
|---|
|  | 9105 |  | 
|---|
|  | 9106 | // deallocate old buffer | 
|---|
|  | 9107 | if (_begin != &_storage) global_deallocate(_begin); | 
|---|
|  | 9108 |  | 
|---|
|  | 9109 | // finalize | 
|---|
|  | 9110 | _begin = storage; | 
|---|
|  | 9111 | _end = storage + size; | 
|---|
|  | 9112 | } | 
|---|
|  | 9113 | } | 
|---|
|  | 9114 |  | 
|---|
|  | 9115 | xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage) | 
|---|
|  | 9116 | { | 
|---|
|  | 9117 | } | 
|---|
|  | 9118 |  | 
|---|
|  | 9119 | xpath_node_set::xpath_node_set(const_iterator begin, const_iterator end, type_t type): _type(type), _begin(&_storage), _end(&_storage) | 
|---|
|  | 9120 | { | 
|---|
|  | 9121 | _assign(begin, end); | 
|---|
|  | 9122 | } | 
|---|
|  | 9123 |  | 
|---|
|  | 9124 | xpath_node_set::~xpath_node_set() | 
|---|
|  | 9125 | { | 
|---|
|  | 9126 | if (_begin != &_storage) global_deallocate(_begin); | 
|---|
|  | 9127 | } | 
|---|
|  | 9128 |  | 
|---|
|  | 9129 | xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(ns._type), _begin(&_storage), _end(&_storage) | 
|---|
|  | 9130 | { | 
|---|
|  | 9131 | _assign(ns._begin, ns._end); | 
|---|
|  | 9132 | } | 
|---|
|  | 9133 |  | 
|---|
|  | 9134 | xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns) | 
|---|
|  | 9135 | { | 
|---|
|  | 9136 | if (this == &ns) return *this; | 
|---|
|  | 9137 |  | 
|---|
|  | 9138 | _type = ns._type; | 
|---|
|  | 9139 | _assign(ns._begin, ns._end); | 
|---|
|  | 9140 |  | 
|---|
|  | 9141 | return *this; | 
|---|
|  | 9142 | } | 
|---|
|  | 9143 |  | 
|---|
|  | 9144 | xpath_node_set::type_t xpath_node_set::type() const | 
|---|
|  | 9145 | { | 
|---|
|  | 9146 | return _type; | 
|---|
|  | 9147 | } | 
|---|
|  | 9148 |  | 
|---|
|  | 9149 | size_t xpath_node_set::size() const | 
|---|
|  | 9150 | { | 
|---|
|  | 9151 | return _end - _begin; | 
|---|
|  | 9152 | } | 
|---|
|  | 9153 |  | 
|---|
|  | 9154 | bool xpath_node_set::empty() const | 
|---|
|  | 9155 | { | 
|---|
|  | 9156 | return _begin == _end; | 
|---|
|  | 9157 | } | 
|---|
|  | 9158 |  | 
|---|
|  | 9159 | const xpath_node& xpath_node_set::operator[](size_t index) const | 
|---|
|  | 9160 | { | 
|---|
|  | 9161 | assert(index < size()); | 
|---|
|  | 9162 | return _begin[index]; | 
|---|
|  | 9163 | } | 
|---|
|  | 9164 |  | 
|---|
|  | 9165 | xpath_node_set::const_iterator xpath_node_set::begin() const | 
|---|
|  | 9166 | { | 
|---|
|  | 9167 | return _begin; | 
|---|
|  | 9168 | } | 
|---|
|  | 9169 |  | 
|---|
|  | 9170 | xpath_node_set::const_iterator xpath_node_set::end() const | 
|---|
|  | 9171 | { | 
|---|
|  | 9172 | return _end; | 
|---|
|  | 9173 | } | 
|---|
|  | 9174 |  | 
|---|
|  | 9175 | void xpath_node_set::sort(bool reverse) | 
|---|
|  | 9176 | { | 
|---|
|  | 9177 | _type = xpath_sort(_begin, _end, _type, reverse); | 
|---|
|  | 9178 | } | 
|---|
|  | 9179 |  | 
|---|
|  | 9180 | xpath_node xpath_node_set::first() const | 
|---|
|  | 9181 | { | 
|---|
|  | 9182 | return xpath_first(_begin, _end, _type); | 
|---|
|  | 9183 | } | 
|---|
|  | 9184 |  | 
|---|
|  | 9185 | xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0) | 
|---|
|  | 9186 | { | 
|---|
|  | 9187 | } | 
|---|
|  | 9188 |  | 
|---|
|  | 9189 | xpath_parse_result::operator bool() const | 
|---|
|  | 9190 | { | 
|---|
|  | 9191 | return error == 0; | 
|---|
|  | 9192 | } | 
|---|
|  | 9193 | const char* xpath_parse_result::description() const | 
|---|
|  | 9194 | { | 
|---|
|  | 9195 | return error ? error : "No error"; | 
|---|
|  | 9196 | } | 
|---|
|  | 9197 |  | 
|---|
|  | 9198 | xpath_variable::xpath_variable() | 
|---|
|  | 9199 | { | 
|---|
|  | 9200 | } | 
|---|
|  | 9201 |  | 
|---|
|  | 9202 | const char_t* xpath_variable::name() const | 
|---|
|  | 9203 | { | 
|---|
|  | 9204 | switch (_type) | 
|---|
|  | 9205 | { | 
|---|
|  | 9206 | case xpath_type_node_set: | 
|---|
|  | 9207 | return static_cast<const xpath_variable_node_set*>(this)->name; | 
|---|
|  | 9208 |  | 
|---|
|  | 9209 | case xpath_type_number: | 
|---|
|  | 9210 | return static_cast<const xpath_variable_number*>(this)->name; | 
|---|
|  | 9211 |  | 
|---|
|  | 9212 | case xpath_type_string: | 
|---|
|  | 9213 | return static_cast<const xpath_variable_string*>(this)->name; | 
|---|
|  | 9214 |  | 
|---|
|  | 9215 | case xpath_type_boolean: | 
|---|
|  | 9216 | return static_cast<const xpath_variable_boolean*>(this)->name; | 
|---|
|  | 9217 |  | 
|---|
|  | 9218 | default: | 
|---|
|  | 9219 | assert(!"Invalid variable type"); | 
|---|
|  | 9220 | return 0; | 
|---|
|  | 9221 | } | 
|---|
|  | 9222 | } | 
|---|
|  | 9223 |  | 
|---|
|  | 9224 | xpath_value_type xpath_variable::type() const | 
|---|
|  | 9225 | { | 
|---|
|  | 9226 | return _type; | 
|---|
|  | 9227 | } | 
|---|
|  | 9228 |  | 
|---|
|  | 9229 | bool xpath_variable::get_boolean() const | 
|---|
|  | 9230 | { | 
|---|
|  | 9231 | return (_type == xpath_type_boolean) ? static_cast<const xpath_variable_boolean*>(this)->value : false; | 
|---|
|  | 9232 | } | 
|---|
|  | 9233 |  | 
|---|
|  | 9234 | double xpath_variable::get_number() const | 
|---|
|  | 9235 | { | 
|---|
|  | 9236 | return (_type == xpath_type_number) ? static_cast<const xpath_variable_number*>(this)->value : gen_nan(); | 
|---|
|  | 9237 | } | 
|---|
|  | 9238 |  | 
|---|
|  | 9239 | const char_t* xpath_variable::get_string() const | 
|---|
|  | 9240 | { | 
|---|
|  | 9241 | const char_t* value = (_type == xpath_type_string) ? static_cast<const xpath_variable_string*>(this)->value : 0; | 
|---|
|  | 9242 | return value ? value : PUGIXML_TEXT(""); | 
|---|
|  | 9243 | } | 
|---|
|  | 9244 |  | 
|---|
|  | 9245 | const xpath_node_set& xpath_variable::get_node_set() const | 
|---|
|  | 9246 | { | 
|---|
|  | 9247 | return (_type == xpath_type_node_set) ? static_cast<const xpath_variable_node_set*>(this)->value : dummy_node_set; | 
|---|
|  | 9248 | } | 
|---|
|  | 9249 |  | 
|---|
|  | 9250 | bool xpath_variable::set(bool value) | 
|---|
|  | 9251 | { | 
|---|
|  | 9252 | if (_type != xpath_type_boolean) return false; | 
|---|
|  | 9253 |  | 
|---|
|  | 9254 | static_cast<xpath_variable_boolean*>(this)->value = value; | 
|---|
|  | 9255 | return true; | 
|---|
|  | 9256 | } | 
|---|
|  | 9257 |  | 
|---|
|  | 9258 | bool xpath_variable::set(double value) | 
|---|
|  | 9259 | { | 
|---|
|  | 9260 | if (_type != xpath_type_number) return false; | 
|---|
|  | 9261 |  | 
|---|
|  | 9262 | static_cast<xpath_variable_number*>(this)->value = value; | 
|---|
|  | 9263 | return true; | 
|---|
|  | 9264 | } | 
|---|
|  | 9265 |  | 
|---|
|  | 9266 | bool xpath_variable::set(const char_t* value) | 
|---|
|  | 9267 | { | 
|---|
|  | 9268 | if (_type != xpath_type_string) return false; | 
|---|
|  | 9269 |  | 
|---|
|  | 9270 | xpath_variable_string* var = static_cast<xpath_variable_string*>(this); | 
|---|
|  | 9271 |  | 
|---|
|  | 9272 | // duplicate string | 
|---|
|  | 9273 | size_t size = (strlength(value) + 1) * sizeof(char_t); | 
|---|
|  | 9274 |  | 
|---|
|  | 9275 | char_t* copy = static_cast<char_t*>(global_allocate(size)); | 
|---|
|  | 9276 | if (!copy) return false; | 
|---|
|  | 9277 |  | 
|---|
|  | 9278 | memcpy(copy, value, size); | 
|---|
|  | 9279 |  | 
|---|
|  | 9280 | // replace old string | 
|---|
|  | 9281 | if (var->value) global_deallocate(var->value); | 
|---|
|  | 9282 | var->value = copy; | 
|---|
|  | 9283 |  | 
|---|
|  | 9284 | return true; | 
|---|
|  | 9285 | } | 
|---|
|  | 9286 |  | 
|---|
|  | 9287 | bool xpath_variable::set(const xpath_node_set& value) | 
|---|
|  | 9288 | { | 
|---|
|  | 9289 | if (_type != xpath_type_node_set) return false; | 
|---|
|  | 9290 |  | 
|---|
|  | 9291 | static_cast<xpath_variable_node_set*>(this)->value = value; | 
|---|
|  | 9292 | return true; | 
|---|
|  | 9293 | } | 
|---|
|  | 9294 |  | 
|---|
|  | 9295 | xpath_variable_set::xpath_variable_set() | 
|---|
|  | 9296 | { | 
|---|
|  | 9297 | for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) _data[i] = 0; | 
|---|
|  | 9298 | } | 
|---|
|  | 9299 |  | 
|---|
|  | 9300 | xpath_variable_set::~xpath_variable_set() | 
|---|
|  | 9301 | { | 
|---|
|  | 9302 | for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | 
|---|
|  | 9303 | { | 
|---|
|  | 9304 | xpath_variable* var = _data[i]; | 
|---|
|  | 9305 |  | 
|---|
|  | 9306 | while (var) | 
|---|
|  | 9307 | { | 
|---|
|  | 9308 | xpath_variable* next = var->_next; | 
|---|
|  | 9309 |  | 
|---|
|  | 9310 | delete_xpath_variable(var->_type, var); | 
|---|
|  | 9311 |  | 
|---|
|  | 9312 | var = next; | 
|---|
|  | 9313 | } | 
|---|
|  | 9314 | } | 
|---|
|  | 9315 | } | 
|---|
|  | 9316 |  | 
|---|
|  | 9317 | xpath_variable* xpath_variable_set::find(const char_t* name) const | 
|---|
|  | 9318 | { | 
|---|
|  | 9319 | const size_t hash_size = sizeof(_data) / sizeof(_data[0]); | 
|---|
|  | 9320 | size_t hash = hash_string(name) % hash_size; | 
|---|
|  | 9321 |  | 
|---|
|  | 9322 | // look for existing variable | 
|---|
|  | 9323 | for (xpath_variable* var = _data[hash]; var; var = var->_next) | 
|---|
|  | 9324 | if (strequal(var->name(), name)) | 
|---|
|  | 9325 | return var; | 
|---|
|  | 9326 |  | 
|---|
|  | 9327 | return 0; | 
|---|
|  | 9328 | } | 
|---|
|  | 9329 |  | 
|---|
|  | 9330 | xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type) | 
|---|
|  | 9331 | { | 
|---|
|  | 9332 | const size_t hash_size = sizeof(_data) / sizeof(_data[0]); | 
|---|
|  | 9333 | size_t hash = hash_string(name) % hash_size; | 
|---|
|  | 9334 |  | 
|---|
|  | 9335 | // look for existing variable | 
|---|
|  | 9336 | for (xpath_variable* var = _data[hash]; var; var = var->_next) | 
|---|
|  | 9337 | if (strequal(var->name(), name)) | 
|---|
|  | 9338 | return var->type() == type ? var : 0; | 
|---|
|  | 9339 |  | 
|---|
|  | 9340 | // add new variable | 
|---|
|  | 9341 | xpath_variable* result = new_xpath_variable(type, name); | 
|---|
|  | 9342 |  | 
|---|
|  | 9343 | if (result) | 
|---|
|  | 9344 | { | 
|---|
|  | 9345 | result->_type = type; | 
|---|
|  | 9346 | result->_next = _data[hash]; | 
|---|
|  | 9347 |  | 
|---|
|  | 9348 | _data[hash] = result; | 
|---|
|  | 9349 | } | 
|---|
|  | 9350 |  | 
|---|
|  | 9351 | return result; | 
|---|
|  | 9352 | } | 
|---|
|  | 9353 |  | 
|---|
|  | 9354 | bool xpath_variable_set::set(const char_t* name, bool value) | 
|---|
|  | 9355 | { | 
|---|
|  | 9356 | xpath_variable* var = add(name, xpath_type_boolean); | 
|---|
|  | 9357 | return var ? var->set(value) : false; | 
|---|
|  | 9358 | } | 
|---|
|  | 9359 |  | 
|---|
|  | 9360 | bool xpath_variable_set::set(const char_t* name, double value) | 
|---|
|  | 9361 | { | 
|---|
|  | 9362 | xpath_variable* var = add(name, xpath_type_number); | 
|---|
|  | 9363 | return var ? var->set(value) : false; | 
|---|
|  | 9364 | } | 
|---|
|  | 9365 |  | 
|---|
|  | 9366 | bool xpath_variable_set::set(const char_t* name, const char_t* value) | 
|---|
|  | 9367 | { | 
|---|
|  | 9368 | xpath_variable* var = add(name, xpath_type_string); | 
|---|
|  | 9369 | return var ? var->set(value) : false; | 
|---|
|  | 9370 | } | 
|---|
|  | 9371 |  | 
|---|
|  | 9372 | bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value) | 
|---|
|  | 9373 | { | 
|---|
|  | 9374 | xpath_variable* var = add(name, xpath_type_node_set); | 
|---|
|  | 9375 | return var ? var->set(value) : false; | 
|---|
|  | 9376 | } | 
|---|
|  | 9377 |  | 
|---|
|  | 9378 | xpath_variable* xpath_variable_set::get(const char_t* name) | 
|---|
|  | 9379 | { | 
|---|
|  | 9380 | return find(name); | 
|---|
|  | 9381 | } | 
|---|
|  | 9382 |  | 
|---|
|  | 9383 | const xpath_variable* xpath_variable_set::get(const char_t* name) const | 
|---|
|  | 9384 | { | 
|---|
|  | 9385 | return find(name); | 
|---|
|  | 9386 | } | 
|---|
|  | 9387 |  | 
|---|
|  | 9388 | xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0) | 
|---|
|  | 9389 | { | 
|---|
|  | 9390 | xpath_query_impl* impl = xpath_query_impl::create(); | 
|---|
|  | 9391 |  | 
|---|
|  | 9392 | if (!impl) | 
|---|
|  | 9393 | { | 
|---|
|  | 9394 | #ifdef PUGIXML_NO_EXCEPTIONS | 
|---|
|  | 9395 | _result.error = "Out of memory"; | 
|---|
|  | 9396 | #else | 
|---|
|  | 9397 | throw std::bad_alloc(); | 
|---|
|  | 9398 | #endif | 
|---|
|  | 9399 | } | 
|---|
|  | 9400 | else | 
|---|
|  | 9401 | { | 
|---|
|  | 9402 | buffer_holder impl_holder(impl, xpath_query_impl::destroy); | 
|---|
|  | 9403 |  | 
|---|
|  | 9404 | impl->root = xpath_parser::parse(query, variables, &impl->alloc, &_result); | 
|---|
|  | 9405 |  | 
|---|
|  | 9406 | if (impl->root) | 
|---|
|  | 9407 | { | 
|---|
|  | 9408 | _impl = static_cast<xpath_query_impl*>(impl_holder.release()); | 
|---|
|  | 9409 | _result.error = 0; | 
|---|
|  | 9410 | } | 
|---|
|  | 9411 | } | 
|---|
|  | 9412 | } | 
|---|
|  | 9413 |  | 
|---|
|  | 9414 | xpath_query::~xpath_query() | 
|---|
|  | 9415 | { | 
|---|
|  | 9416 | xpath_query_impl::destroy(_impl); | 
|---|
|  | 9417 | } | 
|---|
|  | 9418 |  | 
|---|
|  | 9419 | xpath_value_type xpath_query::return_type() const | 
|---|
|  | 9420 | { | 
|---|
|  | 9421 | if (!_impl) return xpath_type_none; | 
|---|
|  | 9422 |  | 
|---|
|  | 9423 | return static_cast<xpath_query_impl*>(_impl)->root->rettype(); | 
|---|
|  | 9424 | } | 
|---|
|  | 9425 |  | 
|---|
|  | 9426 | bool xpath_query::evaluate_boolean(const xpath_node& n) const | 
|---|
|  | 9427 | { | 
|---|
|  | 9428 | if (!_impl) return false; | 
|---|
|  | 9429 |  | 
|---|
|  | 9430 | xpath_context c(n, 1, 1); | 
|---|
|  | 9431 | xpath_stack_data sd; | 
|---|
|  | 9432 |  | 
|---|
|  | 9433 | #ifdef PUGIXML_NO_EXCEPTIONS | 
|---|
|  | 9434 | if (setjmp(sd.error_handler)) return false; | 
|---|
|  | 9435 | #endif | 
|---|
|  | 9436 |  | 
|---|
|  | 9437 | return static_cast<xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack); | 
|---|
|  | 9438 | } | 
|---|
|  | 9439 |  | 
|---|
|  | 9440 | double xpath_query::evaluate_number(const xpath_node& n) const | 
|---|
|  | 9441 | { | 
|---|
|  | 9442 | if (!_impl) return gen_nan(); | 
|---|
|  | 9443 |  | 
|---|
|  | 9444 | xpath_context c(n, 1, 1); | 
|---|
|  | 9445 | xpath_stack_data sd; | 
|---|
|  | 9446 |  | 
|---|
|  | 9447 | #ifdef PUGIXML_NO_EXCEPTIONS | 
|---|
|  | 9448 | if (setjmp(sd.error_handler)) return gen_nan(); | 
|---|
|  | 9449 | #endif | 
|---|
|  | 9450 |  | 
|---|
|  | 9451 | return static_cast<xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack); | 
|---|
|  | 9452 | } | 
|---|
|  | 9453 |  | 
|---|
|  | 9454 | #ifndef PUGIXML_NO_STL | 
|---|
|  | 9455 | string_t xpath_query::evaluate_string(const xpath_node& n) const | 
|---|
|  | 9456 | { | 
|---|
|  | 9457 | xpath_stack_data sd; | 
|---|
|  | 9458 |  | 
|---|
|  | 9459 | return evaluate_string_impl(static_cast<xpath_query_impl*>(_impl), n, sd).c_str(); | 
|---|
|  | 9460 | } | 
|---|
|  | 9461 | #endif | 
|---|
|  | 9462 |  | 
|---|
|  | 9463 | size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const | 
|---|
|  | 9464 | { | 
|---|
|  | 9465 | xpath_stack_data sd; | 
|---|
|  | 9466 |  | 
|---|
|  | 9467 | xpath_string r = evaluate_string_impl(static_cast<xpath_query_impl*>(_impl), n, sd); | 
|---|
|  | 9468 |  | 
|---|
|  | 9469 | size_t full_size = r.length() + 1; | 
|---|
|  | 9470 |  | 
|---|
|  | 9471 | if (capacity > 0) | 
|---|
|  | 9472 | { | 
|---|
|  | 9473 | size_t size = (full_size < capacity) ? full_size : capacity; | 
|---|
|  | 9474 | assert(size > 0); | 
|---|
|  | 9475 |  | 
|---|
|  | 9476 | memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t)); | 
|---|
|  | 9477 | buffer[size - 1] = 0; | 
|---|
|  | 9478 | } | 
|---|
|  | 9479 |  | 
|---|
|  | 9480 | return full_size; | 
|---|
|  | 9481 | } | 
|---|
|  | 9482 |  | 
|---|
|  | 9483 | xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const | 
|---|
|  | 9484 | { | 
|---|
|  | 9485 | if (!_impl) return xpath_node_set(); | 
|---|
|  | 9486 |  | 
|---|
|  | 9487 | xpath_ast_node* root = static_cast<xpath_query_impl*>(_impl)->root; | 
|---|
|  | 9488 |  | 
|---|
|  | 9489 | if (root->rettype() != xpath_type_node_set) | 
|---|
|  | 9490 | { | 
|---|
|  | 9491 | #ifdef PUGIXML_NO_EXCEPTIONS | 
|---|
|  | 9492 | return xpath_node_set(); | 
|---|
|  | 9493 | #else | 
|---|
|  | 9494 | xpath_parse_result result; | 
|---|
|  | 9495 | result.error = "Expression does not evaluate to node set"; | 
|---|
|  | 9496 |  | 
|---|
|  | 9497 | throw xpath_exception(result); | 
|---|
|  | 9498 | #endif | 
|---|
|  | 9499 | } | 
|---|
|  | 9500 |  | 
|---|
|  | 9501 | xpath_context c(n, 1, 1); | 
|---|
|  | 9502 | xpath_stack_data sd; | 
|---|
|  | 9503 |  | 
|---|
|  | 9504 | #ifdef PUGIXML_NO_EXCEPTIONS | 
|---|
|  | 9505 | if (setjmp(sd.error_handler)) return xpath_node_set(); | 
|---|
|  | 9506 | #endif | 
|---|
|  | 9507 |  | 
|---|
|  | 9508 | xpath_node_set_raw r = root->eval_node_set(c, sd.stack); | 
|---|
|  | 9509 |  | 
|---|
|  | 9510 | return xpath_node_set(r.begin(), r.end(), r.type()); | 
|---|
|  | 9511 | } | 
|---|
|  | 9512 |  | 
|---|
|  | 9513 | const xpath_parse_result& xpath_query::result() const | 
|---|
|  | 9514 | { | 
|---|
|  | 9515 | return _result; | 
|---|
|  | 9516 | } | 
|---|
|  | 9517 |  | 
|---|
|  | 9518 | xpath_query::operator xpath_query::unspecified_bool_type() const | 
|---|
|  | 9519 | { | 
|---|
|  | 9520 | return _impl ? &xpath_query::_impl : 0; | 
|---|
|  | 9521 | } | 
|---|
|  | 9522 |  | 
|---|
|  | 9523 | bool xpath_query::operator!() const | 
|---|
|  | 9524 | { | 
|---|
|  | 9525 | return !_impl; | 
|---|
|  | 9526 | } | 
|---|
|  | 9527 |  | 
|---|
|  | 9528 | xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const | 
|---|
|  | 9529 | { | 
|---|
|  | 9530 | xpath_query q(query, variables); | 
|---|
|  | 9531 | return select_single_node(q); | 
|---|
|  | 9532 | } | 
|---|
|  | 9533 |  | 
|---|
|  | 9534 | xpath_node xml_node::select_single_node(const xpath_query& query) const | 
|---|
|  | 9535 | { | 
|---|
|  | 9536 | xpath_node_set s = query.evaluate_node_set(*this); | 
|---|
|  | 9537 | return s.empty() ? xpath_node() : s.first(); | 
|---|
|  | 9538 | } | 
|---|
|  | 9539 |  | 
|---|
|  | 9540 | xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const | 
|---|
|  | 9541 | { | 
|---|
|  | 9542 | xpath_query q(query, variables); | 
|---|
|  | 9543 | return select_nodes(q); | 
|---|
|  | 9544 | } | 
|---|
|  | 9545 |  | 
|---|
|  | 9546 | xpath_node_set xml_node::select_nodes(const xpath_query& query) const | 
|---|
|  | 9547 | { | 
|---|
|  | 9548 | return query.evaluate_node_set(*this); | 
|---|
|  | 9549 | } | 
|---|
|  | 9550 | } | 
|---|
|  | 9551 |  | 
|---|
|  | 9552 | #endif | 
|---|
|  | 9553 |  | 
|---|
|  | 9554 | /** | 
|---|
|  | 9555 | * Copyright (c) 2006-2010 Arseny Kapoulkine | 
|---|
|  | 9556 | * | 
|---|
|  | 9557 | * Permission is hereby granted, free of charge, to any person | 
|---|
|  | 9558 | * obtaining a copy of this software and associated documentation | 
|---|
|  | 9559 | * files (the "Software"), to deal in the Software without | 
|---|
|  | 9560 | * restriction, including without limitation the rights to use, | 
|---|
|  | 9561 | * copy, modify, merge, publish, distribute, sublicense, and/or sell | 
|---|
|  | 9562 | * copies of the Software, and to permit persons to whom the | 
|---|
|  | 9563 | * Software is furnished to do so, subject to the following | 
|---|
|  | 9564 | * conditions: | 
|---|
|  | 9565 | * | 
|---|
|  | 9566 | * The above copyright notice and this permission notice shall be | 
|---|
|  | 9567 | * included in all copies or substantial portions of the Software. | 
|---|
|  | 9568 | * | 
|---|
|  | 9569 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | 
|---|
|  | 9570 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | 
|---|
|  | 9571 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | 
|---|
|  | 9572 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | 
|---|
|  | 9573 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | 
|---|
|  | 9574 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | 
|---|
|  | 9575 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | 
|---|
|  | 9576 | * OTHER DEALINGS IN THE SOFTWARE. | 
|---|
|  | 9577 | */ | 
|---|