/**
 * pugixml parser - version 1.0
 * --------------------------------------------------------
 * Copyright (C) 2006-2010, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
 * Report bugs and download new versions at http://pugixml.org/
 *
 * This library is distributed under the MIT License. See notice at the end
 * of this file.
 *
 * This work is based on the pugxml parser, which is:
 * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
 */
|
---|
13 |
|
---|
14 | #include "pugixml.hpp"
|
---|
15 |
|
---|
16 | #include <stdlib.h>
|
---|
17 | #include <stdio.h>
|
---|
18 | #include <string.h>
|
---|
19 | #include <assert.h>
|
---|
20 | #include <setjmp.h>
|
---|
21 | #include <wchar.h>
|
---|
22 |
|
---|
23 | #ifndef PUGIXML_NO_XPATH
|
---|
24 | # include <math.h>
|
---|
25 | # include <float.h>
|
---|
26 | #endif
|
---|
27 |
|
---|
28 | #ifndef PUGIXML_NO_STL
|
---|
29 | # include <istream>
|
---|
30 | # include <ostream>
|
---|
31 | # include <string>
|
---|
32 | #endif
|
---|
33 |
|
---|
34 | // For placement new
|
---|
35 | #include <new>
|
---|
36 |
|
---|
37 | #ifdef _MSC_VER
|
---|
38 | # pragma warning(disable: 4127) // conditional expression is constant
|
---|
39 | # pragma warning(disable: 4324) // structure was padded due to __declspec(align())
|
---|
40 | # pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
|
---|
41 | # pragma warning(disable: 4702) // unreachable code
|
---|
42 | # pragma warning(disable: 4996) // this function or variable may be unsafe
|
---|
43 | #endif
|
---|
44 |
|
---|
45 | #ifdef __INTEL_COMPILER
|
---|
46 | # pragma warning(disable: 177) // function was declared but never referenced
|
---|
47 | # pragma warning(disable: 279) // controlling expression is constant
|
---|
48 | # pragma warning(disable: 1478 1786) // function was declared "deprecated"
|
---|
49 | #endif
|
---|
50 |
|
---|
51 | #ifdef __BORLANDC__
|
---|
52 | # pragma warn -8008 // condition is always false
|
---|
53 | # pragma warn -8066 // unreachable code
|
---|
54 | #endif
|
---|
55 |
|
---|
56 | #ifdef __SNC__
|
---|
57 | # pragma diag_suppress=178 // function was declared but never referenced
|
---|
58 | # pragma diag_suppress=237 // controlling expression is constant
|
---|
59 | #endif
|
---|
60 |
|
---|
61 | // uintptr_t
|
---|
62 | #if !defined(_MSC_VER) || _MSC_VER >= 1600
|
---|
63 | # include <stdint.h>
|
---|
64 | #else
|
---|
65 | # if _MSC_VER < 1300
|
---|
66 | // No native uintptr_t in MSVC6
|
---|
67 | typedef size_t uintptr_t;
|
---|
68 | # endif
|
---|
69 | typedef unsigned __int8 uint8_t;
|
---|
70 | typedef unsigned __int16 uint16_t;
|
---|
71 | typedef unsigned __int32 uint32_t;
|
---|
72 | typedef __int32 int32_t;
|
---|
73 | #endif
|
---|
74 |
|
---|
// Inlining controls
// PUGIXML_NO_INLINE marks a function that must not be inlined; it expands to the
// compiler-specific attribute where one is known and to nothing otherwise.
#if defined(_MSC_VER) && _MSC_VER >= 1300
#   define PUGIXML_NO_INLINE __declspec(noinline)
#else
#   if defined(__GNUC__)
#       define PUGIXML_NO_INLINE __attribute__((noinline))
#   else
#       define PUGIXML_NO_INLINE
#   endif
#endif
|
---|
83 |
|
---|
// Simple static assertion: a false condition requests an array of negative size, which does not compile.
// The macro expands to a braced block, so it can only be used where a statement is allowed.
#define STATIC_ASSERT(cond) \
    { \
        static const char condition_failed[(cond) ? 1 : -1] = {0}; \
        (void)condition_failed[0]; \
    }
|
---|
86 |
|
---|
// Digital Mars C++ bug workaround for passing char loaded from memory via stack:
// DMC_VOLATILE is 'volatile' on that compiler and empty everywhere else
#ifndef __DMC__
#   define DMC_VOLATILE
#else
#   define DMC_VOLATILE volatile
#endif
|
---|
93 |
|
---|
94 | using namespace pugi;
|
---|
95 |
|
---|
96 | // Memory allocation
|
---|
97 | namespace
|
---|
98 | {
|
---|
// Default allocation function: forwards the request to malloc and returns its result unchanged
// (so the result is null when malloc fails)
void* default_allocate(size_t size)
{
    void* block = ::malloc(size);

    return block;
}
|
---|
103 |
|
---|
// Default deallocation function: hands the pointer straight to free
void default_deallocate(void* ptr)
{
    ::free(ptr);
}
|
---|
108 |
|
---|
109 | allocation_function global_allocate = default_allocate;
|
---|
110 | deallocation_function global_deallocate = default_deallocate;
|
---|
111 | }
|
---|
112 |
|
---|
113 | // String utilities
|
---|
114 | namespace
|
---|
115 | {
|
---|
116 | // Get string length
|
---|
117 | size_t strlength(const char_t* s)
|
---|
118 | {
|
---|
119 | assert(s);
|
---|
120 |
|
---|
121 | #ifdef PUGIXML_WCHAR_MODE
|
---|
122 | return wcslen(s);
|
---|
123 | #else
|
---|
124 | return strlen(s);
|
---|
125 | #endif
|
---|
126 | }
|
---|
127 |
|
---|
128 | // Compare two strings
|
---|
129 | bool strequal(const char_t* src, const char_t* dst)
|
---|
130 | {
|
---|
131 | assert(src && dst);
|
---|
132 |
|
---|
133 | #ifdef PUGIXML_WCHAR_MODE
|
---|
134 | return wcscmp(src, dst) == 0;
|
---|
135 | #else
|
---|
136 | return strcmp(src, dst) == 0;
|
---|
137 | #endif
|
---|
138 | }
|
---|
139 |
|
---|
140 | // Compare lhs with [rhs_begin, rhs_end)
|
---|
141 | bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
|
---|
142 | {
|
---|
143 | for (size_t i = 0; i < count; ++i)
|
---|
144 | if (lhs[i] != rhs[i])
|
---|
145 | return false;
|
---|
146 |
|
---|
147 | return lhs[count] == 0;
|
---|
148 | }
|
---|
149 |
|
---|
150 | #ifdef PUGIXML_WCHAR_MODE
|
---|
// Copy a narrow string into a wide buffer character by character and null-terminate it.
// All characters of source are assumed to be ASCII; dest must have room for the terminator too.
void widen_ascii(wchar_t* dest, const char* source)
{
    while (*source)
    {
        *dest = *source;

        ++dest;
        ++source;
    }

    *dest = 0;
}
|
---|
157 | #endif
|
---|
158 | }
|
---|
159 |
|
---|
160 | #if !defined(PUGIXML_NO_STL) || !defined(PUGIXML_NO_XPATH)
|
---|
161 | // auto_ptr-like buffer holder for exception recovery
|
---|
162 | namespace
|
---|
163 | {
|
---|
// Scope guard for a raw buffer: the destructor passes the buffer to the supplied deleter
// unless ownership has been taken back with release(). A null buffer is never passed to the deleter.
struct buffer_holder
{
    void* data;              // owned buffer, or 0 once released
    void (*deleter)(void*);  // function that frees the buffer

    buffer_holder(void* data, void (*deleter)(void*)): data(data), deleter(deleter)
    {
    }

    ~buffer_holder()
    {
        if (data) deleter(data);
    }

    // Give up ownership: returns the buffer and makes the destructor a no-op
    void* release()
    {
        void* result = data;
        data = 0;
        return result;
    }

private:
    // Copying would leave two holders that both free the same buffer, so it is disabled
    // (declared private and never defined)
    buffer_holder(const buffer_holder&);
    buffer_holder& operator=(const buffer_holder&);
};
|
---|
185 | }
|
---|
186 | #endif
|
---|
187 |
|
---|
188 | namespace
|
---|
189 | {
|
---|
// Payload size, in bytes, of a regular memory page
static const size_t xml_memory_page_size = 32768;

// Pages start on a 32-byte boundary, which leaves the five low bits of a page address free.
// Node and attribute headers keep the page address in the upper bits and use the low bits as flags.
static const uintptr_t xml_memory_page_alignment = 32;
static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);
static const uintptr_t xml_memory_page_name_allocated_mask = 0x10;
static const uintptr_t xml_memory_page_value_allocated_mask = 0x08;
static const uintptr_t xml_memory_page_type_mask = 0x07;
|
---|
197 |
|
---|
struct xml_allocator;

// Header of a memory page. The payload begins at 'data' and continues past the end of the struct.
struct xml_memory_page
{
    // Turn raw storage into an empty page by zeroing every header field.
    // A null pointer is passed straight through.
    static xml_memory_page* construct(void* memory)
    {
        xml_memory_page* page = static_cast<xml_memory_page*>(memory);

        if (page)
        {
            page->allocator = 0;
            page->memory = 0;
            page->prev = 0;
            page->next = 0;
            page->busy_size = 0;
            page->freed_size = 0;
        }

        return page;
    }

    xml_allocator* allocator; // allocator this page belongs to

    void* memory; // pointer returned by the allocation function; this is what gets deallocated

    xml_memory_page* prev; // neighbouring pages in the allocator's page list
    xml_memory_page* next;

    size_t busy_size;  // bytes handed out from this page
    size_t freed_size; // bytes given back to this page

    char data[1]; // first byte of the payload
};
|
---|
230 |
|
---|
// Bookkeeping record placed directly in front of every string handed out by the allocator
struct xml_memory_string_header
{
    uint16_t page_offset; // distance in bytes from page->data to this header
    uint16_t full_size;   // size of header plus string storage; 0 means the string occupies the whole page
};
|
---|
236 |
|
---|
237 | struct xml_allocator
|
---|
238 | {
|
---|
239 | xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
|
---|
240 | {
|
---|
241 | }
|
---|
242 |
|
---|
243 | xml_memory_page* allocate_page(size_t data_size)
|
---|
244 | {
|
---|
245 | size_t size = offsetof(xml_memory_page, data) + data_size;
|
---|
246 |
|
---|
247 | // allocate block with some alignment, leaving memory for worst-case padding
|
---|
248 | void* memory = global_allocate(size + xml_memory_page_alignment);
|
---|
249 | if (!memory) return 0;
|
---|
250 |
|
---|
251 | // align upwards to page boundary
|
---|
252 | void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1));
|
---|
253 |
|
---|
254 | // prepare page structure
|
---|
255 | xml_memory_page* page = xml_memory_page::construct(page_memory);
|
---|
256 |
|
---|
257 | page->memory = memory;
|
---|
258 | page->allocator = _root->allocator;
|
---|
259 |
|
---|
260 | return page;
|
---|
261 | }
|
---|
262 |
|
---|
263 | static void deallocate_page(xml_memory_page* page)
|
---|
264 | {
|
---|
265 | global_deallocate(page->memory);
|
---|
266 | }
|
---|
267 |
|
---|
268 | void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
|
---|
269 |
|
---|
270 | void* allocate_memory(size_t size, xml_memory_page*& out_page)
|
---|
271 | {
|
---|
272 | if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page);
|
---|
273 |
|
---|
274 | void* buf = _root->data + _busy_size;
|
---|
275 |
|
---|
276 | _busy_size += size;
|
---|
277 |
|
---|
278 | out_page = _root;
|
---|
279 |
|
---|
280 | return buf;
|
---|
281 | }
|
---|
282 |
|
---|
283 | void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
|
---|
284 | {
|
---|
285 | if (page == _root) page->busy_size = _busy_size;
|
---|
286 |
|
---|
287 | assert(ptr >= page->data && ptr < page->data + page->busy_size);
|
---|
288 | (void)!ptr;
|
---|
289 |
|
---|
290 | page->freed_size += size;
|
---|
291 | assert(page->freed_size <= page->busy_size);
|
---|
292 |
|
---|
293 | if (page->freed_size == page->busy_size)
|
---|
294 | {
|
---|
295 | if (page->next == 0)
|
---|
296 | {
|
---|
297 | assert(_root == page);
|
---|
298 |
|
---|
299 | // top page freed, just reset sizes
|
---|
300 | page->busy_size = page->freed_size = 0;
|
---|
301 | _busy_size = 0;
|
---|
302 | }
|
---|
303 | else
|
---|
304 | {
|
---|
305 | assert(_root != page);
|
---|
306 | assert(page->prev);
|
---|
307 |
|
---|
308 | // remove from the list
|
---|
309 | page->prev->next = page->next;
|
---|
310 | page->next->prev = page->prev;
|
---|
311 |
|
---|
312 | // deallocate
|
---|
313 | deallocate_page(page);
|
---|
314 | }
|
---|
315 | }
|
---|
316 | }
|
---|
317 |
|
---|
318 | char_t* allocate_string(size_t length)
|
---|
319 | {
|
---|
320 | // allocate memory for string and header block
|
---|
321 | size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
|
---|
322 |
|
---|
323 | // round size up to pointer alignment boundary
|
---|
324 | size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1);
|
---|
325 |
|
---|
326 | xml_memory_page* page;
|
---|
327 | xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
|
---|
328 |
|
---|
329 | if (!header) return 0;
|
---|
330 |
|
---|
331 | // setup header
|
---|
332 | ptrdiff_t page_offset = reinterpret_cast<char*>(header) - page->data;
|
---|
333 |
|
---|
334 | assert(page_offset >= 0 && page_offset < (1 << 16));
|
---|
335 | header->page_offset = static_cast<uint16_t>(page_offset);
|
---|
336 |
|
---|
337 | // full_size == 0 for large strings that occupy the whole page
|
---|
338 | assert(full_size < (1 << 16) || (page->busy_size == full_size && page_offset == 0));
|
---|
339 | header->full_size = static_cast<uint16_t>(full_size < (1 << 16) ? full_size : 0);
|
---|
340 |
|
---|
341 | return reinterpret_cast<char_t*>(header + 1);
|
---|
342 | }
|
---|
343 |
|
---|
344 | void deallocate_string(char_t* string)
|
---|
345 | {
|
---|
346 | // get header
|
---|
347 | xml_memory_string_header* header = reinterpret_cast<xml_memory_string_header*>(string) - 1;
|
---|
348 |
|
---|
349 | // deallocate
|
---|
350 | size_t page_offset = offsetof(xml_memory_page, data) + header->page_offset;
|
---|
351 | xml_memory_page* page = reinterpret_cast<xml_memory_page*>(reinterpret_cast<char*>(header) - page_offset);
|
---|
352 |
|
---|
353 | // if full_size == 0 then this string occupies the whole page
|
---|
354 | size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size;
|
---|
355 |
|
---|
356 | deallocate_memory(header, full_size, page);
|
---|
357 | }
|
---|
358 |
|
---|
359 | xml_memory_page* _root;
|
---|
360 | size_t _busy_size;
|
---|
361 | };
|
---|
362 |
|
---|
363 | PUGIXML_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
|
---|
364 | {
|
---|
365 | const size_t large_allocation_threshold = xml_memory_page_size / 4;
|
---|
366 |
|
---|
367 | xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
|
---|
368 | if (!page) return 0;
|
---|
369 |
|
---|
370 | if (size <= large_allocation_threshold)
|
---|
371 | {
|
---|
372 | _root->busy_size = _busy_size;
|
---|
373 |
|
---|
374 | // insert page at the end of linked list
|
---|
375 | page->prev = _root;
|
---|
376 | _root->next = page;
|
---|
377 | _root = page;
|
---|
378 |
|
---|
379 | _busy_size = size;
|
---|
380 | }
|
---|
381 | else
|
---|
382 | {
|
---|
383 | // insert page before the end of linked list, so that it is deleted as soon as possible
|
---|
384 | // the last page is not deleted even if it's empty (see deallocate_memory)
|
---|
385 | assert(_root->prev);
|
---|
386 |
|
---|
387 | page->prev = _root->prev;
|
---|
388 | page->next = _root;
|
---|
389 |
|
---|
390 | _root->prev->next = page;
|
---|
391 | _root->prev = page;
|
---|
392 | }
|
---|
393 |
|
---|
394 | // allocate inside page
|
---|
395 | page->busy_size = size;
|
---|
396 |
|
---|
397 | out_page = page;
|
---|
398 | return page->data;
|
---|
399 | }
|
---|
400 | }
|
---|
401 |
|
---|
402 | namespace pugi
|
---|
403 | {
|
---|
404 | /// A 'name=value' XML attribute structure.
|
---|
405 | struct xml_attribute_struct
|
---|
406 | {
|
---|
407 | /// Default ctor
|
---|
408 | xml_attribute_struct(xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0)
|
---|
409 | {
|
---|
410 | }
|
---|
411 |
|
---|
412 | uintptr_t header;
|
---|
413 |
|
---|
414 | char_t* name; ///< Pointer to attribute name.
|
---|
415 | char_t* value; ///< Pointer to attribute value.
|
---|
416 |
|
---|
417 | xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list)
|
---|
418 | xml_attribute_struct* next_attribute; ///< Next attribute
|
---|
419 | };
|
---|
420 |
|
---|
421 | /// An XML document tree node.
|
---|
422 | struct xml_node_struct
|
---|
423 | {
|
---|
424 | /// Default ctor
|
---|
425 | /// \param type - node type
|
---|
426 | xml_node_struct(xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
|
---|
427 | {
|
---|
428 | }
|
---|
429 |
|
---|
430 | uintptr_t header;
|
---|
431 |
|
---|
432 | xml_node_struct* parent; ///< Pointer to parent
|
---|
433 |
|
---|
434 | char_t* name; ///< Pointer to element name.
|
---|
435 | char_t* value; ///< Pointer to any associated string data.
|
---|
436 |
|
---|
437 | xml_node_struct* first_child; ///< First child
|
---|
438 |
|
---|
439 | xml_node_struct* prev_sibling_c; ///< Left brother (cyclic list)
|
---|
440 | xml_node_struct* next_sibling; ///< Right brother
|
---|
441 |
|
---|
442 | xml_attribute_struct* first_attribute; ///< First attribute
|
---|
443 | };
|
---|
444 | }
|
---|
445 |
|
---|
446 | namespace
|
---|
447 | {
|
---|
448 | struct xml_document_struct: public xml_node_struct, public xml_allocator
|
---|
449 | {
|
---|
450 | xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0)
|
---|
451 | {
|
---|
452 | }
|
---|
453 |
|
---|
454 | const char_t* buffer;
|
---|
455 | };
|
---|
456 |
|
---|
457 | static inline xml_allocator& get_allocator(const xml_node_struct* node)
|
---|
458 | {
|
---|
459 | assert(node);
|
---|
460 |
|
---|
461 | return *reinterpret_cast<xml_memory_page*>(node->header & xml_memory_page_pointer_mask)->allocator;
|
---|
462 | }
|
---|
463 | }
|
---|
464 |
|
---|
465 | // Low-level DOM operations
|
---|
466 | namespace
|
---|
467 | {
|
---|
468 | inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
|
---|
469 | {
|
---|
470 | xml_memory_page* page;
|
---|
471 | void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page);
|
---|
472 |
|
---|
473 | return new (memory) xml_attribute_struct(page);
|
---|
474 | }
|
---|
475 |
|
---|
476 | inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
|
---|
477 | {
|
---|
478 | xml_memory_page* page;
|
---|
479 | void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page);
|
---|
480 |
|
---|
481 | return new (memory) xml_node_struct(page, type);
|
---|
482 | }
|
---|
483 |
|
---|
484 | inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
|
---|
485 | {
|
---|
486 | uintptr_t header = a->header;
|
---|
487 |
|
---|
488 | if (header & xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name);
|
---|
489 | if (header & xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value);
|
---|
490 |
|
---|
491 | alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
|
---|
492 | }
|
---|
493 |
|
---|
494 | inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
|
---|
495 | {
|
---|
496 | uintptr_t header = n->header;
|
---|
497 |
|
---|
498 | if (header & xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name);
|
---|
499 | if (header & xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value);
|
---|
500 |
|
---|
501 | for (xml_attribute_struct* attr = n->first_attribute; attr; )
|
---|
502 | {
|
---|
503 | xml_attribute_struct* next = attr->next_attribute;
|
---|
504 |
|
---|
505 | destroy_attribute(attr, alloc);
|
---|
506 |
|
---|
507 | attr = next;
|
---|
508 | }
|
---|
509 |
|
---|
510 | for (xml_node_struct* child = n->first_child; child; )
|
---|
511 | {
|
---|
512 | xml_node_struct* next = child->next_sibling;
|
---|
513 |
|
---|
514 | destroy_node(child, alloc);
|
---|
515 |
|
---|
516 | child = next;
|
---|
517 | }
|
---|
518 |
|
---|
519 | alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
|
---|
520 | }
|
---|
521 |
|
---|
522 | PUGIXML_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
|
---|
523 | {
|
---|
524 | xml_node_struct* child = allocate_node(alloc, type);
|
---|
525 | if (!child) return 0;
|
---|
526 |
|
---|
527 | child->parent = node;
|
---|
528 |
|
---|
529 | xml_node_struct* first_child = node->first_child;
|
---|
530 |
|
---|
531 | if (first_child)
|
---|
532 | {
|
---|
533 | xml_node_struct* last_child = first_child->prev_sibling_c;
|
---|
534 |
|
---|
535 | last_child->next_sibling = child;
|
---|
536 | child->prev_sibling_c = last_child;
|
---|
537 | first_child->prev_sibling_c = child;
|
---|
538 | }
|
---|
539 | else
|
---|
540 | {
|
---|
541 | node->first_child = child;
|
---|
542 | child->prev_sibling_c = child;
|
---|
543 | }
|
---|
544 |
|
---|
545 | return child;
|
---|
546 | }
|
---|
547 |
|
---|
548 | PUGIXML_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc)
|
---|
549 | {
|
---|
550 | xml_attribute_struct* a = allocate_attribute(alloc);
|
---|
551 | if (!a) return 0;
|
---|
552 |
|
---|
553 | xml_attribute_struct* first_attribute = node->first_attribute;
|
---|
554 |
|
---|
555 | if (first_attribute)
|
---|
556 | {
|
---|
557 | xml_attribute_struct* last_attribute = first_attribute->prev_attribute_c;
|
---|
558 |
|
---|
559 | last_attribute->next_attribute = a;
|
---|
560 | a->prev_attribute_c = last_attribute;
|
---|
561 | first_attribute->prev_attribute_c = a;
|
---|
562 | }
|
---|
563 | else
|
---|
564 | {
|
---|
565 | node->first_attribute = a;
|
---|
566 | a->prev_attribute_c = a;
|
---|
567 | }
|
---|
568 |
|
---|
569 | return a;
|
---|
570 | }
|
---|
571 | }
|
---|
572 |
|
---|
573 | // Helper classes for code generation
|
---|
574 | namespace
|
---|
575 | {
|
---|
// Compile-time "false" tag for template option parameters
struct opt_false
{
    enum
    {
        value = 0
    };
};
|
---|
580 |
|
---|
// Compile-time "true" tag for template option parameters
struct opt_true
{
    enum
    {
        value = 1
    };
};
|
---|
585 | }
|
---|
586 |
|
---|
587 | // Unicode utilities
|
---|
588 | namespace
|
---|
589 | {
|
---|
// Exchange the two bytes of a 16-bit value
inline uint16_t endian_swap(uint16_t value)
{
    const unsigned low_byte = value & 0xff;
    const unsigned high_byte = value >> 8;

    return static_cast<uint16_t>((low_byte << 8) | high_byte);
}
|
---|
594 |
|
---|
// Reverse the byte order of a 32-bit value
inline uint32_t endian_swap(uint32_t value)
{
    const uint32_t byte0_to_3 = (value & 0xff) << 24;
    const uint32_t byte1_to_2 = (value & 0xff00) << 8;
    const uint32_t byte2_to_1 = (value & 0xff0000) >> 8;
    const uint32_t byte3_to_0 = value >> 24;

    return byte0_to_3 | byte1_to_2 | byte2_to_1 | byte3_to_0;
}
|
---|
599 |
|
---|
// Decoder traits that count how many UTF-8 bytes the decoded code points need
struct utf8_counter
{
    typedef size_t value_type;

    // Code points below U+10000: 1 byte up to U+007F, 2 bytes up to U+07FF, 3 bytes otherwise
    static value_type low(value_type result, uint32_t ch)
    {
        const size_t length = (ch < 0x80) ? 1 : (ch < 0x800) ? 2 : 3;

        return result + length;
    }

    // U+10000..U+10FFFF always take 4 bytes
    static value_type high(value_type result, uint32_t)
    {
        return result + 4;
    }
};
|
---|
620 |
|
---|
// Decoder traits that encode code points as UTF-8 into a caller-provided buffer.
// Every function returns the position just past the bytes it wrote.
struct utf8_writer
{
    typedef uint8_t* value_type;

    // Encode a code point below U+10000 (1 to 3 bytes)
    static value_type low(value_type result, uint32_t ch)
    {
        // U+0800..U+FFFF
        if (ch >= 0x800)
        {
            result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
            result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
            result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
            return result + 3;
        }

        // U+0080..U+07FF
        if (ch >= 0x80)
        {
            result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
            result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
            return result + 2;
        }

        // U+0000..U+007F
        *result = static_cast<uint8_t>(ch);
        return result + 1;
    }

    // Encode a code point in U+10000..U+10FFFF (4 bytes)
    static value_type high(value_type result, uint32_t ch)
    {
        result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
        result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
        result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
        result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
        return result + 4;
    }

    // Encode any code point by picking low or high based on its value
    static value_type any(value_type result, uint32_t ch)
    {
        if (ch < 0x10000) return low(result, ch);

        return high(result, ch);
    }
};
|
---|
665 |
|
---|
// Decoder traits that count how many UTF-16 code units the decoded code points need
struct utf16_counter
{
    typedef size_t value_type;

    // A code point below U+10000 is a single unit
    static value_type low(value_type result, uint32_t)
    {
        return result + 1;
    }

    // A code point from U+10000 up is a surrogate pair, i.e. two units
    static value_type high(value_type result, uint32_t)
    {
        return result + 2;
    }
};
|
---|
680 |
|
---|
// Decoder traits that encode code points as UTF-16 into a caller-provided buffer.
// Every function returns the position just past the units it wrote.
struct utf16_writer
{
    typedef uint16_t* value_type;

    // Store a code point below U+10000 as one unit
    static value_type low(value_type result, uint32_t ch)
    {
        result[0] = static_cast<uint16_t>(ch);

        return result + 1;
    }

    // Store a code point from U+10000 up as a surrogate pair
    static value_type high(value_type result, uint32_t ch)
    {
        const uint32_t offset = ch - 0x10000;

        const uint32_t msh = offset >> 10;   // upper bits go into the lead surrogate
        const uint32_t lsh = offset & 0x3ff; // low ten bits go into the trail surrogate

        result[0] = static_cast<uint16_t>(0xD800 + msh);
        result[1] = static_cast<uint16_t>(0xDC00 + lsh);

        return result + 2;
    }

    // Store any code point by picking low or high based on its value
    static value_type any(value_type result, uint32_t ch)
    {
        if (ch < 0x10000) return low(result, ch);

        return high(result, ch);
    }
};
|
---|
708 |
|
---|
// Decoder traits that count UTF-32 code units: every code point is exactly one unit
struct utf32_counter
{
    typedef size_t value_type;

    static value_type low(value_type result, uint32_t)
    {
        return result + 1;
    }

    static value_type high(value_type result, uint32_t)
    {
        return result + 1;
    }
};
|
---|
723 |
|
---|
// Decoder traits that store code points as UTF-32 units in a caller-provided buffer.
// All three entry points do the same thing: write the code point and return the next position.
struct utf32_writer
{
    typedef uint32_t* value_type;

    static value_type low(value_type result, uint32_t ch)
    {
        return any(result, ch);
    }

    static value_type high(value_type result, uint32_t ch)
    {
        return any(result, ch);
    }

    static value_type any(value_type result, uint32_t ch)
    {
        result[0] = ch;

        return result + 1;
    }
};
|
---|
749 |
|
---|
750 | template <size_t size> struct wchar_selector;
|
---|
751 |
|
---|
752 | template <> struct wchar_selector<2>
|
---|
753 | {
|
---|
754 | typedef uint16_t type;
|
---|
755 | typedef utf16_counter counter;
|
---|
756 | typedef utf16_writer writer;
|
---|
757 | };
|
---|
758 |
|
---|
759 | template <> struct wchar_selector<4>
|
---|
760 | {
|
---|
761 | typedef uint32_t type;
|
---|
762 | typedef utf32_counter counter;
|
---|
763 | typedef utf32_writer writer;
|
---|
764 | };
|
---|
765 |
|
---|
766 | typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
|
---|
767 | typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
|
---|
768 |
|
---|
769 | template <typename Traits, typename opt_swap = opt_false> struct utf_decoder
|
---|
770 | {
|
---|
771 | static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result)
|
---|
772 | {
|
---|
773 | const uint8_t utf8_byte_mask = 0x3f;
|
---|
774 |
|
---|
775 | while (size)
|
---|
776 | {
|
---|
777 | uint8_t lead = *data;
|
---|
778 |
|
---|
779 | // 0xxxxxxx -> U+0000..U+007F
|
---|
780 | if (lead < 0x80)
|
---|
781 | {
|
---|
782 | result = Traits::low(result, lead);
|
---|
783 | data += 1;
|
---|
784 | size -= 1;
|
---|
785 |
|
---|
786 | // process aligned single-byte (ascii) blocks
|
---|
787 | if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
|
---|
788 | {
|
---|
789 | while (size >= 4 && (*reinterpret_cast<const uint32_t*>(data) & 0x80808080) == 0)
|
---|
790 | {
|
---|
791 | result = Traits::low(result, data[0]);
|
---|
792 | result = Traits::low(result, data[1]);
|
---|
793 | result = Traits::low(result, data[2]);
|
---|
794 | result = Traits::low(result, data[3]);
|
---|
795 | data += 4;
|
---|
796 | size -= 4;
|
---|
797 | }
|
---|
798 | }
|
---|
799 | }
|
---|
800 | // 110xxxxx -> U+0080..U+07FF
|
---|
801 | else if ((unsigned)(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
|
---|
802 | {
|
---|
803 | result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
|
---|
804 | data += 2;
|
---|
805 | size -= 2;
|
---|
806 | }
|
---|
807 | // 1110xxxx -> U+0800-U+FFFF
|
---|
808 | else if ((unsigned)(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
|
---|
809 | {
|
---|
810 | result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
|
---|
811 | data += 3;
|
---|
812 | size -= 3;
|
---|
813 | }
|
---|
814 | // 11110xxx -> U+10000..U+10FFFF
|
---|
815 | else if ((unsigned)(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
|
---|
816 | {
|
---|
817 | result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
|
---|
818 | data += 4;
|
---|
819 | size -= 4;
|
---|
820 | }
|
---|
821 | // 10xxxxxx or 11111xxx -> invalid
|
---|
822 | else
|
---|
823 | {
|
---|
824 | data += 1;
|
---|
825 | size -= 1;
|
---|
826 | }
|
---|
827 | }
|
---|
828 |
|
---|
829 | return result;
|
---|
830 | }
|
---|
831 |
|
---|
832 | static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result)
|
---|
833 | {
|
---|
834 | const uint16_t* end = data + size;
|
---|
835 |
|
---|
836 | while (data < end)
|
---|
837 | {
|
---|
838 | uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
|
---|
839 |
|
---|
840 | // U+0000..U+D7FF
|
---|
841 | if (lead < 0xD800)
|
---|
842 | {
|
---|
843 | result = Traits::low(result, lead);
|
---|
844 | data += 1;
|
---|
845 | }
|
---|
846 | // U+E000..U+FFFF
|
---|
847 | else if ((unsigned)(lead - 0xE000) < 0x2000)
|
---|
848 | {
|
---|
849 | result = Traits::low(result, lead);
|
---|
850 | data += 1;
|
---|
851 | }
|
---|
852 | // surrogate pair lead
|
---|
853 | else if ((unsigned)(lead - 0xD800) < 0x400 && data + 1 < end)
|
---|
854 | {
|
---|
855 | uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
|
---|
856 |
|
---|
857 | if ((unsigned)(next - 0xDC00) < 0x400)
|
---|
858 | {
|
---|
859 | result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
|
---|
860 | data += 2;
|
---|
861 | }
|
---|
862 | else
|
---|
863 | {
|
---|
864 | data += 1;
|
---|
865 | }
|
---|
866 | }
|
---|
867 | else
|
---|
868 | {
|
---|
869 | data += 1;
|
---|
870 | }
|
---|
871 | }
|
---|
872 |
|
---|
873 | return result;
|
---|
874 | }
|
---|
875 |
|
---|
876 | static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result)
|
---|
877 | {
|
---|
878 | const uint32_t* end = data + size;
|
---|
879 |
|
---|
880 | while (data < end)
|
---|
881 | {
|
---|
882 | uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
|
---|
883 |
|
---|
884 | // U+0000..U+FFFF
|
---|
885 | if (lead < 0x10000)
|
---|
886 | {
|
---|
887 | result = Traits::low(result, lead);
|
---|
888 | data += 1;
|
---|
889 | }
|
---|
890 | // U+10000..U+10FFFF
|
---|
891 | else
|
---|
892 | {
|
---|
893 | result = Traits::high(result, lead);
|
---|
894 | data += 1;
|
---|
895 | }
|
---|
896 | }
|
---|
897 |
|
---|
898 | return result;
|
---|
899 | }
|
---|
900 | };
|
---|
901 |
|
---|
// Write the byte-swapped form of each of the length units in data to result, in order
template <typename T> inline void convert_utf_endian_swap(T* result, const T* data, size_t length)
{
    for (const T* end = data + length; data != end; ++data, ++result)
        *result = endian_swap(*data);
}
|
---|
906 |
|
---|
907 | inline void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
|
---|
908 | {
|
---|
909 | for (size_t i = 0; i < length; ++i) result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
|
---|
910 | }
|
---|
911 | }
|
---|
912 |
|
---|
913 | namespace
|
---|
914 | {
|
---|
// Character class bits stored in chartype_table; one bit per class so several classes can be tested with one mask.
enum chartype_t
{
	ct_parse_pcdata  = 1 << 0, // \0, &, \r, <
	ct_parse_attr    = 1 << 1, // \0, &, \r, ', "
	ct_parse_attr_ws = 1 << 2, // \0, &, \r, ', ", \n, tab
	ct_space         = 1 << 3, // \r, \n, space, tab
	ct_parse_cdata   = 1 << 4, // \0, ], >, \r
	ct_parse_comment = 1 << 5, // \0, -, >, \r
	ct_symbol        = 1 << 6, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
	ct_start_symbol  = 1 << 7  // Any symbol > 127, a-z, A-Z, _, :
};
|
---|
926 |
|
---|
// Per-character class masks (bits of chartype_t), indexed by unsigned character value.
// Values are written in hex so the individual class bits are easy to read off.
const unsigned char chartype_table[256] =
{
	0x37, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x3F, 0x00, 0x00, // 0-15: \0, tab, \n, \r
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 16-31
	0x08, 0x00, 0x06, 0x00, 0x00, 0x00, 0x07, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x40, 0x00, // 32-47: space, ", &, ', -, .
	0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xC0, 0x00, 0x01, 0x00, 0x30, 0x00, // 48-63: 0-9, :, <, >
	0x00, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, // 64-79: A-O
	0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0x00, 0x00, 0x10, 0x00, 0xC0, // 80-95: P-Z, ], _
	0x00, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, // 96-111: a-o
	0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, // 112-127: p-z

	0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, // 128+: all symbol | start_symbol
	0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0,
	0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0,
	0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0,
	0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0,
	0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0,
	0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0,
	0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0
};
|
---|
947 |
|
---|
// Character class bits stored in chartypex_table (the second classification table).
enum chartypex_t
{
	ctx_special_pcdata = 1 << 0, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
	ctx_special_attr   = 1 << 1, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
	ctx_start_symbol   = 1 << 2, // Any symbol > 127, a-z, A-Z, _
	ctx_digit          = 1 << 3, // 0-9
	ctx_symbol         = 1 << 4  // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
};
|
---|
956 |
|
---|
// Per-character class masks (bits of chartypex_t), indexed by unsigned character value.
// Values are written in hex so the individual class bits are easy to read off.
const unsigned char chartypex_table[256] =
{
	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x00, 0x02, 0x03, 0x03, 0x02, 0x03, 0x03, // 0-15: tab, \n, \r differ
	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, // 16-31
	0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x00, // 32-47: ", &, -, .
	0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x03, 0x00, 0x03, 0x00, // 48-63: 0-9, <, >

	0x00, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, // 64-79: A-O
	0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x00, 0x00, 0x00, 0x00, 0x14, // 80-95: P-Z, _
	0x00, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, // 96-111: a-o
	0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, // 112-127: p-z

	0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, // 128+: all start_symbol | symbol
	0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14,
	0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14,
	0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14,
	0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14,
	0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14,
	0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14,
	0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14
};
|
---|
978 |
|
---|
// Character class lookup: evaluates to non-zero when character c has any of the class bits ct set in table.
#ifdef PUGIXML_WCHAR_MODE
	// wide characters: every unit >= 128 shares the classification stored at table[128] (c is evaluated twice)
	#define IS_CHARTYPE_IMPL(c, ct, table) (((static_cast<unsigned int>(c) < 128) ? (table)[static_cast<unsigned int>(c)] : (table)[128]) & (ct))
#else
	// narrow characters: index the table directly by the unsigned char value
	#define IS_CHARTYPE_IMPL(c, ct, table) ((table)[static_cast<unsigned char>(c)] & (ct))
#endif

	// Convenience wrappers bound to the two classification tables
	#define IS_CHARTYPE(c, ct) IS_CHARTYPE_IMPL(c, ct, chartype_table)
	#define IS_CHARTYPEX(c, ct) IS_CHARTYPE_IMPL(c, ct, chartypex_table)
|
---|
987 |
|
---|
// Runtime byte-order probe: returns true when the lowest-addressed byte of an unsigned int holding 1 is 1.
bool is_little_endian()
{
	unsigned int probe = 1;
	const unsigned char* first_byte = reinterpret_cast<unsigned char*>(&probe);

	return first_byte[0] == 1;
}
|
---|
994 |
|
---|
995 | xml_encoding get_wchar_encoding()
|
---|
996 | {
|
---|
997 | STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
|
---|
998 |
|
---|
999 | if (sizeof(wchar_t) == 2)
|
---|
1000 | return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
|
---|
1001 | else
|
---|
1002 | return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
|
---|
1003 | }
|
---|
1004 |
|
---|
1005 | xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)
|
---|
1006 | {
|
---|
1007 | // look for BOM in first few bytes
|
---|
1008 | if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
|
---|
1009 | if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
|
---|
1010 | if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
|
---|
1011 | if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
|
---|
1012 | if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
|
---|
1013 |
|
---|
1014 | // look for <, <? or <?xm in various encodings
|
---|
1015 | if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
|
---|
1016 | if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
|
---|
1017 | if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
|
---|
1018 | if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
|
---|
1019 | if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8;
|
---|
1020 |
|
---|
1021 | // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
|
---|
1022 | if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
|
---|
1023 | if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
|
---|
1024 |
|
---|
1025 | // no known BOM detected, assume utf8
|
---|
1026 | return encoding_utf8;
|
---|
1027 | }
|
---|
1028 |
|
---|
1029 | xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
|
---|
1030 | {
|
---|
1031 | // replace wchar encoding with utf implementation
|
---|
1032 | if (encoding == encoding_wchar) return get_wchar_encoding();
|
---|
1033 |
|
---|
1034 | // replace utf16 encoding with utf16 with specific endianness
|
---|
1035 | if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
|
---|
1036 |
|
---|
1037 | // replace utf32 encoding with utf32 with specific endianness
|
---|
1038 | if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
|
---|
1039 |
|
---|
1040 | // only do autodetection if no explicit encoding is requested
|
---|
1041 | if (encoding != encoding_auto) return encoding;
|
---|
1042 |
|
---|
1043 | // skip encoding autodetection if input buffer is too small
|
---|
1044 | if (size < 4) return encoding_utf8;
|
---|
1045 |
|
---|
1046 | // try to guess encoding (based on XML specification, Appendix F.1)
|
---|
1047 | const uint8_t* data = static_cast<const uint8_t*>(contents);
|
---|
1048 |
|
---|
1049 | DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
|
---|
1050 |
|
---|
1051 | return guess_buffer_encoding(d0, d1, d2, d3);
|
---|
1052 | }
|
---|
1053 |
|
---|
1054 | bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
|
---|
1055 | {
|
---|
1056 | if (is_mutable)
|
---|
1057 | {
|
---|
1058 | out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
|
---|
1059 | }
|
---|
1060 | else
|
---|
1061 | {
|
---|
1062 | void* buffer = global_allocate(size > 0 ? size : 1);
|
---|
1063 | if (!buffer) return false;
|
---|
1064 |
|
---|
1065 | memcpy(buffer, contents, size);
|
---|
1066 |
|
---|
1067 | out_buffer = static_cast<char_t*>(buffer);
|
---|
1068 | }
|
---|
1069 |
|
---|
1070 | out_length = size / sizeof(char_t);
|
---|
1071 |
|
---|
1072 | return true;
|
---|
1073 | }
|
---|
1074 |
|
---|
1075 | #ifdef PUGIXML_WCHAR_MODE
|
---|
1076 | inline bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
|
---|
1077 | {
|
---|
1078 | return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
|
---|
1079 | (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
|
---|
1080 | }
|
---|
1081 |
|
---|
1082 | bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
|
---|
1083 | {
|
---|
1084 | const char_t* data = static_cast<const char_t*>(contents);
|
---|
1085 |
|
---|
1086 | if (is_mutable)
|
---|
1087 | {
|
---|
1088 | out_buffer = const_cast<char_t*>(data);
|
---|
1089 | }
|
---|
1090 | else
|
---|
1091 | {
|
---|
1092 | out_buffer = static_cast<char_t*>(global_allocate(size > 0 ? size : 1));
|
---|
1093 | if (!out_buffer) return false;
|
---|
1094 | }
|
---|
1095 |
|
---|
1096 | out_length = size / sizeof(char_t);
|
---|
1097 |
|
---|
1098 | convert_wchar_endian_swap(out_buffer, data, out_length);
|
---|
1099 |
|
---|
1100 | return true;
|
---|
1101 | }
|
---|
1102 |
|
---|
1103 | bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
|
---|
1104 | {
|
---|
1105 | const uint8_t* data = static_cast<const uint8_t*>(contents);
|
---|
1106 |
|
---|
1107 | // first pass: get length in wchar_t units
|
---|
1108 | out_length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
|
---|
1109 |
|
---|
1110 | // allocate buffer of suitable length
|
---|
1111 | out_buffer = static_cast<char_t*>(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
|
---|
1112 | if (!out_buffer) return false;
|
---|
1113 |
|
---|
1114 | // second pass: convert utf8 input to wchar_t
|
---|
1115 | wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
|
---|
1116 | wchar_writer::value_type out_end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, out_begin);
|
---|
1117 |
|
---|
1118 | assert(out_end == out_begin + out_length);
|
---|
1119 | (void)!out_end;
|
---|
1120 |
|
---|
1121 | return true;
|
---|
1122 | }
|
---|
1123 |
|
---|
1124 | template <typename opt_swap> bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
|
---|
1125 | {
|
---|
1126 | const uint16_t* data = static_cast<const uint16_t*>(contents);
|
---|
1127 | size_t length = size / sizeof(uint16_t);
|
---|
1128 |
|
---|
1129 | // first pass: get length in wchar_t units
|
---|
1130 | out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf16_block(data, length, 0);
|
---|
1131 |
|
---|
1132 | // allocate buffer of suitable length
|
---|
1133 | out_buffer = static_cast<char_t*>(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
|
---|
1134 | if (!out_buffer) return false;
|
---|
1135 |
|
---|
1136 | // second pass: convert utf16 input to wchar_t
|
---|
1137 | wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
|
---|
1138 | wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf16_block(data, length, out_begin);
|
---|
1139 |
|
---|
1140 | assert(out_end == out_begin + out_length);
|
---|
1141 | (void)!out_end;
|
---|
1142 |
|
---|
1143 | return true;
|
---|
1144 | }
|
---|
1145 |
|
---|
1146 | template <typename opt_swap> bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
|
---|
1147 | {
|
---|
1148 | const uint32_t* data = static_cast<const uint32_t*>(contents);
|
---|
1149 | size_t length = size / sizeof(uint32_t);
|
---|
1150 |
|
---|
1151 | // first pass: get length in wchar_t units
|
---|
1152 | out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf32_block(data, length, 0);
|
---|
1153 |
|
---|
1154 | // allocate buffer of suitable length
|
---|
1155 | out_buffer = static_cast<char_t*>(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
|
---|
1156 | if (!out_buffer) return false;
|
---|
1157 |
|
---|
1158 | // second pass: convert utf32 input to wchar_t
|
---|
1159 | wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
|
---|
1160 | wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf32_block(data, length, out_begin);
|
---|
1161 |
|
---|
1162 | assert(out_end == out_begin + out_length);
|
---|
1163 | (void)!out_end;
|
---|
1164 |
|
---|
1165 | return true;
|
---|
1166 | }
|
---|
1167 |
|
---|
1168 | bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
|
---|
1169 | {
|
---|
1170 | // get native encoding
|
---|
1171 | xml_encoding wchar_encoding = get_wchar_encoding();
|
---|
1172 |
|
---|
1173 | // fast path: no conversion required
|
---|
1174 | if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
|
---|
1175 |
|
---|
1176 | // only endian-swapping is required
|
---|
1177 | if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
|
---|
1178 |
|
---|
1179 | // source encoding is utf8
|
---|
1180 | if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size);
|
---|
1181 |
|
---|
1182 | // source encoding is utf16
|
---|
1183 | if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
|
---|
1184 | {
|
---|
1185 | xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
|
---|
1186 |
|
---|
1187 | return (native_encoding == encoding) ?
|
---|
1188 | convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
|
---|
1189 | convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
|
---|
1190 | }
|
---|
1191 |
|
---|
1192 | // source encoding is utf32
|
---|
1193 | if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
|
---|
1194 | {
|
---|
1195 | xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
|
---|
1196 |
|
---|
1197 | return (native_encoding == encoding) ?
|
---|
1198 | convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
|
---|
1199 | convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
|
---|
1200 | }
|
---|
1201 |
|
---|
1202 | assert(!"Invalid encoding");
|
---|
1203 | return false;
|
---|
1204 | }
|
---|
1205 | #else
|
---|
1206 | template <typename opt_swap> bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
|
---|
1207 | {
|
---|
1208 | const uint16_t* data = static_cast<const uint16_t*>(contents);
|
---|
1209 | size_t length = size / sizeof(uint16_t);
|
---|
1210 |
|
---|
1211 | // first pass: get length in utf8 units
|
---|
1212 | out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(data, length, 0);
|
---|
1213 |
|
---|
1214 | // allocate buffer of suitable length
|
---|
1215 | out_buffer = static_cast<char_t*>(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
|
---|
1216 | if (!out_buffer) return false;
|
---|
1217 |
|
---|
1218 | // second pass: convert utf16 input to utf8
|
---|
1219 | uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
|
---|
1220 | uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf16_block(data, length, out_begin);
|
---|
1221 |
|
---|
1222 | assert(out_end == out_begin + out_length);
|
---|
1223 | (void)!out_end;
|
---|
1224 |
|
---|
1225 | return true;
|
---|
1226 | }
|
---|
1227 |
|
---|
1228 | template <typename opt_swap> bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
|
---|
1229 | {
|
---|
1230 | const uint32_t* data = static_cast<const uint32_t*>(contents);
|
---|
1231 | size_t length = size / sizeof(uint32_t);
|
---|
1232 |
|
---|
1233 | // first pass: get length in utf8 units
|
---|
1234 | out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(data, length, 0);
|
---|
1235 |
|
---|
1236 | // allocate buffer of suitable length
|
---|
1237 | out_buffer = static_cast<char_t*>(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
|
---|
1238 | if (!out_buffer) return false;
|
---|
1239 |
|
---|
1240 | // second pass: convert utf32 input to utf8
|
---|
1241 | uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
|
---|
1242 | uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf32_block(data, length, out_begin);
|
---|
1243 |
|
---|
1244 | assert(out_end == out_begin + out_length);
|
---|
1245 | (void)!out_end;
|
---|
1246 |
|
---|
1247 | return true;
|
---|
1248 | }
|
---|
1249 |
|
---|
1250 | bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
|
---|
1251 | {
|
---|
1252 | // fast path: no conversion required
|
---|
1253 | if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
|
---|
1254 |
|
---|
1255 | // source encoding is utf16
|
---|
1256 | if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
|
---|
1257 | {
|
---|
1258 | xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
|
---|
1259 |
|
---|
1260 | return (native_encoding == encoding) ?
|
---|
1261 | convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
|
---|
1262 | convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
|
---|
1263 | }
|
---|
1264 |
|
---|
1265 | // source encoding is utf32
|
---|
1266 | if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
|
---|
1267 | {
|
---|
1268 | xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
|
---|
1269 |
|
---|
1270 | return (native_encoding == encoding) ?
|
---|
1271 | convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
|
---|
1272 | convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
|
---|
1273 | }
|
---|
1274 |
|
---|
1275 | assert(!"Invalid encoding");
|
---|
1276 | return false;
|
---|
1277 | }
|
---|
1278 | #endif
|
---|
1279 |
|
---|
1280 | size_t as_utf8_begin(const wchar_t* str, size_t length)
|
---|
1281 | {
|
---|
1282 | STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
|
---|
1283 |
|
---|
1284 | // get length in utf8 characters
|
---|
1285 | return sizeof(wchar_t) == 2 ?
|
---|
1286 | utf_decoder<utf8_counter>::decode_utf16_block(reinterpret_cast<const uint16_t*>(str), length, 0) :
|
---|
1287 | utf_decoder<utf8_counter>::decode_utf32_block(reinterpret_cast<const uint32_t*>(str), length, 0);
|
---|
1288 | }
|
---|
1289 |
|
---|
1290 | void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
|
---|
1291 | {
|
---|
1292 | STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
|
---|
1293 |
|
---|
1294 | // convert to utf8
|
---|
1295 | uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
|
---|
1296 | uint8_t* end = sizeof(wchar_t) == 2 ?
|
---|
1297 | utf_decoder<utf8_writer>::decode_utf16_block(reinterpret_cast<const uint16_t*>(str), length, begin) :
|
---|
1298 | utf_decoder<utf8_writer>::decode_utf32_block(reinterpret_cast<const uint32_t*>(str), length, begin);
|
---|
1299 |
|
---|
1300 | assert(begin + size == end);
|
---|
1301 | (void)!end;
|
---|
1302 |
|
---|
1303 | // zero-terminate
|
---|
1304 | buffer[size] = 0;
|
---|
1305 | }
|
---|
1306 |
|
---|
1307 | #ifndef PUGIXML_NO_STL
|
---|
1308 | std::string as_utf8_impl(const wchar_t* str, size_t length)
|
---|
1309 | {
|
---|
1310 | // first pass: get length in utf8 characters
|
---|
1311 | size_t size = as_utf8_begin(str, length);
|
---|
1312 |
|
---|
1313 | // allocate resulting string
|
---|
1314 | std::string result;
|
---|
1315 | result.resize(size);
|
---|
1316 |
|
---|
1317 | // second pass: convert to utf8
|
---|
1318 | if (size > 0) as_utf8_end(&result[0], size, str, length);
|
---|
1319 |
|
---|
1320 | return result;
|
---|
1321 | }
|
---|
1322 |
|
---|
1323 | std::wstring as_wide_impl(const char* str, size_t size)
|
---|
1324 | {
|
---|
1325 | const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
|
---|
1326 |
|
---|
1327 | // first pass: get length in wchar_t units
|
---|
1328 | size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
|
---|
1329 |
|
---|
1330 | // allocate resulting string
|
---|
1331 | std::wstring result;
|
---|
1332 | result.resize(length);
|
---|
1333 |
|
---|
1334 | // second pass: convert to wchar_t
|
---|
1335 | if (length > 0)
|
---|
1336 | {
|
---|
1337 | wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
|
---|
1338 | wchar_writer::value_type end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, begin);
|
---|
1339 |
|
---|
1340 | assert(begin + length == end);
|
---|
1341 | (void)!end;
|
---|
1342 | }
|
---|
1343 |
|
---|
1344 | return result;
|
---|
1345 | }
|
---|
1346 | #endif
|
---|
1347 |
|
---|
1348 | inline bool strcpy_insitu_allow(size_t length, uintptr_t allocated, char_t* target)
|
---|
1349 | {
|
---|
1350 | assert(target);
|
---|
1351 | size_t target_length = strlength(target);
|
---|
1352 |
|
---|
1353 | // always reuse document buffer memory if possible
|
---|
1354 | if (!allocated) return target_length >= length;
|
---|
1355 |
|
---|
1356 | // reuse heap memory if waste is not too great
|
---|
1357 | const size_t reuse_threshold = 32;
|
---|
1358 |
|
---|
1359 | return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
|
---|
1360 | }
|
---|
1361 |
|
---|
1362 | bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source)
|
---|
1363 | {
|
---|
1364 | size_t source_length = strlength(source);
|
---|
1365 |
|
---|
1366 | if (source_length == 0)
|
---|
1367 | {
|
---|
1368 | // empty string and null pointer are equivalent, so just deallocate old memory
|
---|
1369 | xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
|
---|
1370 |
|
---|
1371 | if (header & header_mask) alloc->deallocate_string(dest);
|
---|
1372 |
|
---|
1373 | // mark the string as not allocated
|
---|
1374 | dest = 0;
|
---|
1375 | header &= ~header_mask;
|
---|
1376 |
|
---|
1377 | return true;
|
---|
1378 | }
|
---|
1379 | else if (dest && strcpy_insitu_allow(source_length, header & header_mask, dest))
|
---|
1380 | {
|
---|
1381 | // we can reuse old buffer, so just copy the new data (including zero terminator)
|
---|
1382 | memcpy(dest, source, (source_length + 1) * sizeof(char_t));
|
---|
1383 |
|
---|
1384 | return true;
|
---|
1385 | }
|
---|
1386 | else
|
---|
1387 | {
|
---|
1388 | xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
|
---|
1389 |
|
---|
1390 | // allocate new buffer
|
---|
1391 | char_t* buf = alloc->allocate_string(source_length + 1);
|
---|
1392 | if (!buf) return false;
|
---|
1393 |
|
---|
1394 | // copy the string (including zero terminator)
|
---|
1395 | memcpy(buf, source, (source_length + 1) * sizeof(char_t));
|
---|
1396 |
|
---|
1397 | // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
|
---|
1398 | if (header & header_mask) alloc->deallocate_string(dest);
|
---|
1399 |
|
---|
1400 | // the string is now allocated, so set the flag
|
---|
1401 | dest = buf;
|
---|
1402 | header |= header_mask;
|
---|
1403 |
|
---|
1404 | return true;
|
---|
1405 | }
|
---|
1406 | }
|
---|
1407 |
|
---|
1408 | struct gap
|
---|
1409 | {
|
---|
1410 | char_t* end;
|
---|
1411 | size_t size;
|
---|
1412 |
|
---|
1413 | gap(): end(0), size(0)
|
---|
1414 | {
|
---|
1415 | }
|
---|
1416 |
|
---|
1417 | // Push new gap, move s count bytes further (skipping the gap).
|
---|
1418 | // Collapse previous gap.
|
---|
1419 | void push(char_t*& s, size_t count)
|
---|
1420 | {
|
---|
1421 | if (end) // there was a gap already; collapse it
|
---|
1422 | {
|
---|
1423 | // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
|
---|
1424 | assert(s >= end);
|
---|
1425 | memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
|
---|
1426 | }
|
---|
1427 |
|
---|
1428 | s += count; // end of current gap
|
---|
1429 |
|
---|
1430 | // "merge" two gaps
|
---|
1431 | end = s;
|
---|
1432 | size += count;
|
---|
1433 | }
|
---|
1434 |
|
---|
1435 | // Collapse all gaps, return past-the-end pointer
|
---|
1436 | char_t* flush(char_t* s)
|
---|
1437 | {
|
---|
1438 | if (end)
|
---|
1439 | {
|
---|
1440 | // Move [old_gap_end, current_pos) to [old_gap_start, ...)
|
---|
1441 | assert(s >= end);
|
---|
1442 | memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
|
---|
1443 |
|
---|
1444 | return s - size;
|
---|
1445 | }
|
---|
1446 | else return s;
|
---|
1447 | }
|
---|
1448 | };
|
---|
1449 |
|
---|
1450 | char_t* strconv_escape(char_t* s, gap& g)
|
---|
1451 | {
|
---|
1452 | char_t* stre = s + 1;
|
---|
1453 |
|
---|
1454 | switch (*stre)
|
---|
1455 | {
|
---|
1456 | case '#': // &#...
|
---|
1457 | {
|
---|
1458 | unsigned int ucsc = 0;
|
---|
1459 |
|
---|
1460 | if (stre[1] == 'x') // &#x... (hex code)
|
---|
1461 | {
|
---|
1462 | stre += 2;
|
---|
1463 |
|
---|
1464 | char_t ch = *stre;
|
---|
1465 |
|
---|
1466 | if (ch == ';') return stre;
|
---|
1467 |
|
---|
1468 | for (;;)
|
---|
1469 | {
|
---|
1470 | if (static_cast<unsigned int>(ch - '0') <= 9)
|
---|
1471 | ucsc = 16 * ucsc + (ch - '0');
|
---|
1472 | else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
|
---|
1473 | ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
|
---|
1474 | else if (ch == ';')
|
---|
1475 | break;
|
---|
1476 | else // cancel
|
---|
1477 | return stre;
|
---|
1478 |
|
---|
1479 | ch = *++stre;
|
---|
1480 | }
|
---|
1481 |
|
---|
1482 | ++stre;
|
---|
1483 | }
|
---|
1484 | else // &#... (dec code)
|
---|
1485 | {
|
---|
1486 | char_t ch = *++stre;
|
---|
1487 |
|
---|
1488 | if (ch == ';') return stre;
|
---|
1489 |
|
---|
1490 | for (;;)
|
---|
1491 | {
|
---|
1492 | if (static_cast<unsigned int>(ch - '0') <= 9)
|
---|
1493 | ucsc = 10 * ucsc + (ch - '0');
|
---|
1494 | else if (ch == ';')
|
---|
1495 | break;
|
---|
1496 | else // cancel
|
---|
1497 | return stre;
|
---|
1498 |
|
---|
1499 | ch = *++stre;
|
---|
1500 | }
|
---|
1501 |
|
---|
1502 | ++stre;
|
---|
1503 | }
|
---|
1504 |
|
---|
1505 | #ifdef PUGIXML_WCHAR_MODE
|
---|
1506 | s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
|
---|
1507 | #else
|
---|
1508 | s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
|
---|
1509 | #endif
|
---|
1510 |
|
---|
1511 | g.push(s, stre - s);
|
---|
1512 | return stre;
|
---|
1513 | }
|
---|
1514 | case 'a': // &a
|
---|
1515 | {
|
---|
1516 | ++stre;
|
---|
1517 |
|
---|
1518 | if (*stre == 'm') // &am
|
---|
1519 | {
|
---|
1520 | if (*++stre == 'p' && *++stre == ';') // &
|
---|
1521 | {
|
---|
1522 | *s++ = '&';
|
---|
1523 | ++stre;
|
---|
1524 |
|
---|
1525 | g.push(s, stre - s);
|
---|
1526 | return stre;
|
---|
1527 | }
|
---|
1528 | }
|
---|
1529 | else if (*stre == 'p') // &ap
|
---|
1530 | {
|
---|
1531 | if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // '
|
---|
1532 | {
|
---|
1533 | *s++ = '\'';
|
---|
1534 | ++stre;
|
---|
1535 |
|
---|
1536 | g.push(s, stre - s);
|
---|
1537 | return stre;
|
---|
1538 | }
|
---|
1539 | }
|
---|
1540 | break;
|
---|
1541 | }
|
---|
1542 | case 'g': // &g
|
---|
1543 | {
|
---|
1544 | if (*++stre == 't' && *++stre == ';') // >
|
---|
1545 | {
|
---|
1546 | *s++ = '>';
|
---|
1547 | ++stre;
|
---|
1548 |
|
---|
1549 | g.push(s, stre - s);
|
---|
1550 | return stre;
|
---|
1551 | }
|
---|
1552 | break;
|
---|
1553 | }
|
---|
1554 | case 'l': // &l
|
---|
1555 | {
|
---|
1556 | if (*++stre == 't' && *++stre == ';') // <
|
---|
1557 | {
|
---|
1558 | *s++ = '<';
|
---|
1559 | ++stre;
|
---|
1560 |
|
---|
1561 | g.push(s, stre - s);
|
---|
1562 | return stre;
|
---|
1563 | }
|
---|
1564 | break;
|
---|
1565 | }
|
---|
1566 | case 'q': // &q
|
---|
1567 | {
|
---|
1568 | if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // "
|
---|
1569 | {
|
---|
1570 | *s++ = '"';
|
---|
1571 | ++stre;
|
---|
1572 |
|
---|
1573 | g.push(s, stre - s);
|
---|
1574 | return stre;
|
---|
1575 | }
|
---|
1576 | break;
|
---|
1577 | }
|
---|
1578 | }
|
---|
1579 |
|
---|
1580 | return stre;
|
---|
1581 | }
|
---|
1582 |
|
---|
1583 | // Utility macro for last character handling
|
---|
1584 | #define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
|
---|
1585 |
|
---|
1586 | char_t* strconv_comment(char_t* s, char_t endch)
|
---|
1587 | {
|
---|
1588 | gap g;
|
---|
1589 |
|
---|
1590 | while (true)
|
---|
1591 | {
|
---|
1592 | while (!IS_CHARTYPE(*s, ct_parse_comment)) ++s;
|
---|
1593 |
|
---|
1594 | if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
|
---|
1595 | {
|
---|
1596 | *s++ = '\n'; // replace first one with 0x0a
|
---|
1597 |
|
---|
1598 | if (*s == '\n') g.push(s, 1);
|
---|
1599 | }
|
---|
1600 | else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) // comment ends here
|
---|
1601 | {
|
---|
1602 | *g.flush(s) = 0;
|
---|
1603 |
|
---|
1604 | return s + (s[2] == '>' ? 3 : 2);
|
---|
1605 | }
|
---|
1606 | else if (*s == 0)
|
---|
1607 | {
|
---|
1608 | return 0;
|
---|
1609 | }
|
---|
1610 | else ++s;
|
---|
1611 | }
|
---|
1612 | }
|
---|
1613 |
|
---|
1614 | char_t* strconv_cdata(char_t* s, char_t endch)
|
---|
1615 | {
|
---|
1616 | gap g;
|
---|
1617 |
|
---|
1618 | while (true)
|
---|
1619 | {
|
---|
1620 | while (!IS_CHARTYPE(*s, ct_parse_cdata)) ++s;
|
---|
1621 |
|
---|
1622 | if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
|
---|
1623 | {
|
---|
1624 | *s++ = '\n'; // replace first one with 0x0a
|
---|
1625 |
|
---|
1626 | if (*s == '\n') g.push(s, 1);
|
---|
1627 | }
|
---|
1628 | else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) // CDATA ends here
|
---|
1629 | {
|
---|
1630 | *g.flush(s) = 0;
|
---|
1631 |
|
---|
1632 | return s + 1;
|
---|
1633 | }
|
---|
1634 | else if (*s == 0)
|
---|
1635 | {
|
---|
1636 | return 0;
|
---|
1637 | }
|
---|
1638 | else ++s;
|
---|
1639 | }
|
---|
1640 | }
|
---|
1641 |
|
---|
1642 | typedef char_t* (*strconv_pcdata_t)(char_t*);
|
---|
1643 |
|
---|
1644 | template <typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
|
---|
1645 | {
|
---|
1646 | static char_t* parse(char_t* s)
|
---|
1647 | {
|
---|
1648 | gap g;
|
---|
1649 |
|
---|
1650 | while (true)
|
---|
1651 | {
|
---|
1652 | while (!IS_CHARTYPE(*s, ct_parse_pcdata)) ++s;
|
---|
1653 |
|
---|
1654 | if (*s == '<') // PCDATA ends here
|
---|
1655 | {
|
---|
1656 | *g.flush(s) = 0;
|
---|
1657 |
|
---|
1658 | return s + 1;
|
---|
1659 | }
|
---|
1660 | else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
|
---|
1661 | {
|
---|
1662 | *s++ = '\n'; // replace first one with 0x0a
|
---|
1663 |
|
---|
1664 | if (*s == '\n') g.push(s, 1);
|
---|
1665 | }
|
---|
1666 | else if (opt_escape::value && *s == '&')
|
---|
1667 | {
|
---|
1668 | s = strconv_escape(s, g);
|
---|
1669 | }
|
---|
1670 | else if (*s == 0)
|
---|
1671 | {
|
---|
1672 | return s;
|
---|
1673 | }
|
---|
1674 | else ++s;
|
---|
1675 | }
|
---|
1676 | }
|
---|
1677 | };
|
---|
1678 |
|
---|
1679 | strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
|
---|
1680 | {
|
---|
1681 | STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20);
|
---|
1682 |
|
---|
1683 | switch ((optmask >> 4) & 3) // get bitmask for flags (eol escapes)
|
---|
1684 | {
|
---|
1685 | case 0: return strconv_pcdata_impl<opt_false, opt_false>::parse;
|
---|
1686 | case 1: return strconv_pcdata_impl<opt_false, opt_true>::parse;
|
---|
1687 | case 2: return strconv_pcdata_impl<opt_true, opt_false>::parse;
|
---|
1688 | case 3: return strconv_pcdata_impl<opt_true, opt_true>::parse;
|
---|
1689 | default: return 0; // should not get here
|
---|
1690 | }
|
---|
1691 | }
|
---|
1692 |
|
---|
1693 | typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
|
---|
1694 |
|
---|
1695 | template <typename opt_escape> struct strconv_attribute_impl
|
---|
1696 | {
|
---|
1697 | static char_t* parse_wnorm(char_t* s, char_t end_quote)
|
---|
1698 | {
|
---|
1699 | gap g;
|
---|
1700 |
|
---|
1701 | // trim leading whitespaces
|
---|
1702 | if (IS_CHARTYPE(*s, ct_space))
|
---|
1703 | {
|
---|
1704 | char_t* str = s;
|
---|
1705 |
|
---|
1706 | do ++str;
|
---|
1707 | while (IS_CHARTYPE(*str, ct_space));
|
---|
1708 |
|
---|
1709 | g.push(s, str - s);
|
---|
1710 | }
|
---|
1711 |
|
---|
1712 | while (true)
|
---|
1713 | {
|
---|
1714 | while (!IS_CHARTYPE(*s, ct_parse_attr_ws | ct_space)) ++s;
|
---|
1715 |
|
---|
1716 | if (*s == end_quote)
|
---|
1717 | {
|
---|
1718 | char_t* str = g.flush(s);
|
---|
1719 |
|
---|
1720 | do *str-- = 0;
|
---|
1721 | while (IS_CHARTYPE(*str, ct_space));
|
---|
1722 |
|
---|
1723 | return s + 1;
|
---|
1724 | }
|
---|
1725 | else if (IS_CHARTYPE(*s, ct_space))
|
---|
1726 | {
|
---|
1727 | *s++ = ' ';
|
---|
1728 |
|
---|
1729 | if (IS_CHARTYPE(*s, ct_space))
|
---|
1730 | {
|
---|
1731 | char_t* str = s + 1;
|
---|
1732 | while (IS_CHARTYPE(*str, ct_space)) ++str;
|
---|
1733 |
|
---|
1734 | g.push(s, str - s);
|
---|
1735 | }
|
---|
1736 | }
|
---|
1737 | else if (opt_escape::value && *s == '&')
|
---|
1738 | {
|
---|
1739 | s = strconv_escape(s, g);
|
---|
1740 | }
|
---|
1741 | else if (!*s)
|
---|
1742 | {
|
---|
1743 | return 0;
|
---|
1744 | }
|
---|
1745 | else ++s;
|
---|
1746 | }
|
---|
1747 | }
|
---|
1748 |
|
---|
1749 | static char_t* parse_wconv(char_t* s, char_t end_quote)
|
---|
1750 | {
|
---|
1751 | gap g;
|
---|
1752 |
|
---|
1753 | while (true)
|
---|
1754 | {
|
---|
1755 | while (!IS_CHARTYPE(*s, ct_parse_attr_ws)) ++s;
|
---|
1756 |
|
---|
1757 | if (*s == end_quote)
|
---|
1758 | {
|
---|
1759 | *g.flush(s) = 0;
|
---|
1760 |
|
---|
1761 | return s + 1;
|
---|
1762 | }
|
---|
1763 | else if (IS_CHARTYPE(*s, ct_space))
|
---|
1764 | {
|
---|
1765 | if (*s == '\r')
|
---|
1766 | {
|
---|
1767 | *s++ = ' ';
|
---|
1768 |
|
---|
1769 | if (*s == '\n') g.push(s, 1);
|
---|
1770 | }
|
---|
1771 | else *s++ = ' ';
|
---|
1772 | }
|
---|
1773 | else if (opt_escape::value && *s == '&')
|
---|
1774 | {
|
---|
1775 | s = strconv_escape(s, g);
|
---|
1776 | }
|
---|
1777 | else if (!*s)
|
---|
1778 | {
|
---|
1779 | return 0;
|
---|
1780 | }
|
---|
1781 | else ++s;
|
---|
1782 | }
|
---|
1783 | }
|
---|
1784 |
|
---|
1785 | static char_t* parse_eol(char_t* s, char_t end_quote)
|
---|
1786 | {
|
---|
1787 | gap g;
|
---|
1788 |
|
---|
1789 | while (true)
|
---|
1790 | {
|
---|
1791 | while (!IS_CHARTYPE(*s, ct_parse_attr)) ++s;
|
---|
1792 |
|
---|
1793 | if (*s == end_quote)
|
---|
1794 | {
|
---|
1795 | *g.flush(s) = 0;
|
---|
1796 |
|
---|
1797 | return s + 1;
|
---|
1798 | }
|
---|
1799 | else if (*s == '\r')
|
---|
1800 | {
|
---|
1801 | *s++ = '\n';
|
---|
1802 |
|
---|
1803 | if (*s == '\n') g.push(s, 1);
|
---|
1804 | }
|
---|
1805 | else if (opt_escape::value && *s == '&')
|
---|
1806 | {
|
---|
1807 | s = strconv_escape(s, g);
|
---|
1808 | }
|
---|
1809 | else if (!*s)
|
---|
1810 | {
|
---|
1811 | return 0;
|
---|
1812 | }
|
---|
1813 | else ++s;
|
---|
1814 | }
|
---|
1815 | }
|
---|
1816 |
|
---|
1817 | static char_t* parse_simple(char_t* s, char_t end_quote)
|
---|
1818 | {
|
---|
1819 | gap g;
|
---|
1820 |
|
---|
1821 | while (true)
|
---|
1822 | {
|
---|
1823 | while (!IS_CHARTYPE(*s, ct_parse_attr)) ++s;
|
---|
1824 |
|
---|
1825 | if (*s == end_quote)
|
---|
1826 | {
|
---|
1827 | *g.flush(s) = 0;
|
---|
1828 |
|
---|
1829 | return s + 1;
|
---|
1830 | }
|
---|
1831 | else if (opt_escape::value && *s == '&')
|
---|
1832 | {
|
---|
1833 | s = strconv_escape(s, g);
|
---|
1834 | }
|
---|
1835 | else if (!*s)
|
---|
1836 | {
|
---|
1837 | return 0;
|
---|
1838 | }
|
---|
1839 | else ++s;
|
---|
1840 | }
|
---|
1841 | }
|
---|
1842 | };
|
---|
1843 |
|
---|
1844 | strconv_attribute_t get_strconv_attribute(unsigned int optmask)
|
---|
1845 | {
|
---|
1846 | STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
|
---|
1847 |
|
---|
1848 | switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
|
---|
1849 | {
|
---|
1850 | case 0: return strconv_attribute_impl<opt_false>::parse_simple;
|
---|
1851 | case 1: return strconv_attribute_impl<opt_true>::parse_simple;
|
---|
1852 | case 2: return strconv_attribute_impl<opt_false>::parse_eol;
|
---|
1853 | case 3: return strconv_attribute_impl<opt_true>::parse_eol;
|
---|
1854 | case 4: return strconv_attribute_impl<opt_false>::parse_wconv;
|
---|
1855 | case 5: return strconv_attribute_impl<opt_true>::parse_wconv;
|
---|
1856 | case 6: return strconv_attribute_impl<opt_false>::parse_wconv;
|
---|
1857 | case 7: return strconv_attribute_impl<opt_true>::parse_wconv;
|
---|
1858 | case 8: return strconv_attribute_impl<opt_false>::parse_wnorm;
|
---|
1859 | case 9: return strconv_attribute_impl<opt_true>::parse_wnorm;
|
---|
1860 | case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
|
---|
1861 | case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
|
---|
1862 | case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
|
---|
1863 | case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
|
---|
1864 | case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
|
---|
1865 | case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
|
---|
1866 | default: return 0; // should not get here
|
---|
1867 | }
|
---|
1868 | }
|
---|
1869 |
|
---|
1870 | inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
|
---|
1871 | {
|
---|
1872 | xml_parse_result result;
|
---|
1873 | result.status = status;
|
---|
1874 | result.offset = offset;
|
---|
1875 |
|
---|
1876 | return result;
|
---|
1877 | }
|
---|
1878 |
|
---|
1879 | struct xml_parser
|
---|
1880 | {
|
---|
1881 | xml_allocator alloc;
|
---|
1882 | char_t* error_offset;
|
---|
1883 | jmp_buf error_handler;
|
---|
1884 |
|
---|
1885 | // Parser utilities.
|
---|
1886 | #define SKIPWS() { while (IS_CHARTYPE(*s, ct_space)) ++s; }
|
---|
1887 | #define OPTSET(OPT) ( optmsk & OPT )
|
---|
1888 | #define PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); if (!cursor) THROW_ERROR(status_out_of_memory, s); }
|
---|
1889 | #define POPNODE() { cursor = cursor->parent; }
|
---|
1890 | #define SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
|
---|
1891 | #define SCANWHILE(X) { while ((X)) ++s; }
|
---|
1892 | #define ENDSEG() { ch = *s; *s = 0; ++s; }
|
---|
1893 | #define THROW_ERROR(err, m) error_offset = m, longjmp(error_handler, err)
|
---|
1894 | #define CHECK_ERROR(err, m) { if (*s == 0) THROW_ERROR(err, m); }
|
---|
1895 |
|
---|
1896 | xml_parser(const xml_allocator& alloc): alloc(alloc), error_offset(0)
|
---|
1897 | {
|
---|
1898 | }
|
---|
1899 |
|
---|
1900 | // DOCTYPE consists of nested sections of the following possible types:
|
---|
1901 | // <!-- ... -->, <? ... ?>, "...", '...'
|
---|
1902 | // <![...]]>
|
---|
1903 | // <!...>
|
---|
1904 | // First group can not contain nested groups
|
---|
1905 | // Second group can contain nested groups of the same type
|
---|
1906 | // Third group can contain all other groups
|
---|
1907 | char_t* parse_doctype_primitive(char_t* s)
|
---|
1908 | {
|
---|
1909 | if (*s == '"' || *s == '\'')
|
---|
1910 | {
|
---|
1911 | // quoted string
|
---|
1912 | char_t ch = *s++;
|
---|
1913 | SCANFOR(*s == ch);
|
---|
1914 | if (!*s) THROW_ERROR(status_bad_doctype, s);
|
---|
1915 |
|
---|
1916 | s++;
|
---|
1917 | }
|
---|
1918 | else if (s[0] == '<' && s[1] == '?')
|
---|
1919 | {
|
---|
1920 | // <? ... ?>
|
---|
1921 | s += 2;
|
---|
1922 | SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
|
---|
1923 | if (!*s) THROW_ERROR(status_bad_doctype, s);
|
---|
1924 |
|
---|
1925 | s += 2;
|
---|
1926 | }
|
---|
1927 | else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
|
---|
1928 | {
|
---|
1929 | s += 4;
|
---|
1930 | SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
|
---|
1931 | if (!*s) THROW_ERROR(status_bad_doctype, s);
|
---|
1932 |
|
---|
1933 | s += 4;
|
---|
1934 | }
|
---|
1935 | else THROW_ERROR(status_bad_doctype, s);
|
---|
1936 |
|
---|
1937 | return s;
|
---|
1938 | }
|
---|
1939 |
|
---|
1940 | char_t* parse_doctype_ignore(char_t* s)
|
---|
1941 | {
|
---|
1942 | assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
|
---|
1943 | s++;
|
---|
1944 |
|
---|
1945 | while (*s)
|
---|
1946 | {
|
---|
1947 | if (s[0] == '<' && s[1] == '!' && s[2] == '[')
|
---|
1948 | {
|
---|
1949 | // nested ignore section
|
---|
1950 | s = parse_doctype_ignore(s);
|
---|
1951 | }
|
---|
1952 | else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
|
---|
1953 | {
|
---|
1954 | // ignore section end
|
---|
1955 | s += 3;
|
---|
1956 |
|
---|
1957 | return s;
|
---|
1958 | }
|
---|
1959 | else s++;
|
---|
1960 | }
|
---|
1961 |
|
---|
1962 | THROW_ERROR(status_bad_doctype, s);
|
---|
1963 |
|
---|
1964 | return s;
|
---|
1965 | }
|
---|
1966 |
|
---|
1967 | char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel)
|
---|
1968 | {
|
---|
1969 | assert(s[0] == '<' && s[1] == '!');
|
---|
1970 | s++;
|
---|
1971 |
|
---|
1972 | while (*s)
|
---|
1973 | {
|
---|
1974 | if (s[0] == '<' && s[1] == '!' && s[2] != '-')
|
---|
1975 | {
|
---|
1976 | if (s[2] == '[')
|
---|
1977 | {
|
---|
1978 | // ignore
|
---|
1979 | s = parse_doctype_ignore(s);
|
---|
1980 | }
|
---|
1981 | else
|
---|
1982 | {
|
---|
1983 | // some control group
|
---|
1984 | s = parse_doctype_group(s, endch, false);
|
---|
1985 | }
|
---|
1986 | }
|
---|
1987 | else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
|
---|
1988 | {
|
---|
1989 | // unknown tag (forbidden), or some primitive group
|
---|
1990 | s = parse_doctype_primitive(s);
|
---|
1991 | }
|
---|
1992 | else if (*s == '>')
|
---|
1993 | {
|
---|
1994 | s++;
|
---|
1995 |
|
---|
1996 | return s;
|
---|
1997 | }
|
---|
1998 | else s++;
|
---|
1999 | }
|
---|
2000 |
|
---|
2001 | if (!toplevel || endch != '>') THROW_ERROR(status_bad_doctype, s);
|
---|
2002 |
|
---|
2003 | return s;
|
---|
2004 | }
|
---|
2005 |
|
---|
2006 | char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
|
---|
2007 | {
|
---|
2008 | // parse node contents, starting with exclamation mark
|
---|
2009 | ++s;
|
---|
2010 |
|
---|
2011 | if (*s == '-') // '<!-...'
|
---|
2012 | {
|
---|
2013 | ++s;
|
---|
2014 |
|
---|
2015 | if (*s == '-') // '<!--...'
|
---|
2016 | {
|
---|
2017 | ++s;
|
---|
2018 |
|
---|
2019 | if (OPTSET(parse_comments))
|
---|
2020 | {
|
---|
2021 | PUSHNODE(node_comment); // Append a new node on the tree.
|
---|
2022 | cursor->value = s; // Save the offset.
|
---|
2023 | }
|
---|
2024 |
|
---|
2025 | if (OPTSET(parse_eol) && OPTSET(parse_comments))
|
---|
2026 | {
|
---|
2027 | s = strconv_comment(s, endch);
|
---|
2028 |
|
---|
2029 | if (!s) THROW_ERROR(status_bad_comment, cursor->value);
|
---|
2030 | }
|
---|
2031 | else
|
---|
2032 | {
|
---|
2033 | // Scan for terminating '-->'.
|
---|
2034 | SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>'));
|
---|
2035 | CHECK_ERROR(status_bad_comment, s);
|
---|
2036 |
|
---|
2037 | if (OPTSET(parse_comments))
|
---|
2038 | *s = 0; // Zero-terminate this segment at the first terminating '-'.
|
---|
2039 |
|
---|
2040 | s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
|
---|
2041 | }
|
---|
2042 | }
|
---|
2043 | else THROW_ERROR(status_bad_comment, s);
|
---|
2044 | }
|
---|
2045 | else if (*s == '[')
|
---|
2046 | {
|
---|
2047 | // '<![CDATA[...'
|
---|
2048 | if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
|
---|
2049 | {
|
---|
2050 | ++s;
|
---|
2051 |
|
---|
2052 | if (OPTSET(parse_cdata))
|
---|
2053 | {
|
---|
2054 | PUSHNODE(node_cdata); // Append a new node on the tree.
|
---|
2055 | cursor->value = s; // Save the offset.
|
---|
2056 |
|
---|
2057 | if (OPTSET(parse_eol))
|
---|
2058 | {
|
---|
2059 | s = strconv_cdata(s, endch);
|
---|
2060 |
|
---|
2061 | if (!s) THROW_ERROR(status_bad_cdata, cursor->value);
|
---|
2062 | }
|
---|
2063 | else
|
---|
2064 | {
|
---|
2065 | // Scan for terminating ']]>'.
|
---|
2066 | SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
|
---|
2067 | CHECK_ERROR(status_bad_cdata, s);
|
---|
2068 |
|
---|
2069 | *s++ = 0; // Zero-terminate this segment.
|
---|
2070 | }
|
---|
2071 | }
|
---|
2072 | else // Flagged for discard, but we still have to scan for the terminator.
|
---|
2073 | {
|
---|
2074 | // Scan for terminating ']]>'.
|
---|
2075 | SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
|
---|
2076 | CHECK_ERROR(status_bad_cdata, s);
|
---|
2077 |
|
---|
2078 | ++s;
|
---|
2079 | }
|
---|
2080 |
|
---|
2081 | s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
|
---|
2082 | }
|
---|
2083 | else THROW_ERROR(status_bad_cdata, s);
|
---|
2084 | }
|
---|
2085 | else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E'))
|
---|
2086 | {
|
---|
2087 | s -= 2;
|
---|
2088 |
|
---|
2089 | if (cursor->parent) THROW_ERROR(status_bad_doctype, s);
|
---|
2090 |
|
---|
2091 | char_t* mark = s + 9;
|
---|
2092 |
|
---|
2093 | s = parse_doctype_group(s, endch, true);
|
---|
2094 |
|
---|
2095 | if (OPTSET(parse_doctype))
|
---|
2096 | {
|
---|
2097 | while (IS_CHARTYPE(*mark, ct_space)) ++mark;
|
---|
2098 |
|
---|
2099 | PUSHNODE(node_doctype);
|
---|
2100 |
|
---|
2101 | cursor->value = mark;
|
---|
2102 |
|
---|
2103 | assert((s[0] == 0 && endch == '>') || s[-1] == '>');
|
---|
2104 | s[*s == 0 ? 0 : -1] = 0;
|
---|
2105 |
|
---|
2106 | POPNODE();
|
---|
2107 | }
|
---|
2108 | }
|
---|
2109 | else if (*s == 0 && endch == '-') THROW_ERROR(status_bad_comment, s);
|
---|
2110 | else if (*s == 0 && endch == '[') THROW_ERROR(status_bad_cdata, s);
|
---|
2111 | else THROW_ERROR(status_unrecognized_tag, s);
|
---|
2112 |
|
---|
2113 | return s;
|
---|
2114 | }
|
---|
2115 |
|
---|
2116 | char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
|
---|
2117 | {
|
---|
2118 | // load into registers
|
---|
2119 | xml_node_struct* cursor = ref_cursor;
|
---|
2120 | char_t ch = 0;
|
---|
2121 |
|
---|
2122 | // parse node contents, starting with question mark
|
---|
2123 | ++s;
|
---|
2124 |
|
---|
2125 | // read PI target
|
---|
2126 | char_t* target = s;
|
---|
2127 |
|
---|
2128 | if (!IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_pi, s);
|
---|
2129 |
|
---|
2130 | SCANWHILE(IS_CHARTYPE(*s, ct_symbol));
|
---|
2131 | CHECK_ERROR(status_bad_pi, s);
|
---|
2132 |
|
---|
2133 | // determine node type; stricmp / strcasecmp is not portable
|
---|
2134 | bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
|
---|
2135 |
|
---|
2136 | if (declaration ? OPTSET(parse_declaration) : OPTSET(parse_pi))
|
---|
2137 | {
|
---|
2138 | if (declaration)
|
---|
2139 | {
|
---|
2140 | // disallow non top-level declarations
|
---|
2141 | if (cursor->parent) THROW_ERROR(status_bad_pi, s);
|
---|
2142 |
|
---|
2143 | PUSHNODE(node_declaration);
|
---|
2144 | }
|
---|
2145 | else
|
---|
2146 | {
|
---|
2147 | PUSHNODE(node_pi);
|
---|
2148 | }
|
---|
2149 |
|
---|
2150 | cursor->name = target;
|
---|
2151 |
|
---|
2152 | ENDSEG();
|
---|
2153 |
|
---|
2154 | // parse value/attributes
|
---|
2155 | if (ch == '?')
|
---|
2156 | {
|
---|
2157 | // empty node
|
---|
2158 | if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_pi, s);
|
---|
2159 | s += (*s == '>');
|
---|
2160 |
|
---|
2161 | POPNODE();
|
---|
2162 | }
|
---|
2163 | else if (IS_CHARTYPE(ch, ct_space))
|
---|
2164 | {
|
---|
2165 | SKIPWS();
|
---|
2166 |
|
---|
2167 | // scan for tag end
|
---|
2168 | char_t* value = s;
|
---|
2169 |
|
---|
2170 | SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
|
---|
2171 | CHECK_ERROR(status_bad_pi, s);
|
---|
2172 |
|
---|
2173 | if (declaration)
|
---|
2174 | {
|
---|
2175 | // replace ending ? with / so that 'element' terminates properly
|
---|
2176 | *s = '/';
|
---|
2177 |
|
---|
2178 | // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
|
---|
2179 | s = value;
|
---|
2180 | }
|
---|
2181 | else
|
---|
2182 | {
|
---|
2183 | // store value and step over >
|
---|
2184 | cursor->value = value;
|
---|
2185 | POPNODE();
|
---|
2186 |
|
---|
2187 | ENDSEG();
|
---|
2188 |
|
---|
2189 | s += (*s == '>');
|
---|
2190 | }
|
---|
2191 | }
|
---|
2192 | else THROW_ERROR(status_bad_pi, s);
|
---|
2193 | }
|
---|
2194 | else
|
---|
2195 | {
|
---|
2196 | // scan for tag end
|
---|
2197 | SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
|
---|
2198 | CHECK_ERROR(status_bad_pi, s);
|
---|
2199 |
|
---|
2200 | s += (s[1] == '>' ? 2 : 1);
|
---|
2201 | }
|
---|
2202 |
|
---|
2203 | // store from registers
|
---|
2204 | ref_cursor = cursor;
|
---|
2205 |
|
---|
2206 | return s;
|
---|
2207 | }
|
---|
2208 |
|
---|
2209 | void parse(char_t* s, xml_node_struct* xmldoc, unsigned int optmsk, char_t endch)
|
---|
2210 | {
|
---|
2211 | strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
|
---|
2212 | strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
|
---|
2213 |
|
---|
2214 | char_t ch = 0;
|
---|
2215 | xml_node_struct* cursor = xmldoc;
|
---|
2216 | char_t* mark = s;
|
---|
2217 |
|
---|
2218 | while (*s != 0)
|
---|
2219 | {
|
---|
2220 | if (*s == '<')
|
---|
2221 | {
|
---|
2222 | ++s;
|
---|
2223 |
|
---|
2224 | LOC_TAG:
|
---|
2225 | if (IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
|
---|
2226 | {
|
---|
2227 | PUSHNODE(node_element); // Append a new node to the tree.
|
---|
2228 |
|
---|
2229 | cursor->name = s;
|
---|
2230 |
|
---|
2231 | SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
|
---|
2232 | ENDSEG(); // Save char in 'ch', terminate & step over.
|
---|
2233 |
|
---|
2234 | if (ch == '>')
|
---|
2235 | {
|
---|
2236 | // end of tag
|
---|
2237 | }
|
---|
2238 | else if (IS_CHARTYPE(ch, ct_space))
|
---|
2239 | {
|
---|
2240 | LOC_ATTRIBUTES:
|
---|
2241 | while (true)
|
---|
2242 | {
|
---|
2243 | SKIPWS(); // Eat any whitespace.
|
---|
2244 |
|
---|
2245 | if (IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
|
---|
2246 | {
|
---|
2247 | xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute.
|
---|
2248 | if (!a) THROW_ERROR(status_out_of_memory, s);
|
---|
2249 |
|
---|
2250 | a->name = s; // Save the offset.
|
---|
2251 |
|
---|
2252 | SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
|
---|
2253 | CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
|
---|
2254 |
|
---|
2255 | ENDSEG(); // Save char in 'ch', terminate & step over.
|
---|
2256 | CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
|
---|
2257 |
|
---|
2258 | if (IS_CHARTYPE(ch, ct_space))
|
---|
2259 | {
|
---|
2260 | SKIPWS(); // Eat any whitespace.
|
---|
2261 | CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
|
---|
2262 |
|
---|
2263 | ch = *s;
|
---|
2264 | ++s;
|
---|
2265 | }
|
---|
2266 |
|
---|
2267 | if (ch == '=') // '<... #=...'
|
---|
2268 | {
|
---|
2269 | SKIPWS(); // Eat any whitespace.
|
---|
2270 |
|
---|
2271 | if (*s == '"' || *s == '\'') // '<... #="...'
|
---|
2272 | {
|
---|
2273 | ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
|
---|
2274 | ++s; // Step over the quote.
|
---|
2275 | a->value = s; // Save the offset.
|
---|
2276 |
|
---|
2277 | s = strconv_attribute(s, ch);
|
---|
2278 |
|
---|
2279 | if (!s) THROW_ERROR(status_bad_attribute, a->value);
|
---|
2280 |
|
---|
2281 | // After this line the loop continues from the start;
|
---|
2282 | // Whitespaces, / and > are ok, symbols and EOF are wrong,
|
---|
2283 | // everything else will be detected
|
---|
2284 | if (IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_attribute, s);
|
---|
2285 | }
|
---|
2286 | else THROW_ERROR(status_bad_attribute, s);
|
---|
2287 | }
|
---|
2288 | else THROW_ERROR(status_bad_attribute, s);
|
---|
2289 | }
|
---|
2290 | else if (*s == '/')
|
---|
2291 | {
|
---|
2292 | ++s;
|
---|
2293 |
|
---|
2294 | if (*s == '>')
|
---|
2295 | {
|
---|
2296 | POPNODE();
|
---|
2297 | s++;
|
---|
2298 | break;
|
---|
2299 | }
|
---|
2300 | else if (*s == 0 && endch == '>')
|
---|
2301 | {
|
---|
2302 | POPNODE();
|
---|
2303 | break;
|
---|
2304 | }
|
---|
2305 | else THROW_ERROR(status_bad_start_element, s);
|
---|
2306 | }
|
---|
2307 | else if (*s == '>')
|
---|
2308 | {
|
---|
2309 | ++s;
|
---|
2310 |
|
---|
2311 | break;
|
---|
2312 | }
|
---|
2313 | else if (*s == 0 && endch == '>')
|
---|
2314 | {
|
---|
2315 | break;
|
---|
2316 | }
|
---|
2317 | else THROW_ERROR(status_bad_start_element, s);
|
---|
2318 | }
|
---|
2319 |
|
---|
2320 | // !!!
|
---|
2321 | }
|
---|
2322 | else if (ch == '/') // '<#.../'
|
---|
2323 | {
|
---|
2324 | if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_start_element, s);
|
---|
2325 |
|
---|
2326 | POPNODE(); // Pop.
|
---|
2327 |
|
---|
2328 | s += (*s == '>');
|
---|
2329 | }
|
---|
2330 | else if (ch == 0)
|
---|
2331 | {
|
---|
2332 | // we stepped over null terminator, backtrack & handle closing tag
|
---|
2333 | --s;
|
---|
2334 |
|
---|
2335 | if (endch != '>') THROW_ERROR(status_bad_start_element, s);
|
---|
2336 | }
|
---|
2337 | else THROW_ERROR(status_bad_start_element, s);
|
---|
2338 | }
|
---|
2339 | else if (*s == '/')
|
---|
2340 | {
|
---|
2341 | ++s;
|
---|
2342 |
|
---|
2343 | char_t* name = cursor->name;
|
---|
2344 | if (!name) THROW_ERROR(status_end_element_mismatch, s);
|
---|
2345 |
|
---|
2346 | while (IS_CHARTYPE(*s, ct_symbol))
|
---|
2347 | {
|
---|
2348 | if (*s++ != *name++) THROW_ERROR(status_end_element_mismatch, s);
|
---|
2349 | }
|
---|
2350 |
|
---|
2351 | if (*name)
|
---|
2352 | {
|
---|
2353 | if (*s == 0 && name[0] == endch && name[1] == 0) THROW_ERROR(status_bad_end_element, s);
|
---|
2354 | else THROW_ERROR(status_end_element_mismatch, s);
|
---|
2355 | }
|
---|
2356 |
|
---|
2357 | POPNODE(); // Pop.
|
---|
2358 |
|
---|
2359 | SKIPWS();
|
---|
2360 |
|
---|
2361 | if (*s == 0)
|
---|
2362 | {
|
---|
2363 | if (endch != '>') THROW_ERROR(status_bad_end_element, s);
|
---|
2364 | }
|
---|
2365 | else
|
---|
2366 | {
|
---|
2367 | if (*s != '>') THROW_ERROR(status_bad_end_element, s);
|
---|
2368 | ++s;
|
---|
2369 | }
|
---|
2370 | }
|
---|
2371 | else if (*s == '?') // '<?...'
|
---|
2372 | {
|
---|
2373 | s = parse_question(s, cursor, optmsk, endch);
|
---|
2374 |
|
---|
2375 | assert(cursor);
|
---|
2376 | if ((cursor->header & xml_memory_page_type_mask) + 1 == node_declaration) goto LOC_ATTRIBUTES;
|
---|
2377 | }
|
---|
2378 | else if (*s == '!') // '<!...'
|
---|
2379 | {
|
---|
2380 | s = parse_exclamation(s, cursor, optmsk, endch);
|
---|
2381 | }
|
---|
2382 | else if (*s == 0 && endch == '?') THROW_ERROR(status_bad_pi, s);
|
---|
2383 | else THROW_ERROR(status_unrecognized_tag, s);
|
---|
2384 | }
|
---|
2385 | else
|
---|
2386 | {
|
---|
2387 | mark = s; // Save this offset while searching for a terminator.
|
---|
2388 |
|
---|
2389 | SKIPWS(); // Eat whitespace if no genuine PCDATA here.
|
---|
2390 |
|
---|
2391 | if ((!OPTSET(parse_ws_pcdata) || mark == s) && (*s == '<' || !*s))
|
---|
2392 | {
|
---|
2393 | continue;
|
---|
2394 | }
|
---|
2395 |
|
---|
2396 | s = mark;
|
---|
2397 |
|
---|
2398 | if (cursor->parent)
|
---|
2399 | {
|
---|
2400 | PUSHNODE(node_pcdata); // Append a new node on the tree.
|
---|
2401 | cursor->value = s; // Save the offset.
|
---|
2402 |
|
---|
2403 | s = strconv_pcdata(s);
|
---|
2404 |
|
---|
2405 | POPNODE(); // Pop since this is a standalone.
|
---|
2406 |
|
---|
2407 | if (!*s) break;
|
---|
2408 | }
|
---|
2409 | else
|
---|
2410 | {
|
---|
2411 | SCANFOR(*s == '<'); // '...<'
|
---|
2412 | if (!*s) break;
|
---|
2413 |
|
---|
2414 | ++s;
|
---|
2415 | }
|
---|
2416 |
|
---|
2417 | // We're after '<'
|
---|
2418 | goto LOC_TAG;
|
---|
2419 | }
|
---|
2420 | }
|
---|
2421 |
|
---|
2422 | // check that last tag is closed
|
---|
2423 | if (cursor != xmldoc) THROW_ERROR(status_end_element_mismatch, s);
|
---|
2424 | }
|
---|
2425 |
|
---|
2426 | static xml_parse_result parse(char_t* buffer, size_t length, xml_node_struct* root, unsigned int optmsk)
|
---|
2427 | {
|
---|
2428 | xml_document_struct* xmldoc = static_cast<xml_document_struct*>(root);
|
---|
2429 |
|
---|
2430 | // store buffer for offset_debug
|
---|
2431 | xmldoc->buffer = buffer;
|
---|
2432 |
|
---|
2433 | // early-out for empty documents
|
---|
2434 | if (length == 0) return make_parse_result(status_ok);
|
---|
2435 |
|
---|
2436 | // create parser on stack
|
---|
2437 | xml_parser parser(*xmldoc);
|
---|
2438 |
|
---|
2439 | // save last character and make buffer zero-terminated (speeds up parsing)
|
---|
2440 | char_t endch = buffer[length - 1];
|
---|
2441 | buffer[length - 1] = 0;
|
---|
2442 |
|
---|
2443 | // perform actual parsing
|
---|
2444 | int error = setjmp(parser.error_handler);
|
---|
2445 |
|
---|
2446 | if (error == 0)
|
---|
2447 | {
|
---|
2448 | parser.parse(buffer, xmldoc, optmsk, endch);
|
---|
2449 | }
|
---|
2450 |
|
---|
2451 | xml_parse_result result = make_parse_result(static_cast<xml_parse_status>(error), parser.error_offset ? parser.error_offset - buffer : 0);
|
---|
2452 | assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
|
---|
2453 |
|
---|
2454 | // update allocator state
|
---|
2455 | *static_cast<xml_allocator*>(xmldoc) = parser.alloc;
|
---|
2456 |
|
---|
2457 | // since we removed last character, we have to handle the only possible false positive
|
---|
2458 | if (result && endch == '<')
|
---|
2459 | {
|
---|
2460 | // there's no possible well-formed document with < at the end
|
---|
2461 | return make_parse_result(status_unrecognized_tag, length);
|
---|
2462 | }
|
---|
2463 |
|
---|
2464 | return result;
|
---|
2465 | }
|
---|
2466 | };
|
---|
2467 |
|
---|
2468 | // Output facilities
|
---|
2469 | xml_encoding get_write_native_encoding()
|
---|
2470 | {
|
---|
2471 | #ifdef PUGIXML_WCHAR_MODE
|
---|
2472 | return get_wchar_encoding();
|
---|
2473 | #else
|
---|
2474 | return encoding_utf8;
|
---|
2475 | #endif
|
---|
2476 | }
|
---|
2477 |
|
---|
2478 | xml_encoding get_write_encoding(xml_encoding encoding)
|
---|
2479 | {
|
---|
2480 | // replace wchar encoding with utf implementation
|
---|
2481 | if (encoding == encoding_wchar) return get_wchar_encoding();
|
---|
2482 |
|
---|
2483 | // replace utf16 encoding with utf16 with specific endianness
|
---|
2484 | if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
|
---|
2485 |
|
---|
2486 | // replace utf32 encoding with utf32 with specific endianness
|
---|
2487 | if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
|
---|
2488 |
|
---|
2489 | // only do autodetection if no explicit encoding is requested
|
---|
2490 | if (encoding != encoding_auto) return encoding;
|
---|
2491 |
|
---|
2492 | // assume utf8 encoding
|
---|
2493 | return encoding_utf8;
|
---|
2494 | }
|
---|
2495 |
|
---|
2496 | #ifdef PUGIXML_WCHAR_MODE
|
---|
2497 | size_t get_valid_length(const char_t* data, size_t length)
|
---|
2498 | {
|
---|
2499 | assert(length > 0);
|
---|
2500 |
|
---|
2501 | // discard last character if it's the lead of a surrogate pair
|
---|
2502 | return (sizeof(wchar_t) == 2 && (unsigned)(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
|
---|
2503 | }
|
---|
2504 |
|
---|
2505 | size_t convert_buffer(char* result, const char_t* data, size_t length, xml_encoding encoding)
|
---|
2506 | {
|
---|
2507 | // only endian-swapping is required
|
---|
2508 | if (need_endian_swap_utf(encoding, get_wchar_encoding()))
|
---|
2509 | {
|
---|
2510 | convert_wchar_endian_swap(reinterpret_cast<char_t*>(result), data, length);
|
---|
2511 |
|
---|
2512 | return length * sizeof(char_t);
|
---|
2513 | }
|
---|
2514 |
|
---|
2515 | // convert to utf8
|
---|
2516 | if (encoding == encoding_utf8)
|
---|
2517 | {
|
---|
2518 | uint8_t* dest = reinterpret_cast<uint8_t*>(result);
|
---|
2519 |
|
---|
2520 | uint8_t* end = sizeof(wchar_t) == 2 ?
|
---|
2521 | utf_decoder<utf8_writer>::decode_utf16_block(reinterpret_cast<const uint16_t*>(data), length, dest) :
|
---|
2522 | utf_decoder<utf8_writer>::decode_utf32_block(reinterpret_cast<const uint32_t*>(data), length, dest);
|
---|
2523 |
|
---|
2524 | return static_cast<size_t>(end - dest);
|
---|
2525 | }
|
---|
2526 |
|
---|
2527 | // convert to utf16
|
---|
2528 | if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
|
---|
2529 | {
|
---|
2530 | uint16_t* dest = reinterpret_cast<uint16_t*>(result);
|
---|
2531 |
|
---|
2532 | // convert to native utf16
|
---|
2533 | uint16_t* end = utf_decoder<utf16_writer>::decode_utf32_block(reinterpret_cast<const uint32_t*>(data), length, dest);
|
---|
2534 |
|
---|
2535 | // swap if necessary
|
---|
2536 | xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
|
---|
2537 |
|
---|
2538 | if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
|
---|
2539 |
|
---|
2540 | return static_cast<size_t>(end - dest) * sizeof(uint16_t);
|
---|
2541 | }
|
---|
2542 |
|
---|
2543 | // convert to utf32
|
---|
2544 | if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
|
---|
2545 | {
|
---|
2546 | uint32_t* dest = reinterpret_cast<uint32_t*>(result);
|
---|
2547 |
|
---|
2548 | // convert to native utf32
|
---|
2549 | uint32_t* end = utf_decoder<utf32_writer>::decode_utf16_block(reinterpret_cast<const uint16_t*>(data), length, dest);
|
---|
2550 |
|
---|
2551 | // swap if necessary
|
---|
2552 | xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
|
---|
2553 |
|
---|
2554 | if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
|
---|
2555 |
|
---|
2556 | return static_cast<size_t>(end - dest) * sizeof(uint32_t);
|
---|
2557 | }
|
---|
2558 |
|
---|
2559 | assert(!"Invalid encoding");
|
---|
2560 | return 0;
|
---|
2561 | }
|
---|
2562 | #else
|
---|
2563 | size_t get_valid_length(const char_t* data, size_t length)
|
---|
2564 | {
|
---|
2565 | assert(length > 4);
|
---|
2566 |
|
---|
2567 | for (size_t i = 1; i <= 4; ++i)
|
---|
2568 | {
|
---|
2569 | uint8_t ch = static_cast<uint8_t>(data[length - i]);
|
---|
2570 |
|
---|
2571 | // either a standalone character or a leading one
|
---|
2572 | if ((ch & 0xc0) != 0x80) return length - i;
|
---|
2573 | }
|
---|
2574 |
|
---|
2575 | // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
|
---|
2576 | return length;
|
---|
2577 | }
|
---|
2578 |
|
---|
2579 | size_t convert_buffer(char* result, const char_t* data, size_t length, xml_encoding encoding)
|
---|
2580 | {
|
---|
2581 | if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
|
---|
2582 | {
|
---|
2583 | uint16_t* dest = reinterpret_cast<uint16_t*>(result);
|
---|
2584 |
|
---|
2585 | // convert to native utf16
|
---|
2586 | uint16_t* end = utf_decoder<utf16_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
|
---|
2587 |
|
---|
2588 | // swap if necessary
|
---|
2589 | xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
|
---|
2590 |
|
---|
2591 | if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
|
---|
2592 |
|
---|
2593 | return static_cast<size_t>(end - dest) * sizeof(uint16_t);
|
---|
2594 | }
|
---|
2595 |
|
---|
2596 | if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
|
---|
2597 | {
|
---|
2598 | uint32_t* dest = reinterpret_cast<uint32_t*>(result);
|
---|
2599 |
|
---|
2600 | // convert to native utf32
|
---|
2601 | uint32_t* end = utf_decoder<utf32_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
|
---|
2602 |
|
---|
2603 | // swap if necessary
|
---|
2604 | xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
|
---|
2605 |
|
---|
2606 | if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
|
---|
2607 |
|
---|
2608 | return static_cast<size_t>(end - dest) * sizeof(uint32_t);
|
---|
2609 | }
|
---|
2610 |
|
---|
2611 | assert(!"Invalid encoding");
|
---|
2612 | return 0;
|
---|
2613 | }
|
---|
2614 | #endif
|
---|
2615 |
|
---|
2616 | class xml_buffered_writer
|
---|
2617 | {
|
---|
2618 | xml_buffered_writer(const xml_buffered_writer&);
|
---|
2619 | xml_buffered_writer& operator=(const xml_buffered_writer&);
|
---|
2620 |
|
---|
2621 | public:
|
---|
2622 | xml_buffered_writer(xml_writer& writer, xml_encoding user_encoding): writer(writer), bufsize(0), encoding(get_write_encoding(user_encoding))
|
---|
2623 | {
|
---|
2624 | }
|
---|
2625 |
|
---|
2626 | ~xml_buffered_writer()
|
---|
2627 | {
|
---|
2628 | flush();
|
---|
2629 | }
|
---|
2630 |
|
---|
2631 | void flush()
|
---|
2632 | {
|
---|
2633 | flush(buffer, bufsize);
|
---|
2634 | bufsize = 0;
|
---|
2635 | }
|
---|
2636 |
|
---|
2637 | void flush(const char_t* data, size_t size)
|
---|
2638 | {
|
---|
2639 | if (size == 0) return;
|
---|
2640 |
|
---|
2641 | // fast path, just write data
|
---|
2642 | if (encoding == get_write_native_encoding())
|
---|
2643 | writer.write(data, size * sizeof(char_t));
|
---|
2644 | else
|
---|
2645 | {
|
---|
2646 | // convert chunk
|
---|
2647 | size_t result = convert_buffer(scratch, data, size, encoding);
|
---|
2648 | assert(result <= sizeof(scratch));
|
---|
2649 |
|
---|
2650 | // write data
|
---|
2651 | writer.write(scratch, result);
|
---|
2652 | }
|
---|
2653 | }
|
---|
2654 |
|
---|
2655 | void write(const char_t* data, size_t length)
|
---|
2656 | {
|
---|
2657 | if (bufsize + length > bufcapacity)
|
---|
2658 | {
|
---|
2659 | // flush the remaining buffer contents
|
---|
2660 | flush();
|
---|
2661 |
|
---|
2662 | // handle large chunks
|
---|
2663 | if (length > bufcapacity)
|
---|
2664 | {
|
---|
2665 | if (encoding == get_write_native_encoding())
|
---|
2666 | {
|
---|
2667 | // fast path, can just write data chunk
|
---|
2668 | writer.write(data, length * sizeof(char_t));
|
---|
2669 | return;
|
---|
2670 | }
|
---|
2671 |
|
---|
2672 | // need to convert in suitable chunks
|
---|
2673 | while (length > bufcapacity)
|
---|
2674 | {
|
---|
2675 | // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
|
---|
2676 | // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
|
---|
2677 | size_t chunk_size = get_valid_length(data, bufcapacity);
|
---|
2678 |
|
---|
2679 | // convert chunk and write
|
---|
2680 | flush(data, chunk_size);
|
---|
2681 |
|
---|
2682 | // iterate
|
---|
2683 | data += chunk_size;
|
---|
2684 | length -= chunk_size;
|
---|
2685 | }
|
---|
2686 |
|
---|
2687 | // small tail is copied below
|
---|
2688 | bufsize = 0;
|
---|
2689 | }
|
---|
2690 | }
|
---|
2691 |
|
---|
2692 | memcpy(buffer + bufsize, data, length * sizeof(char_t));
|
---|
2693 | bufsize += length;
|
---|
2694 | }
|
---|
2695 |
|
---|
2696 | void write(const char_t* data)
|
---|
2697 | {
|
---|
2698 | write(data, strlength(data));
|
---|
2699 | }
|
---|
2700 |
|
---|
2701 | void write(char_t d0)
|
---|
2702 | {
|
---|
2703 | if (bufsize + 1 > bufcapacity) flush();
|
---|
2704 |
|
---|
2705 | buffer[bufsize + 0] = d0;
|
---|
2706 | bufsize += 1;
|
---|
2707 | }
|
---|
2708 |
|
---|
2709 | void write(char_t d0, char_t d1)
|
---|
2710 | {
|
---|
2711 | if (bufsize + 2 > bufcapacity) flush();
|
---|
2712 |
|
---|
2713 | buffer[bufsize + 0] = d0;
|
---|
2714 | buffer[bufsize + 1] = d1;
|
---|
2715 | bufsize += 2;
|
---|
2716 | }
|
---|
2717 |
|
---|
2718 | void write(char_t d0, char_t d1, char_t d2)
|
---|
2719 | {
|
---|
2720 | if (bufsize + 3 > bufcapacity) flush();
|
---|
2721 |
|
---|
2722 | buffer[bufsize + 0] = d0;
|
---|
2723 | buffer[bufsize + 1] = d1;
|
---|
2724 | buffer[bufsize + 2] = d2;
|
---|
2725 | bufsize += 3;
|
---|
2726 | }
|
---|
2727 |
|
---|
2728 | void write(char_t d0, char_t d1, char_t d2, char_t d3)
|
---|
2729 | {
|
---|
2730 | if (bufsize + 4 > bufcapacity) flush();
|
---|
2731 |
|
---|
2732 | buffer[bufsize + 0] = d0;
|
---|
2733 | buffer[bufsize + 1] = d1;
|
---|
2734 | buffer[bufsize + 2] = d2;
|
---|
2735 | buffer[bufsize + 3] = d3;
|
---|
2736 | bufsize += 4;
|
---|
2737 | }
|
---|
2738 |
|
---|
2739 | void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
|
---|
2740 | {
|
---|
2741 | if (bufsize + 5 > bufcapacity) flush();
|
---|
2742 |
|
---|
2743 | buffer[bufsize + 0] = d0;
|
---|
2744 | buffer[bufsize + 1] = d1;
|
---|
2745 | buffer[bufsize + 2] = d2;
|
---|
2746 | buffer[bufsize + 3] = d3;
|
---|
2747 | buffer[bufsize + 4] = d4;
|
---|
2748 | bufsize += 5;
|
---|
2749 | }
|
---|
2750 |
|
---|
2751 | void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
|
---|
2752 | {
|
---|
2753 | if (bufsize + 6 > bufcapacity) flush();
|
---|
2754 |
|
---|
2755 | buffer[bufsize + 0] = d0;
|
---|
2756 | buffer[bufsize + 1] = d1;
|
---|
2757 | buffer[bufsize + 2] = d2;
|
---|
2758 | buffer[bufsize + 3] = d3;
|
---|
2759 | buffer[bufsize + 4] = d4;
|
---|
2760 | buffer[bufsize + 5] = d5;
|
---|
2761 | bufsize += 6;
|
---|
2762 | }
|
---|
2763 |
|
---|
2764 | // utf8 maximum expansion: x4 (-> utf32)
|
---|
2765 | // utf16 maximum expansion: x2 (-> utf32)
|
---|
2766 | // utf32 maximum expansion: x1
|
---|
2767 | enum { bufcapacity = 2048 };
|
---|
2768 |
|
---|
2769 | char_t buffer[bufcapacity];
|
---|
2770 | char scratch[4 * bufcapacity];
|
---|
2771 |
|
---|
2772 | xml_writer& writer;
|
---|
2773 | size_t bufsize;
|
---|
2774 | xml_encoding encoding;
|
---|
2775 | };
|
---|
2776 |
|
---|
2777 | void write_bom(xml_writer& writer, xml_encoding encoding)
|
---|
2778 | {
|
---|
2779 | switch (encoding)
|
---|
2780 | {
|
---|
2781 | case encoding_utf8:
|
---|
2782 | writer.write("\xef\xbb\xbf", 3);
|
---|
2783 | break;
|
---|
2784 |
|
---|
2785 | case encoding_utf16_be:
|
---|
2786 | writer.write("\xfe\xff", 2);
|
---|
2787 | break;
|
---|
2788 |
|
---|
2789 | case encoding_utf16_le:
|
---|
2790 | writer.write("\xff\xfe", 2);
|
---|
2791 | break;
|
---|
2792 |
|
---|
2793 | case encoding_utf32_be:
|
---|
2794 | writer.write("\x00\x00\xfe\xff", 4);
|
---|
2795 | break;
|
---|
2796 |
|
---|
2797 | case encoding_utf32_le:
|
---|
2798 | writer.write("\xff\xfe\x00\x00", 4);
|
---|
2799 | break;
|
---|
2800 |
|
---|
2801 | default:
|
---|
2802 | assert(!"Invalid encoding");
|
---|
2803 | }
|
---|
2804 | }
|
---|
2805 |
|
---|
2806 | void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
|
---|
2807 | {
|
---|
2808 | while (*s)
|
---|
2809 | {
|
---|
2810 | const char_t* prev = s;
|
---|
2811 |
|
---|
2812 | // While *s is a usual symbol
|
---|
2813 | while (!IS_CHARTYPEX(*s, type)) ++s;
|
---|
2814 |
|
---|
2815 | writer.write(prev, static_cast<size_t>(s - prev));
|
---|
2816 |
|
---|
2817 | switch (*s)
|
---|
2818 | {
|
---|
2819 | case 0: break;
|
---|
2820 | case '&':
|
---|
2821 | writer.write('&', 'a', 'm', 'p', ';');
|
---|
2822 | ++s;
|
---|
2823 | break;
|
---|
2824 | case '<':
|
---|
2825 | writer.write('&', 'l', 't', ';');
|
---|
2826 | ++s;
|
---|
2827 | break;
|
---|
2828 | case '>':
|
---|
2829 | writer.write('&', 'g', 't', ';');
|
---|
2830 | ++s;
|
---|
2831 | break;
|
---|
2832 | case '"':
|
---|
2833 | writer.write('&', 'q', 'u', 'o', 't', ';');
|
---|
2834 | ++s;
|
---|
2835 | break;
|
---|
2836 | default: // s is not a usual symbol
|
---|
2837 | {
|
---|
2838 | unsigned int ch = static_cast<unsigned int>(*s++);
|
---|
2839 | assert(ch < 32);
|
---|
2840 |
|
---|
2841 | writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
|
---|
2842 | }
|
---|
2843 | }
|
---|
2844 | }
|
---|
2845 | }
|
---|
2846 |
|
---|
2847 | void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
|
---|
2848 | {
|
---|
2849 | do
|
---|
2850 | {
|
---|
2851 | writer.write('<', '!', '[', 'C', 'D');
|
---|
2852 | writer.write('A', 'T', 'A', '[');
|
---|
2853 |
|
---|
2854 | const char_t* prev = s;
|
---|
2855 |
|
---|
2856 | // look for ]]> sequence - we can't output it as is since it terminates CDATA
|
---|
2857 | while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
|
---|
2858 |
|
---|
2859 | // skip ]] if we stopped at ]]>, > will go to the next CDATA section
|
---|
2860 | if (*s) s += 2;
|
---|
2861 |
|
---|
2862 | writer.write(prev, static_cast<size_t>(s - prev));
|
---|
2863 |
|
---|
2864 | writer.write(']', ']', '>');
|
---|
2865 | }
|
---|
2866 | while (*s);
|
---|
2867 | }
|
---|
2868 |
|
---|
2869 | void node_output_attributes(xml_buffered_writer& writer, const xml_node& node)
|
---|
2870 | {
|
---|
2871 | const char_t* default_name = PUGIXML_TEXT(":anonymous");
|
---|
2872 |
|
---|
2873 | for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute())
|
---|
2874 | {
|
---|
2875 | writer.write(' ');
|
---|
2876 | writer.write(a.name()[0] ? a.name() : default_name);
|
---|
2877 | writer.write('=', '"');
|
---|
2878 |
|
---|
2879 | text_output_escaped(writer, a.value(), ctx_special_attr);
|
---|
2880 |
|
---|
2881 | writer.write('"');
|
---|
2882 | }
|
---|
2883 | }
|
---|
2884 |
|
---|
2885 | void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth)
|
---|
2886 | {
|
---|
2887 | const char_t* default_name = PUGIXML_TEXT(":anonymous");
|
---|
2888 |
|
---|
2889 | if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
|
---|
2890 | for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
|
---|
2891 |
|
---|
2892 | switch (node.type())
|
---|
2893 | {
|
---|
2894 | case node_document:
|
---|
2895 | {
|
---|
2896 | for (xml_node n = node.first_child(); n; n = n.next_sibling())
|
---|
2897 | node_output(writer, n, indent, flags, depth);
|
---|
2898 | break;
|
---|
2899 | }
|
---|
2900 |
|
---|
2901 | case node_element:
|
---|
2902 | {
|
---|
2903 | const char_t* name = node.name()[0] ? node.name() : default_name;
|
---|
2904 |
|
---|
2905 | writer.write('<');
|
---|
2906 | writer.write(name);
|
---|
2907 |
|
---|
2908 | node_output_attributes(writer, node);
|
---|
2909 |
|
---|
2910 | if (flags & format_raw)
|
---|
2911 | {
|
---|
2912 | if (!node.first_child())
|
---|
2913 | writer.write(' ', '/', '>');
|
---|
2914 | else
|
---|
2915 | {
|
---|
2916 | writer.write('>');
|
---|
2917 |
|
---|
2918 | for (xml_node n = node.first_child(); n; n = n.next_sibling())
|
---|
2919 | node_output(writer, n, indent, flags, depth + 1);
|
---|
2920 |
|
---|
2921 | writer.write('<', '/');
|
---|
2922 | writer.write(name);
|
---|
2923 | writer.write('>');
|
---|
2924 | }
|
---|
2925 | }
|
---|
2926 | else if (!node.first_child())
|
---|
2927 | writer.write(' ', '/', '>', '\n');
|
---|
2928 | else if (node.first_child() == node.last_child() && (node.first_child().type() == node_pcdata || node.first_child().type() == node_cdata))
|
---|
2929 | {
|
---|
2930 | writer.write('>');
|
---|
2931 |
|
---|
2932 | if (node.first_child().type() == node_pcdata)
|
---|
2933 | text_output_escaped(writer, node.first_child().value(), ctx_special_pcdata);
|
---|
2934 | else
|
---|
2935 | text_output_cdata(writer, node.first_child().value());
|
---|
2936 |
|
---|
2937 | writer.write('<', '/');
|
---|
2938 | writer.write(name);
|
---|
2939 | writer.write('>', '\n');
|
---|
2940 | }
|
---|
2941 | else
|
---|
2942 | {
|
---|
2943 | writer.write('>', '\n');
|
---|
2944 |
|
---|
2945 | for (xml_node n = node.first_child(); n; n = n.next_sibling())
|
---|
2946 | node_output(writer, n, indent, flags, depth + 1);
|
---|
2947 |
|
---|
2948 | if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
|
---|
2949 | for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
|
---|
2950 |
|
---|
2951 | writer.write('<', '/');
|
---|
2952 | writer.write(name);
|
---|
2953 | writer.write('>', '\n');
|
---|
2954 | }
|
---|
2955 |
|
---|
2956 | break;
|
---|
2957 | }
|
---|
2958 |
|
---|
2959 | case node_pcdata:
|
---|
2960 | text_output_escaped(writer, node.value(), ctx_special_pcdata);
|
---|
2961 | if ((flags & format_raw) == 0) writer.write('\n');
|
---|
2962 | break;
|
---|
2963 |
|
---|
2964 | case node_cdata:
|
---|
2965 | text_output_cdata(writer, node.value());
|
---|
2966 | if ((flags & format_raw) == 0) writer.write('\n');
|
---|
2967 | break;
|
---|
2968 |
|
---|
2969 | case node_comment:
|
---|
2970 | writer.write('<', '!', '-', '-');
|
---|
2971 | writer.write(node.value());
|
---|
2972 | writer.write('-', '-', '>');
|
---|
2973 | if ((flags & format_raw) == 0) writer.write('\n');
|
---|
2974 | break;
|
---|
2975 |
|
---|
2976 | case node_pi:
|
---|
2977 | case node_declaration:
|
---|
2978 | writer.write('<', '?');
|
---|
2979 | writer.write(node.name()[0] ? node.name() : default_name);
|
---|
2980 |
|
---|
2981 | if (node.type() == node_declaration)
|
---|
2982 | {
|
---|
2983 | node_output_attributes(writer, node);
|
---|
2984 | }
|
---|
2985 | else if (node.value()[0])
|
---|
2986 | {
|
---|
2987 | writer.write(' ');
|
---|
2988 | writer.write(node.value());
|
---|
2989 | }
|
---|
2990 |
|
---|
2991 | writer.write('?', '>');
|
---|
2992 | if ((flags & format_raw) == 0) writer.write('\n');
|
---|
2993 | break;
|
---|
2994 |
|
---|
2995 | case node_doctype:
|
---|
2996 | writer.write('<', '!', 'D', 'O', 'C');
|
---|
2997 | writer.write('T', 'Y', 'P', 'E');
|
---|
2998 |
|
---|
2999 | if (node.value()[0])
|
---|
3000 | {
|
---|
3001 | writer.write(' ');
|
---|
3002 | writer.write(node.value());
|
---|
3003 | }
|
---|
3004 |
|
---|
3005 | writer.write('>');
|
---|
3006 | if ((flags & format_raw) == 0) writer.write('\n');
|
---|
3007 | break;
|
---|
3008 |
|
---|
3009 | default:
|
---|
3010 | assert(!"Invalid node type");
|
---|
3011 | }
|
---|
3012 | }
|
---|
3013 |
|
---|
3014 | inline bool has_declaration(const xml_node& node)
|
---|
3015 | {
|
---|
3016 | for (xml_node child = node.first_child(); child; child = child.next_sibling())
|
---|
3017 | {
|
---|
3018 | xml_node_type type = child.type();
|
---|
3019 |
|
---|
3020 | if (type == node_declaration) return true;
|
---|
3021 | if (type == node_element) return false;
|
---|
3022 | }
|
---|
3023 |
|
---|
3024 | return false;
|
---|
3025 | }
|
---|
3026 |
|
---|
3027 | inline bool allow_insert_child(xml_node_type parent, xml_node_type child)
|
---|
3028 | {
|
---|
3029 | if (parent != node_document && parent != node_element) return false;
|
---|
3030 | if (child == node_document || child == node_null) return false;
|
---|
3031 | if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
|
---|
3032 |
|
---|
3033 | return true;
|
---|
3034 | }
|
---|
3035 |
|
---|
3036 | void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip)
|
---|
3037 | {
|
---|
3038 | assert(dest.type() == source.type());
|
---|
3039 |
|
---|
3040 | switch (source.type())
|
---|
3041 | {
|
---|
3042 | case node_element:
|
---|
3043 | {
|
---|
3044 | dest.set_name(source.name());
|
---|
3045 |
|
---|
3046 | for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
|
---|
3047 | dest.append_attribute(a.name()).set_value(a.value());
|
---|
3048 |
|
---|
3049 | for (xml_node c = source.first_child(); c; c = c.next_sibling())
|
---|
3050 | {
|
---|
3051 | if (c == skip) continue;
|
---|
3052 |
|
---|
3053 | xml_node cc = dest.append_child(c.type());
|
---|
3054 | assert(cc);
|
---|
3055 |
|
---|
3056 | recursive_copy_skip(cc, c, skip);
|
---|
3057 | }
|
---|
3058 |
|
---|
3059 | break;
|
---|
3060 | }
|
---|
3061 |
|
---|
3062 | case node_pcdata:
|
---|
3063 | case node_cdata:
|
---|
3064 | case node_comment:
|
---|
3065 | case node_doctype:
|
---|
3066 | dest.set_value(source.value());
|
---|
3067 | break;
|
---|
3068 |
|
---|
3069 | case node_pi:
|
---|
3070 | dest.set_name(source.name());
|
---|
3071 | dest.set_value(source.value());
|
---|
3072 | break;
|
---|
3073 |
|
---|
3074 | case node_declaration:
|
---|
3075 | {
|
---|
3076 | dest.set_name(source.name());
|
---|
3077 |
|
---|
3078 | for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
|
---|
3079 | dest.append_attribute(a.name()).set_value(a.value());
|
---|
3080 |
|
---|
3081 | break;
|
---|
3082 | }
|
---|
3083 |
|
---|
3084 | default:
|
---|
3085 | assert(!"Invalid node type");
|
---|
3086 | }
|
---|
3087 | }
|
---|
3088 |
|
---|
3089 | // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
|
---|
3090 | xml_parse_status get_file_size(FILE* file, size_t& out_result)
|
---|
3091 | {
|
---|
3092 | #if defined(_MSC_VER) && _MSC_VER >= 1400
|
---|
3093 | // there are 64-bit versions of fseek/ftell, let's use them
|
---|
3094 | typedef __int64 length_type;
|
---|
3095 |
|
---|
3096 | _fseeki64(file, 0, SEEK_END);
|
---|
3097 | length_type length = _ftelli64(file);
|
---|
3098 | _fseeki64(file, 0, SEEK_SET);
|
---|
3099 | #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && !defined(__STRICT_ANSI__)
|
---|
3100 | // there are 64-bit versions of fseek/ftell, let's use them
|
---|
3101 | typedef off64_t length_type;
|
---|
3102 |
|
---|
3103 | fseeko64(file, 0, SEEK_END);
|
---|
3104 | length_type length = ftello64(file);
|
---|
3105 | fseeko64(file, 0, SEEK_SET);
|
---|
3106 | #else
|
---|
3107 | // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
|
---|
3108 | typedef long length_type;
|
---|
3109 |
|
---|
3110 | fseek(file, 0, SEEK_END);
|
---|
3111 | length_type length = ftell(file);
|
---|
3112 | fseek(file, 0, SEEK_SET);
|
---|
3113 | #endif
|
---|
3114 |
|
---|
3115 | // check for I/O errors
|
---|
3116 | if (length < 0) return status_io_error;
|
---|
3117 |
|
---|
3118 | // check for overflow
|
---|
3119 | size_t result = static_cast<size_t>(length);
|
---|
3120 |
|
---|
3121 | if (static_cast<length_type>(result) != length) return status_out_of_memory;
|
---|
3122 |
|
---|
3123 | // finalize
|
---|
3124 | out_result = result;
|
---|
3125 |
|
---|
3126 | return status_ok;
|
---|
3127 | }
|
---|
3128 |
|
---|
3129 | xml_parse_result load_file_impl(xml_document& doc, FILE* file, unsigned int options, xml_encoding encoding)
|
---|
3130 | {
|
---|
3131 | if (!file) return make_parse_result(status_file_not_found);
|
---|
3132 |
|
---|
3133 | // get file size (can result in I/O errors)
|
---|
3134 | size_t size = 0;
|
---|
3135 | xml_parse_status size_status = get_file_size(file, size);
|
---|
3136 |
|
---|
3137 | if (size_status != status_ok)
|
---|
3138 | {
|
---|
3139 | fclose(file);
|
---|
3140 | return make_parse_result(size_status);
|
---|
3141 | }
|
---|
3142 |
|
---|
3143 | // allocate buffer for the whole file
|
---|
3144 | char* contents = static_cast<char*>(global_allocate(size > 0 ? size : 1));
|
---|
3145 |
|
---|
3146 | if (!contents)
|
---|
3147 | {
|
---|
3148 | fclose(file);
|
---|
3149 | return make_parse_result(status_out_of_memory);
|
---|
3150 | }
|
---|
3151 |
|
---|
3152 | // read file in memory
|
---|
3153 | size_t read_size = fread(contents, 1, size, file);
|
---|
3154 | fclose(file);
|
---|
3155 |
|
---|
3156 | if (read_size != size)
|
---|
3157 | {
|
---|
3158 | global_deallocate(contents);
|
---|
3159 | return make_parse_result(status_io_error);
|
---|
3160 | }
|
---|
3161 |
|
---|
3162 | return doc.load_buffer_inplace_own(contents, size, options, encoding);
|
---|
3163 | }
|
---|
3164 |
|
---|
3165 | #ifndef PUGIXML_NO_STL
|
---|
3166 | template <typename T> xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding)
|
---|
3167 | {
|
---|
3168 | // get length of remaining data in stream
|
---|
3169 | typename std::basic_istream<T>::pos_type pos = stream.tellg();
|
---|
3170 | stream.seekg(0, std::ios::end);
|
---|
3171 | std::streamoff length = stream.tellg() - pos;
|
---|
3172 | stream.seekg(pos);
|
---|
3173 |
|
---|
3174 | if (stream.fail() || pos < 0) return make_parse_result(status_io_error);
|
---|
3175 |
|
---|
3176 | // guard against huge files
|
---|
3177 | size_t read_length = static_cast<size_t>(length);
|
---|
3178 |
|
---|
3179 | if (static_cast<std::streamsize>(read_length) != length || length < 0) return make_parse_result(status_out_of_memory);
|
---|
3180 |
|
---|
3181 | // read stream data into memory (guard against stream exceptions with buffer holder)
|
---|
3182 | buffer_holder buffer(global_allocate((read_length > 0 ? read_length : 1) * sizeof(T)), global_deallocate);
|
---|
3183 | if (!buffer.data) return make_parse_result(status_out_of_memory);
|
---|
3184 |
|
---|
3185 | stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
|
---|
3186 |
|
---|
3187 | // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
|
---|
3188 | if (stream.bad()) return make_parse_result(status_io_error);
|
---|
3189 |
|
---|
3190 | // load data from buffer
|
---|
3191 | size_t actual_length = static_cast<size_t>(stream.gcount());
|
---|
3192 | assert(actual_length <= read_length);
|
---|
3193 |
|
---|
3194 | return doc.load_buffer_inplace_own(buffer.release(), actual_length * sizeof(T), options, encoding);
|
---|
3195 | }
|
---|
3196 | #endif
|
---|
3197 |
|
---|
3198 | #if defined(_MSC_VER) || defined(__BORLANDC__) || defined(__MINGW32__)
|
---|
3199 | FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
|
---|
3200 | {
|
---|
3201 | return _wfopen(path, mode);
|
---|
3202 | }
|
---|
3203 | #else
|
---|
3204 | char* convert_path_heap(const wchar_t* str)
|
---|
3205 | {
|
---|
3206 | assert(str);
|
---|
3207 |
|
---|
3208 | // first pass: get length in utf8 characters
|
---|
3209 | size_t length = wcslen(str);
|
---|
3210 | size_t size = as_utf8_begin(str, length);
|
---|
3211 |
|
---|
3212 | // allocate resulting string
|
---|
3213 | char* result = static_cast<char*>(global_allocate(size + 1));
|
---|
3214 | if (!result) return 0;
|
---|
3215 |
|
---|
3216 | // second pass: convert to utf8
|
---|
3217 | as_utf8_end(result, size, str, length);
|
---|
3218 |
|
---|
3219 | return result;
|
---|
3220 | }
|
---|
3221 |
|
---|
3222 | FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
|
---|
3223 | {
|
---|
3224 | // there is no standard function to open wide paths, so our best bet is to try utf8 path
|
---|
3225 | char* path_utf8 = convert_path_heap(path);
|
---|
3226 | if (!path_utf8) return 0;
|
---|
3227 |
|
---|
3228 | // convert mode to ASCII (we mirror _wfopen interface)
|
---|
3229 | char mode_ascii[4] = {0};
|
---|
3230 | for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
|
---|
3231 |
|
---|
3232 | // try to open the utf8 path
|
---|
3233 | FILE* result = fopen(path_utf8, mode_ascii);
|
---|
3234 |
|
---|
3235 | // free dummy buffer
|
---|
3236 | global_deallocate(path_utf8);
|
---|
3237 |
|
---|
3238 | return result;
|
---|
3239 | }
|
---|
3240 | #endif
|
---|
3241 | }
|
---|
3242 |
|
---|
3243 | namespace pugi
|
---|
3244 | {
|
---|
3245 | xml_writer_file::xml_writer_file(void* file): file(file)
|
---|
3246 | {
|
---|
3247 | }
|
---|
3248 |
|
---|
3249 | void xml_writer_file::write(const void* data, size_t size)
|
---|
3250 | {
|
---|
3251 | fwrite(data, size, 1, static_cast<FILE*>(file));
|
---|
3252 | }
|
---|
3253 |
|
---|
3254 | #ifndef PUGIXML_NO_STL
|
---|
3255 | xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
|
---|
3256 | {
|
---|
3257 | }
|
---|
3258 |
|
---|
3259 | xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
|
---|
3260 | {
|
---|
3261 | }
|
---|
3262 |
|
---|
3263 | void xml_writer_stream::write(const void* data, size_t size)
|
---|
3264 | {
|
---|
3265 | if (narrow_stream)
|
---|
3266 | {
|
---|
3267 | assert(!wide_stream);
|
---|
3268 | narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
|
---|
3269 | }
|
---|
3270 | else
|
---|
3271 | {
|
---|
3272 | assert(wide_stream);
|
---|
3273 | assert(size % sizeof(wchar_t) == 0);
|
---|
3274 |
|
---|
3275 | wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
|
---|
3276 | }
|
---|
3277 | }
|
---|
3278 | #endif
|
---|
3279 |
|
---|
3280 | xml_tree_walker::xml_tree_walker(): _depth(0)
|
---|
3281 | {
|
---|
3282 | }
|
---|
3283 |
|
---|
3284 | xml_tree_walker::~xml_tree_walker()
|
---|
3285 | {
|
---|
3286 | }
|
---|
3287 |
|
---|
3288 | int xml_tree_walker::depth() const
|
---|
3289 | {
|
---|
3290 | return _depth;
|
---|
3291 | }
|
---|
3292 |
|
---|
3293 | bool xml_tree_walker::begin(xml_node&)
|
---|
3294 | {
|
---|
3295 | return true;
|
---|
3296 | }
|
---|
3297 |
|
---|
3298 | bool xml_tree_walker::end(xml_node&)
|
---|
3299 | {
|
---|
3300 | return true;
|
---|
3301 | }
|
---|
3302 |
|
---|
3303 | xml_attribute::xml_attribute(): _attr(0)
|
---|
3304 | {
|
---|
3305 | }
|
---|
3306 |
|
---|
3307 | xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
|
---|
3308 | {
|
---|
3309 | }
|
---|
3310 |
|
---|
3311 | xml_attribute::operator xml_attribute::unspecified_bool_type() const
|
---|
3312 | {
|
---|
3313 | return _attr ? &xml_attribute::_attr : 0;
|
---|
3314 | }
|
---|
3315 |
|
---|
3316 | bool xml_attribute::operator!() const
|
---|
3317 | {
|
---|
3318 | return !_attr;
|
---|
3319 | }
|
---|
3320 |
|
---|
3321 | bool xml_attribute::operator==(const xml_attribute& r) const
|
---|
3322 | {
|
---|
3323 | return (_attr == r._attr);
|
---|
3324 | }
|
---|
3325 |
|
---|
3326 | bool xml_attribute::operator!=(const xml_attribute& r) const
|
---|
3327 | {
|
---|
3328 | return (_attr != r._attr);
|
---|
3329 | }
|
---|
3330 |
|
---|
3331 | bool xml_attribute::operator<(const xml_attribute& r) const
|
---|
3332 | {
|
---|
3333 | return (_attr < r._attr);
|
---|
3334 | }
|
---|
3335 |
|
---|
3336 | bool xml_attribute::operator>(const xml_attribute& r) const
|
---|
3337 | {
|
---|
3338 | return (_attr > r._attr);
|
---|
3339 | }
|
---|
3340 |
|
---|
3341 | bool xml_attribute::operator<=(const xml_attribute& r) const
|
---|
3342 | {
|
---|
3343 | return (_attr <= r._attr);
|
---|
3344 | }
|
---|
3345 |
|
---|
3346 | bool xml_attribute::operator>=(const xml_attribute& r) const
|
---|
3347 | {
|
---|
3348 | return (_attr >= r._attr);
|
---|
3349 | }
|
---|
3350 |
|
---|
3351 | xml_attribute xml_attribute::next_attribute() const
|
---|
3352 | {
|
---|
3353 | return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
|
---|
3354 | }
|
---|
3355 |
|
---|
3356 | xml_attribute xml_attribute::previous_attribute() const
|
---|
3357 | {
|
---|
3358 | return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
|
---|
3359 | }
|
---|
3360 |
|
---|
3361 | int xml_attribute::as_int() const
|
---|
3362 | {
|
---|
3363 | if (!_attr || !_attr->value) return 0;
|
---|
3364 |
|
---|
3365 | #ifdef PUGIXML_WCHAR_MODE
|
---|
3366 | return (int)wcstol(_attr->value, 0, 10);
|
---|
3367 | #else
|
---|
3368 | return (int)strtol(_attr->value, 0, 10);
|
---|
3369 | #endif
|
---|
3370 | }
|
---|
3371 |
|
---|
3372 | unsigned int xml_attribute::as_uint() const
|
---|
3373 | {
|
---|
3374 | if (!_attr || !_attr->value) return 0;
|
---|
3375 |
|
---|
3376 | #ifdef PUGIXML_WCHAR_MODE
|
---|
3377 | return (unsigned int)wcstoul(_attr->value, 0, 10);
|
---|
3378 | #else
|
---|
3379 | return (unsigned int)strtoul(_attr->value, 0, 10);
|
---|
3380 | #endif
|
---|
3381 | }
|
---|
3382 |
|
---|
3383 | double xml_attribute::as_double() const
|
---|
3384 | {
|
---|
3385 | if (!_attr || !_attr->value) return 0;
|
---|
3386 |
|
---|
3387 | #ifdef PUGIXML_WCHAR_MODE
|
---|
3388 | return wcstod(_attr->value, 0);
|
---|
3389 | #else
|
---|
3390 | return strtod(_attr->value, 0);
|
---|
3391 | #endif
|
---|
3392 | }
|
---|
3393 |
|
---|
3394 | float xml_attribute::as_float() const
|
---|
3395 | {
|
---|
3396 | if (!_attr || !_attr->value) return 0;
|
---|
3397 |
|
---|
3398 | #ifdef PUGIXML_WCHAR_MODE
|
---|
3399 | return (float)wcstod(_attr->value, 0);
|
---|
3400 | #else
|
---|
3401 | return (float)strtod(_attr->value, 0);
|
---|
3402 | #endif
|
---|
3403 | }
|
---|
3404 |
|
---|
3405 | bool xml_attribute::as_bool() const
|
---|
3406 | {
|
---|
3407 | if (!_attr || !_attr->value) return false;
|
---|
3408 |
|
---|
3409 | // only look at first char
|
---|
3410 | char_t first = *_attr->value;
|
---|
3411 |
|
---|
3412 | // 1*, t* (true), T* (True), y* (yes), Y* (YES)
|
---|
3413 | return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
|
---|
3414 | }
|
---|
3415 |
|
---|
3416 | bool xml_attribute::empty() const
|
---|
3417 | {
|
---|
3418 | return !_attr;
|
---|
3419 | }
|
---|
3420 |
|
---|
3421 | const char_t* xml_attribute::name() const
|
---|
3422 | {
|
---|
3423 | return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT("");
|
---|
3424 | }
|
---|
3425 |
|
---|
3426 | const char_t* xml_attribute::value() const
|
---|
3427 | {
|
---|
3428 | return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT("");
|
---|
3429 | }
|
---|
3430 |
|
---|
3431 | size_t xml_attribute::hash_value() const
|
---|
3432 | {
|
---|
3433 | return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
|
---|
3434 | }
|
---|
3435 |
|
---|
3436 | xml_attribute_struct* xml_attribute::internal_object() const
|
---|
3437 | {
|
---|
3438 | return _attr;
|
---|
3439 | }
|
---|
3440 |
|
---|
3441 | xml_attribute& xml_attribute::operator=(const char_t* rhs)
|
---|
3442 | {
|
---|
3443 | set_value(rhs);
|
---|
3444 | return *this;
|
---|
3445 | }
|
---|
3446 |
|
---|
3447 | xml_attribute& xml_attribute::operator=(int rhs)
|
---|
3448 | {
|
---|
3449 | set_value(rhs);
|
---|
3450 | return *this;
|
---|
3451 | }
|
---|
3452 |
|
---|
3453 | xml_attribute& xml_attribute::operator=(unsigned int rhs)
|
---|
3454 | {
|
---|
3455 | set_value(rhs);
|
---|
3456 | return *this;
|
---|
3457 | }
|
---|
3458 |
|
---|
3459 | xml_attribute& xml_attribute::operator=(double rhs)
|
---|
3460 | {
|
---|
3461 | set_value(rhs);
|
---|
3462 | return *this;
|
---|
3463 | }
|
---|
3464 |
|
---|
3465 | xml_attribute& xml_attribute::operator=(bool rhs)
|
---|
3466 | {
|
---|
3467 | set_value(rhs);
|
---|
3468 | return *this;
|
---|
3469 | }
|
---|
3470 |
|
---|
3471 | bool xml_attribute::set_name(const char_t* rhs)
|
---|
3472 | {
|
---|
3473 | if (!_attr) return false;
|
---|
3474 |
|
---|
3475 | return strcpy_insitu(_attr->name, _attr->header, xml_memory_page_name_allocated_mask, rhs);
|
---|
3476 | }
|
---|
3477 |
|
---|
3478 | bool xml_attribute::set_value(const char_t* rhs)
|
---|
3479 | {
|
---|
3480 | if (!_attr) return false;
|
---|
3481 |
|
---|
3482 | return strcpy_insitu(_attr->value, _attr->header, xml_memory_page_value_allocated_mask, rhs);
|
---|
3483 | }
|
---|
3484 |
|
---|
3485 | bool xml_attribute::set_value(int rhs)
|
---|
3486 | {
|
---|
3487 | char buf[128];
|
---|
3488 | sprintf(buf, "%d", rhs);
|
---|
3489 |
|
---|
3490 | #ifdef PUGIXML_WCHAR_MODE
|
---|
3491 | char_t wbuf[128];
|
---|
3492 | widen_ascii(wbuf, buf);
|
---|
3493 |
|
---|
3494 | return set_value(wbuf);
|
---|
3495 | #else
|
---|
3496 | return set_value(buf);
|
---|
3497 | #endif
|
---|
3498 | }
|
---|
3499 |
|
---|
3500 | bool xml_attribute::set_value(unsigned int rhs)
|
---|
3501 | {
|
---|
3502 | char buf[128];
|
---|
3503 | sprintf(buf, "%u", rhs);
|
---|
3504 |
|
---|
3505 | #ifdef PUGIXML_WCHAR_MODE
|
---|
3506 | char_t wbuf[128];
|
---|
3507 | widen_ascii(wbuf, buf);
|
---|
3508 |
|
---|
3509 | return set_value(wbuf);
|
---|
3510 | #else
|
---|
3511 | return set_value(buf);
|
---|
3512 | #endif
|
---|
3513 | }
|
---|
3514 |
|
---|
3515 | bool xml_attribute::set_value(double rhs)
|
---|
3516 | {
|
---|
3517 | char buf[128];
|
---|
3518 | sprintf(buf, "%g", rhs);
|
---|
3519 |
|
---|
3520 | #ifdef PUGIXML_WCHAR_MODE
|
---|
3521 | char_t wbuf[128];
|
---|
3522 | widen_ascii(wbuf, buf);
|
---|
3523 |
|
---|
3524 | return set_value(wbuf);
|
---|
3525 | #else
|
---|
3526 | return set_value(buf);
|
---|
3527 | #endif
|
---|
3528 | }
|
---|
3529 |
|
---|
3530 | bool xml_attribute::set_value(bool rhs)
|
---|
3531 | {
|
---|
3532 | return set_value(rhs ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
|
---|
3533 | }
|
---|
3534 |
|
---|
#ifdef __BORLANDC__
// Logical AND between an attribute handle (converted to bool) and a bool;
// only compiled for Borland C++.
bool operator&&(const xml_attribute& lhs, bool rhs)
{
    const bool left = (bool)lhs;

    return left ? rhs : false;
}

// Logical OR between an attribute handle (converted to bool) and a bool;
// only compiled for Borland C++.
bool operator||(const xml_attribute& lhs, bool rhs)
{
    const bool left = (bool)lhs;

    return left ? true : rhs;
}
#endif
3546 |
|
---|
3547 | xml_node::xml_node(): _root(0)
|
---|
3548 | {
|
---|
3549 | }
|
---|
3550 |
|
---|
3551 | xml_node::xml_node(xml_node_struct* p): _root(p)
|
---|
3552 | {
|
---|
3553 | }
|
---|
3554 |
|
---|
3555 | xml_node::operator xml_node::unspecified_bool_type() const
|
---|
3556 | {
|
---|
3557 | return _root ? &xml_node::_root : 0;
|
---|
3558 | }
|
---|
3559 |
|
---|
3560 | bool xml_node::operator!() const
|
---|
3561 | {
|
---|
3562 | return !_root;
|
---|
3563 | }
|
---|
3564 |
|
---|
3565 | xml_node::iterator xml_node::begin() const
|
---|
3566 | {
|
---|
3567 | return iterator(_root ? _root->first_child : 0, _root);
|
---|
3568 | }
|
---|
3569 |
|
---|
3570 | xml_node::iterator xml_node::end() const
|
---|
3571 | {
|
---|
3572 | return iterator(0, _root);
|
---|
3573 | }
|
---|
3574 |
|
---|
3575 | xml_node::attribute_iterator xml_node::attributes_begin() const
|
---|
3576 | {
|
---|
3577 | return attribute_iterator(_root ? _root->first_attribute : 0, _root);
|
---|
3578 | }
|
---|
3579 |
|
---|
3580 | xml_node::attribute_iterator xml_node::attributes_end() const
|
---|
3581 | {
|
---|
3582 | return attribute_iterator(0, _root);
|
---|
3583 | }
|
---|
3584 |
|
---|
3585 | bool xml_node::operator==(const xml_node& r) const
|
---|
3586 | {
|
---|
3587 | return (_root == r._root);
|
---|
3588 | }
|
---|
3589 |
|
---|
3590 | bool xml_node::operator!=(const xml_node& r) const
|
---|
3591 | {
|
---|
3592 | return (_root != r._root);
|
---|
3593 | }
|
---|
3594 |
|
---|
3595 | bool xml_node::operator<(const xml_node& r) const
|
---|
3596 | {
|
---|
3597 | return (_root < r._root);
|
---|
3598 | }
|
---|
3599 |
|
---|
3600 | bool xml_node::operator>(const xml_node& r) const
|
---|
3601 | {
|
---|
3602 | return (_root > r._root);
|
---|
3603 | }
|
---|
3604 |
|
---|
3605 | bool xml_node::operator<=(const xml_node& r) const
|
---|
3606 | {
|
---|
3607 | return (_root <= r._root);
|
---|
3608 | }
|
---|
3609 |
|
---|
3610 | bool xml_node::operator>=(const xml_node& r) const
|
---|
3611 | {
|
---|
3612 | return (_root >= r._root);
|
---|
3613 | }
|
---|
3614 |
|
---|
3615 | bool xml_node::empty() const
|
---|
3616 | {
|
---|
3617 | return !_root;
|
---|
3618 | }
|
---|
3619 |
|
---|
3620 | const char_t* xml_node::name() const
|
---|
3621 | {
|
---|
3622 | return (_root && _root->name) ? _root->name : PUGIXML_TEXT("");
|
---|
3623 | }
|
---|
3624 |
|
---|
3625 | xml_node_type xml_node::type() const
|
---|
3626 | {
|
---|
3627 | return _root ? static_cast<xml_node_type>((_root->header & xml_memory_page_type_mask) + 1) : node_null;
|
---|
3628 | }
|
---|
3629 |
|
---|
3630 | const char_t* xml_node::value() const
|
---|
3631 | {
|
---|
3632 | return (_root && _root->value) ? _root->value : PUGIXML_TEXT("");
|
---|
3633 | }
|
---|
3634 |
|
---|
3635 | xml_node xml_node::child(const char_t* name) const
|
---|
3636 | {
|
---|
3637 | if (!_root) return xml_node();
|
---|
3638 |
|
---|
3639 | for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
|
---|
3640 | if (i->name && strequal(name, i->name)) return xml_node(i);
|
---|
3641 |
|
---|
3642 | return xml_node();
|
---|
3643 | }
|
---|
3644 |
|
---|
3645 | xml_attribute xml_node::attribute(const char_t* name) const
|
---|
3646 | {
|
---|
3647 | if (!_root) return xml_attribute();
|
---|
3648 |
|
---|
3649 | for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
|
---|
3650 | if (i->name && strequal(name, i->name))
|
---|
3651 | return xml_attribute(i);
|
---|
3652 |
|
---|
3653 | return xml_attribute();
|
---|
3654 | }
|
---|
3655 |
|
---|
3656 | xml_node xml_node::next_sibling(const char_t* name) const
|
---|
3657 | {
|
---|
3658 | if (!_root) return xml_node();
|
---|
3659 |
|
---|
3660 | for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
|
---|
3661 | if (i->name && strequal(name, i->name)) return xml_node(i);
|
---|
3662 |
|
---|
3663 | return xml_node();
|
---|
3664 | }
|
---|
3665 |
|
---|
3666 | xml_node xml_node::next_sibling() const
|
---|
3667 | {
|
---|
3668 | if (!_root) return xml_node();
|
---|
3669 |
|
---|
3670 | if (_root->next_sibling) return xml_node(_root->next_sibling);
|
---|
3671 | else return xml_node();
|
---|
3672 | }
|
---|
3673 |
|
---|
3674 | xml_node xml_node::previous_sibling(const char_t* name) const
|
---|
3675 | {
|
---|
3676 | if (!_root) return xml_node();
|
---|
3677 |
|
---|
3678 | for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
|
---|
3679 | if (i->name && strequal(name, i->name)) return xml_node(i);
|
---|
3680 |
|
---|
3681 | return xml_node();
|
---|
3682 | }
|
---|
3683 |
|
---|
3684 | xml_node xml_node::previous_sibling() const
|
---|
3685 | {
|
---|
3686 | if (!_root) return xml_node();
|
---|
3687 |
|
---|
3688 | if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
|
---|
3689 | else return xml_node();
|
---|
3690 | }
|
---|
3691 |
|
---|
3692 | xml_node xml_node::parent() const
|
---|
3693 | {
|
---|
3694 | return _root ? xml_node(_root->parent) : xml_node();
|
---|
3695 | }
|
---|
3696 |
|
---|
3697 | xml_node xml_node::root() const
|
---|
3698 | {
|
---|
3699 | if (!_root) return xml_node();
|
---|
3700 |
|
---|
3701 | xml_memory_page* page = reinterpret_cast<xml_memory_page*>(_root->header & xml_memory_page_pointer_mask);
|
---|
3702 |
|
---|
3703 | return xml_node(static_cast<xml_document_struct*>(page->allocator));
|
---|
3704 | }
|
---|
3705 |
|
---|
3706 | const char_t* xml_node::child_value() const
|
---|
3707 | {
|
---|
3708 | if (!_root) return PUGIXML_TEXT("");
|
---|
3709 |
|
---|
3710 | for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
|
---|
3711 | {
|
---|
3712 | xml_node_type type = static_cast<xml_node_type>((i->header & xml_memory_page_type_mask) + 1);
|
---|
3713 |
|
---|
3714 | if (i->value && (type == node_pcdata || type == node_cdata))
|
---|
3715 | return i->value;
|
---|
3716 | }
|
---|
3717 |
|
---|
3718 | return PUGIXML_TEXT("");
|
---|
3719 | }
|
---|
3720 |
|
---|
3721 | const char_t* xml_node::child_value(const char_t* name) const
|
---|
3722 | {
|
---|
3723 | return child(name).child_value();
|
---|
3724 | }
|
---|
3725 |
|
---|
3726 | xml_attribute xml_node::first_attribute() const
|
---|
3727 | {
|
---|
3728 | return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
|
---|
3729 | }
|
---|
3730 |
|
---|
3731 | xml_attribute xml_node::last_attribute() const
|
---|
3732 | {
|
---|
3733 | return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
|
---|
3734 | }
|
---|
3735 |
|
---|
3736 | xml_node xml_node::first_child() const
|
---|
3737 | {
|
---|
3738 | return _root ? xml_node(_root->first_child) : xml_node();
|
---|
3739 | }
|
---|
3740 |
|
---|
3741 | xml_node xml_node::last_child() const
|
---|
3742 | {
|
---|
3743 | return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
|
---|
3744 | }
|
---|
3745 |
|
---|
3746 | bool xml_node::set_name(const char_t* rhs)
|
---|
3747 | {
|
---|
3748 | switch (type())
|
---|
3749 | {
|
---|
3750 | case node_pi:
|
---|
3751 | case node_declaration:
|
---|
3752 | case node_element:
|
---|
3753 | return strcpy_insitu(_root->name, _root->header, xml_memory_page_name_allocated_mask, rhs);
|
---|
3754 |
|
---|
3755 | default:
|
---|
3756 | return false;
|
---|
3757 | }
|
---|
3758 | }
|
---|
3759 |
|
---|
3760 | bool xml_node::set_value(const char_t* rhs)
|
---|
3761 | {
|
---|
3762 | switch (type())
|
---|
3763 | {
|
---|
3764 | case node_pi:
|
---|
3765 | case node_cdata:
|
---|
3766 | case node_pcdata:
|
---|
3767 | case node_comment:
|
---|
3768 | case node_doctype:
|
---|
3769 | return strcpy_insitu(_root->value, _root->header, xml_memory_page_value_allocated_mask, rhs);
|
---|
3770 |
|
---|
3771 | default:
|
---|
3772 | return false;
|
---|
3773 | }
|
---|
3774 | }
|
---|
3775 |
|
---|
3776 | xml_attribute xml_node::append_attribute(const char_t* name)
|
---|
3777 | {
|
---|
3778 | if (type() != node_element && type() != node_declaration) return xml_attribute();
|
---|
3779 |
|
---|
3780 | xml_attribute a(append_attribute_ll(_root, get_allocator(_root)));
|
---|
3781 | a.set_name(name);
|
---|
3782 |
|
---|
3783 | return a;
|
---|
3784 | }
|
---|
3785 |
|
---|
3786 | xml_attribute xml_node::prepend_attribute(const char_t* name)
|
---|
3787 | {
|
---|
3788 | if (type() != node_element && type() != node_declaration) return xml_attribute();
|
---|
3789 |
|
---|
3790 | xml_attribute a(allocate_attribute(get_allocator(_root)));
|
---|
3791 | if (!a) return xml_attribute();
|
---|
3792 |
|
---|
3793 | a.set_name(name);
|
---|
3794 |
|
---|
3795 | xml_attribute_struct* head = _root->first_attribute;
|
---|
3796 |
|
---|
3797 | if (head)
|
---|
3798 | {
|
---|
3799 | a._attr->prev_attribute_c = head->prev_attribute_c;
|
---|
3800 | head->prev_attribute_c = a._attr;
|
---|
3801 | }
|
---|
3802 | else
|
---|
3803 | a._attr->prev_attribute_c = a._attr;
|
---|
3804 |
|
---|
3805 | a._attr->next_attribute = head;
|
---|
3806 | _root->first_attribute = a._attr;
|
---|
3807 |
|
---|
3808 | return a;
|
---|
3809 | }
|
---|
3810 |
|
---|
3811 | xml_attribute xml_node::insert_attribute_before(const char_t* name, const xml_attribute& attr)
|
---|
3812 | {
|
---|
3813 | if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
|
---|
3814 |
|
---|
3815 | // check that attribute belongs to *this
|
---|
3816 | xml_attribute_struct* cur = attr._attr;
|
---|
3817 |
|
---|
3818 | while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
|
---|
3819 |
|
---|
3820 | if (cur != _root->first_attribute) return xml_attribute();
|
---|
3821 |
|
---|
3822 | xml_attribute a(allocate_attribute(get_allocator(_root)));
|
---|
3823 | if (!a) return xml_attribute();
|
---|
3824 |
|
---|
3825 | a.set_name(name);
|
---|
3826 |
|
---|
3827 | if (attr._attr->prev_attribute_c->next_attribute)
|
---|
3828 | attr._attr->prev_attribute_c->next_attribute = a._attr;
|
---|
3829 | else
|
---|
3830 | _root->first_attribute = a._attr;
|
---|
3831 |
|
---|
3832 | a._attr->prev_attribute_c = attr._attr->prev_attribute_c;
|
---|
3833 | a._attr->next_attribute = attr._attr;
|
---|
3834 | attr._attr->prev_attribute_c = a._attr;
|
---|
3835 |
|
---|
3836 | return a;
|
---|
3837 | }
|
---|
3838 |
|
---|
3839 | xml_attribute xml_node::insert_attribute_after(const char_t* name, const xml_attribute& attr)
|
---|
3840 | {
|
---|
3841 | if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
|
---|
3842 |
|
---|
3843 | // check that attribute belongs to *this
|
---|
3844 | xml_attribute_struct* cur = attr._attr;
|
---|
3845 |
|
---|
3846 | while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
|
---|
3847 |
|
---|
3848 | if (cur != _root->first_attribute) return xml_attribute();
|
---|
3849 |
|
---|
3850 | xml_attribute a(allocate_attribute(get_allocator(_root)));
|
---|
3851 | if (!a) return xml_attribute();
|
---|
3852 |
|
---|
3853 | a.set_name(name);
|
---|
3854 |
|
---|
3855 | if (attr._attr->next_attribute)
|
---|
3856 | attr._attr->next_attribute->prev_attribute_c = a._attr;
|
---|
3857 | else
|
---|
3858 | _root->first_attribute->prev_attribute_c = a._attr;
|
---|
3859 |
|
---|
3860 | a._attr->next_attribute = attr._attr->next_attribute;
|
---|
3861 | a._attr->prev_attribute_c = attr._attr;
|
---|
3862 | attr._attr->next_attribute = a._attr;
|
---|
3863 |
|
---|
3864 | return a;
|
---|
3865 | }
|
---|
3866 |
|
---|
3867 | xml_attribute xml_node::append_copy(const xml_attribute& proto)
|
---|
3868 | {
|
---|
3869 | if (!proto) return xml_attribute();
|
---|
3870 |
|
---|
3871 | xml_attribute result = append_attribute(proto.name());
|
---|
3872 | result.set_value(proto.value());
|
---|
3873 |
|
---|
3874 | return result;
|
---|
3875 | }
|
---|
3876 |
|
---|
3877 | xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
|
---|
3878 | {
|
---|
3879 | if (!proto) return xml_attribute();
|
---|
3880 |
|
---|
3881 | xml_attribute result = prepend_attribute(proto.name());
|
---|
3882 | result.set_value(proto.value());
|
---|
3883 |
|
---|
3884 | return result;
|
---|
3885 | }
|
---|
3886 |
|
---|
3887 | xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
|
---|
3888 | {
|
---|
3889 | if (!proto) return xml_attribute();
|
---|
3890 |
|
---|
3891 | xml_attribute result = insert_attribute_after(proto.name(), attr);
|
---|
3892 | result.set_value(proto.value());
|
---|
3893 |
|
---|
3894 | return result;
|
---|
3895 | }
|
---|
3896 |
|
---|
3897 | xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
|
---|
3898 | {
|
---|
3899 | if (!proto) return xml_attribute();
|
---|
3900 |
|
---|
3901 | xml_attribute result = insert_attribute_before(proto.name(), attr);
|
---|
3902 | result.set_value(proto.value());
|
---|
3903 |
|
---|
3904 | return result;
|
---|
3905 | }
|
---|
3906 |
|
---|
3907 | xml_node xml_node::append_child(xml_node_type type)
|
---|
3908 | {
|
---|
3909 | if (!allow_insert_child(this->type(), type)) return xml_node();
|
---|
3910 |
|
---|
3911 | xml_node n(append_node(_root, get_allocator(_root), type));
|
---|
3912 |
|
---|
3913 | if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
|
---|
3914 |
|
---|
3915 | return n;
|
---|
3916 | }
|
---|
3917 |
|
---|
3918 | xml_node xml_node::prepend_child(xml_node_type type)
|
---|
3919 | {
|
---|
3920 | if (!allow_insert_child(this->type(), type)) return xml_node();
|
---|
3921 |
|
---|
3922 | xml_node n(allocate_node(get_allocator(_root), type));
|
---|
3923 | if (!n) return xml_node();
|
---|
3924 |
|
---|
3925 | n._root->parent = _root;
|
---|
3926 |
|
---|
3927 | xml_node_struct* head = _root->first_child;
|
---|
3928 |
|
---|
3929 | if (head)
|
---|
3930 | {
|
---|
3931 | n._root->prev_sibling_c = head->prev_sibling_c;
|
---|
3932 | head->prev_sibling_c = n._root;
|
---|
3933 | }
|
---|
3934 | else
|
---|
3935 | n._root->prev_sibling_c = n._root;
|
---|
3936 |
|
---|
3937 | n._root->next_sibling = head;
|
---|
3938 | _root->first_child = n._root;
|
---|
3939 |
|
---|
3940 | if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
|
---|
3941 |
|
---|
3942 | return n;
|
---|
3943 | }
|
---|
3944 |
|
---|
3945 | xml_node xml_node::insert_child_before(xml_node_type type, const xml_node& node)
|
---|
3946 | {
|
---|
3947 | if (!allow_insert_child(this->type(), type)) return xml_node();
|
---|
3948 | if (!node._root || node._root->parent != _root) return xml_node();
|
---|
3949 |
|
---|
3950 | xml_node n(allocate_node(get_allocator(_root), type));
|
---|
3951 | if (!n) return xml_node();
|
---|
3952 |
|
---|
3953 | n._root->parent = _root;
|
---|
3954 |
|
---|
3955 | if (node._root->prev_sibling_c->next_sibling)
|
---|
3956 | node._root->prev_sibling_c->next_sibling = n._root;
|
---|
3957 | else
|
---|
3958 | _root->first_child = n._root;
|
---|
3959 |
|
---|
3960 | n._root->prev_sibling_c = node._root->prev_sibling_c;
|
---|
3961 | n._root->next_sibling = node._root;
|
---|
3962 | node._root->prev_sibling_c = n._root;
|
---|
3963 |
|
---|
3964 | if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
|
---|
3965 |
|
---|
3966 | return n;
|
---|
3967 | }
|
---|
3968 |
|
---|
3969 | xml_node xml_node::insert_child_after(xml_node_type type, const xml_node& node)
|
---|
3970 | {
|
---|
3971 | if (!allow_insert_child(this->type(), type)) return xml_node();
|
---|
3972 | if (!node._root || node._root->parent != _root) return xml_node();
|
---|
3973 |
|
---|
3974 | xml_node n(allocate_node(get_allocator(_root), type));
|
---|
3975 | if (!n) return xml_node();
|
---|
3976 |
|
---|
3977 | n._root->parent = _root;
|
---|
3978 |
|
---|
3979 | if (node._root->next_sibling)
|
---|
3980 | node._root->next_sibling->prev_sibling_c = n._root;
|
---|
3981 | else
|
---|
3982 | _root->first_child->prev_sibling_c = n._root;
|
---|
3983 |
|
---|
3984 | n._root->next_sibling = node._root->next_sibling;
|
---|
3985 | n._root->prev_sibling_c = node._root;
|
---|
3986 | node._root->next_sibling = n._root;
|
---|
3987 |
|
---|
3988 | if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
|
---|
3989 |
|
---|
3990 | return n;
|
---|
3991 | }
|
---|
3992 |
|
---|
3993 | xml_node xml_node::append_child(const char_t* name)
|
---|
3994 | {
|
---|
3995 | xml_node result = append_child(node_element);
|
---|
3996 |
|
---|
3997 | result.set_name(name);
|
---|
3998 |
|
---|
3999 | return result;
|
---|
4000 | }
|
---|
4001 |
|
---|
4002 | xml_node xml_node::prepend_child(const char_t* name)
|
---|
4003 | {
|
---|
4004 | xml_node result = prepend_child(node_element);
|
---|
4005 |
|
---|
4006 | result.set_name(name);
|
---|
4007 |
|
---|
4008 | return result;
|
---|
4009 | }
|
---|
4010 |
|
---|
4011 | xml_node xml_node::insert_child_after(const char_t* name, const xml_node& node)
|
---|
4012 | {
|
---|
4013 | xml_node result = insert_child_after(node_element, node);
|
---|
4014 |
|
---|
4015 | result.set_name(name);
|
---|
4016 |
|
---|
4017 | return result;
|
---|
4018 | }
|
---|
4019 |
|
---|
4020 | xml_node xml_node::insert_child_before(const char_t* name, const xml_node& node)
|
---|
4021 | {
|
---|
4022 | xml_node result = insert_child_before(node_element, node);
|
---|
4023 |
|
---|
4024 | result.set_name(name);
|
---|
4025 |
|
---|
4026 | return result;
|
---|
4027 | }
|
---|
4028 |
|
---|
4029 | xml_node xml_node::append_copy(const xml_node& proto)
|
---|
4030 | {
|
---|
4031 | xml_node result = append_child(proto.type());
|
---|
4032 |
|
---|
4033 | if (result) recursive_copy_skip(result, proto, result);
|
---|
4034 |
|
---|
4035 | return result;
|
---|
4036 | }
|
---|
4037 |
|
---|
4038 | xml_node xml_node::prepend_copy(const xml_node& proto)
|
---|
4039 | {
|
---|
4040 | xml_node result = prepend_child(proto.type());
|
---|
4041 |
|
---|
4042 | if (result) recursive_copy_skip(result, proto, result);
|
---|
4043 |
|
---|
4044 | return result;
|
---|
4045 | }
|
---|
4046 |
|
---|
4047 | xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
|
---|
4048 | {
|
---|
4049 | xml_node result = insert_child_after(proto.type(), node);
|
---|
4050 |
|
---|
4051 | if (result) recursive_copy_skip(result, proto, result);
|
---|
4052 |
|
---|
4053 | return result;
|
---|
4054 | }
|
---|
4055 |
|
---|
4056 | xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
|
---|
4057 | {
|
---|
4058 | xml_node result = insert_child_before(proto.type(), node);
|
---|
4059 |
|
---|
4060 | if (result) recursive_copy_skip(result, proto, result);
|
---|
4061 |
|
---|
4062 | return result;
|
---|
4063 | }
|
---|
4064 |
|
---|
4065 | bool xml_node::remove_attribute(const char_t* name)
|
---|
4066 | {
|
---|
4067 | return remove_attribute(attribute(name));
|
---|
4068 | }
|
---|
4069 |
|
---|
4070 | bool xml_node::remove_attribute(const xml_attribute& a)
|
---|
4071 | {
|
---|
4072 | if (!_root || !a._attr) return false;
|
---|
4073 |
|
---|
4074 | // check that attribute belongs to *this
|
---|
4075 | xml_attribute_struct* attr = a._attr;
|
---|
4076 |
|
---|
4077 | while (attr->prev_attribute_c->next_attribute) attr = attr->prev_attribute_c;
|
---|
4078 |
|
---|
4079 | if (attr != _root->first_attribute) return false;
|
---|
4080 |
|
---|
4081 | if (a._attr->next_attribute) a._attr->next_attribute->prev_attribute_c = a._attr->prev_attribute_c;
|
---|
4082 | else if (_root->first_attribute) _root->first_attribute->prev_attribute_c = a._attr->prev_attribute_c;
|
---|
4083 |
|
---|
4084 | if (a._attr->prev_attribute_c->next_attribute) a._attr->prev_attribute_c->next_attribute = a._attr->next_attribute;
|
---|
4085 | else _root->first_attribute = a._attr->next_attribute;
|
---|
4086 |
|
---|
4087 | destroy_attribute(a._attr, get_allocator(_root));
|
---|
4088 |
|
---|
4089 | return true;
|
---|
4090 | }
|
---|
4091 |
|
---|
4092 | bool xml_node::remove_child(const char_t* name)
|
---|
4093 | {
|
---|
4094 | return remove_child(child(name));
|
---|
4095 | }
|
---|
4096 |
|
---|
4097 | bool xml_node::remove_child(const xml_node& n)
|
---|
4098 | {
|
---|
4099 | if (!_root || !n._root || n._root->parent != _root) return false;
|
---|
4100 |
|
---|
4101 | if (n._root->next_sibling) n._root->next_sibling->prev_sibling_c = n._root->prev_sibling_c;
|
---|
4102 | else if (_root->first_child) _root->first_child->prev_sibling_c = n._root->prev_sibling_c;
|
---|
4103 |
|
---|
4104 | if (n._root->prev_sibling_c->next_sibling) n._root->prev_sibling_c->next_sibling = n._root->next_sibling;
|
---|
4105 | else _root->first_child = n._root->next_sibling;
|
---|
4106 |
|
---|
4107 | destroy_node(n._root, get_allocator(_root));
|
---|
4108 |
|
---|
4109 | return true;
|
---|
4110 | }
|
---|
4111 |
|
---|
4112 | xml_node xml_node::find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const
|
---|
4113 | {
|
---|
4114 | if (!_root) return xml_node();
|
---|
4115 |
|
---|
4116 | for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
|
---|
4117 | if (i->name && strequal(name, i->name))
|
---|
4118 | {
|
---|
4119 | for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
|
---|
4120 | if (strequal(attr_name, a->name) && strequal(attr_value, a->value))
|
---|
4121 | return xml_node(i);
|
---|
4122 | }
|
---|
4123 |
|
---|
4124 | return xml_node();
|
---|
4125 | }
|
---|
4126 |
|
---|
4127 | xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
|
---|
4128 | {
|
---|
4129 | if (!_root) return xml_node();
|
---|
4130 |
|
---|
4131 | for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
|
---|
4132 | for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
|
---|
4133 | if (strequal(attr_name, a->name) && strequal(attr_value, a->value))
|
---|
4134 | return xml_node(i);
|
---|
4135 |
|
---|
4136 | return xml_node();
|
---|
4137 | }
|
---|
4138 |
|
---|
4139 | #ifndef PUGIXML_NO_STL
|
---|
4140 | string_t xml_node::path(char_t delimiter) const
|
---|
4141 | {
|
---|
4142 | string_t path;
|
---|
4143 |
|
---|
4144 | xml_node cursor = *this; // Make a copy.
|
---|
4145 |
|
---|
4146 | path = cursor.name();
|
---|
4147 |
|
---|
4148 | while (cursor.parent())
|
---|
4149 | {
|
---|
4150 | cursor = cursor.parent();
|
---|
4151 |
|
---|
4152 | string_t temp = cursor.name();
|
---|
4153 | temp += delimiter;
|
---|
4154 | temp += path;
|
---|
4155 | path.swap(temp);
|
---|
4156 | }
|
---|
4157 |
|
---|
4158 | return path;
|
---|
4159 | }
|
---|
4160 | #endif
|
---|
4161 |
|
---|
4162 | xml_node xml_node::first_element_by_path(const char_t* path, char_t delimiter) const
|
---|
4163 | {
|
---|
4164 | xml_node found = *this; // Current search context.
|
---|
4165 |
|
---|
4166 | if (!_root || !path || !path[0]) return found;
|
---|
4167 |
|
---|
4168 | if (path[0] == delimiter)
|
---|
4169 | {
|
---|
4170 | // Absolute path; e.g. '/foo/bar'
|
---|
4171 | found = found.root();
|
---|
4172 | ++path;
|
---|
4173 | }
|
---|
4174 |
|
---|
4175 | const char_t* path_segment = path;
|
---|
4176 |
|
---|
4177 | while (*path_segment == delimiter) ++path_segment;
|
---|
4178 |
|
---|
4179 | const char_t* path_segment_end = path_segment;
|
---|
4180 |
|
---|
4181 | while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
|
---|
4182 |
|
---|
4183 | if (path_segment == path_segment_end) return found;
|
---|
4184 |
|
---|
4185 | const char_t* next_segment = path_segment_end;
|
---|
4186 |
|
---|
4187 | while (*next_segment == delimiter) ++next_segment;
|
---|
4188 |
|
---|
4189 | if (*path_segment == '.' && path_segment + 1 == path_segment_end)
|
---|
4190 | return found.first_element_by_path(next_segment, delimiter);
|
---|
4191 | else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
|
---|
4192 | return found.parent().first_element_by_path(next_segment, delimiter);
|
---|
4193 | else
|
---|
4194 | {
|
---|
4195 | for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
|
---|
4196 | {
|
---|
4197 | if (j->name && strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
|
---|
4198 | {
|
---|
4199 | xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
|
---|
4200 |
|
---|
4201 | if (subsearch) return subsearch;
|
---|
4202 | }
|
---|
4203 | }
|
---|
4204 |
|
---|
4205 | return xml_node();
|
---|
4206 | }
|
---|
4207 | }
|
---|
4208 |
|
---|
4209 | bool xml_node::traverse(xml_tree_walker& walker)
|
---|
4210 | {
|
---|
4211 | walker._depth = -1;
|
---|
4212 |
|
---|
4213 | xml_node arg_begin = *this;
|
---|
4214 | if (!walker.begin(arg_begin)) return false;
|
---|
4215 |
|
---|
4216 | xml_node cur = first_child();
|
---|
4217 |
|
---|
4218 | if (cur)
|
---|
4219 | {
|
---|
4220 | ++walker._depth;
|
---|
4221 |
|
---|
4222 | do
|
---|
4223 | {
|
---|
4224 | xml_node arg_for_each = cur;
|
---|
4225 | if (!walker.for_each(arg_for_each))
|
---|
4226 | return false;
|
---|
4227 |
|
---|
4228 | if (cur.first_child())
|
---|
4229 | {
|
---|
4230 | ++walker._depth;
|
---|
4231 | cur = cur.first_child();
|
---|
4232 | }
|
---|
4233 | else if (cur.next_sibling())
|
---|
4234 | cur = cur.next_sibling();
|
---|
4235 | else
|
---|
4236 | {
|
---|
4237 | // Borland C++ workaround
|
---|
4238 | while (!cur.next_sibling() && cur != *this && (bool)cur.parent())
|
---|
4239 | {
|
---|
4240 | --walker._depth;
|
---|
4241 | cur = cur.parent();
|
---|
4242 | }
|
---|
4243 |
|
---|
4244 | if (cur != *this)
|
---|
4245 | cur = cur.next_sibling();
|
---|
4246 | }
|
---|
4247 | }
|
---|
4248 | while (cur && cur != *this);
|
---|
4249 | }
|
---|
4250 |
|
---|
4251 | assert(walker._depth == -1);
|
---|
4252 |
|
---|
4253 | xml_node arg_end = *this;
|
---|
4254 | return walker.end(arg_end);
|
---|
4255 | }
|
---|
4256 |
|
---|
4257 | size_t xml_node::hash_value() const
|
---|
4258 | {
|
---|
4259 | return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
|
---|
4260 | }
|
---|
4261 |
|
---|
4262 | xml_node_struct* xml_node::internal_object() const
|
---|
4263 | {
|
---|
4264 | return _root;
|
---|
4265 | }
|
---|
4266 |
|
---|
4267 | void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
|
---|
4268 | {
|
---|
4269 | if (!_root) return;
|
---|
4270 |
|
---|
4271 | xml_buffered_writer buffered_writer(writer, encoding);
|
---|
4272 |
|
---|
4273 | node_output(buffered_writer, *this, indent, flags, depth);
|
---|
4274 | }
|
---|
4275 |
|
---|
4276 | #ifndef PUGIXML_NO_STL
|
---|
4277 | void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
|
---|
4278 | {
|
---|
4279 | xml_writer_stream writer(stream);
|
---|
4280 |
|
---|
4281 | print(writer, indent, flags, encoding, depth);
|
---|
4282 | }
|
---|
4283 |
|
---|
4284 | void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
|
---|
4285 | {
|
---|
4286 | xml_writer_stream writer(stream);
|
---|
4287 |
|
---|
4288 | print(writer, indent, flags, encoding_wchar, depth);
|
---|
4289 | }
|
---|
4290 | #endif
|
---|
4291 |
|
---|
4292 | ptrdiff_t xml_node::offset_debug() const
|
---|
4293 | {
|
---|
4294 | xml_node_struct* r = root()._root;
|
---|
4295 |
|
---|
4296 | if (!r) return -1;
|
---|
4297 |
|
---|
4298 | const char_t* buffer = static_cast<xml_document_struct*>(r)->buffer;
|
---|
4299 |
|
---|
4300 | if (!buffer) return -1;
|
---|
4301 |
|
---|
4302 | switch (type())
|
---|
4303 | {
|
---|
4304 | case node_document:
|
---|
4305 | return 0;
|
---|
4306 |
|
---|
4307 | case node_element:
|
---|
4308 | case node_declaration:
|
---|
4309 | case node_pi:
|
---|
4310 | return (_root->header & xml_memory_page_name_allocated_mask) ? -1 : _root->name - buffer;
|
---|
4311 |
|
---|
4312 | case node_pcdata:
|
---|
4313 | case node_cdata:
|
---|
4314 | case node_comment:
|
---|
4315 | case node_doctype:
|
---|
4316 | return (_root->header & xml_memory_page_value_allocated_mask) ? -1 : _root->value - buffer;
|
---|
4317 |
|
---|
4318 | default:
|
---|
4319 | return -1;
|
---|
4320 | }
|
---|
4321 | }
|
---|
4322 |
|
---|
#ifdef __BORLANDC__
// Logical AND between a node handle (converted to bool) and a bool; only
// compiled for Borland C++.
bool operator&&(const xml_node& lhs, bool rhs)
{
    const bool left = (bool)lhs;

    return left ? rhs : false;
}

// Logical OR between a node handle (converted to bool) and a bool; only
// compiled for Borland C++.
bool operator||(const xml_node& lhs, bool rhs)
{
    const bool left = (bool)lhs;

    return left ? true : rhs;
}
#endif
4334 |
|
---|
4335 | xml_node_iterator::xml_node_iterator()
|
---|
4336 | {
|
---|
4337 | }
|
---|
4338 |
|
---|
4339 | xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
|
---|
4340 | {
|
---|
4341 | }
|
---|
4342 |
|
---|
4343 | xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
|
---|
4344 | {
|
---|
4345 | }
|
---|
4346 |
|
---|
4347 | bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
|
---|
4348 | {
|
---|
4349 | return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
|
---|
4350 | }
|
---|
4351 |
|
---|
4352 | bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
|
---|
4353 | {
|
---|
4354 | return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
|
---|
4355 | }
|
---|
4356 |
|
---|
4357 | xml_node& xml_node_iterator::operator*()
|
---|
4358 | {
|
---|
4359 | assert(_wrap._root);
|
---|
4360 | return _wrap;
|
---|
4361 | }
|
---|
4362 |
|
---|
4363 | xml_node* xml_node_iterator::operator->()
|
---|
4364 | {
|
---|
4365 | assert(_wrap._root);
|
---|
4366 | return &_wrap;
|
---|
4367 | }
|
---|
4368 |
|
---|
4369 | const xml_node_iterator& xml_node_iterator::operator++()
|
---|
4370 | {
|
---|
4371 | assert(_wrap._root);
|
---|
4372 | _wrap._root = _wrap._root->next_sibling;
|
---|
4373 | return *this;
|
---|
4374 | }
|
---|
4375 |
|
---|
4376 | xml_node_iterator xml_node_iterator::operator++(int)
|
---|
4377 | {
|
---|
4378 | xml_node_iterator temp = *this;
|
---|
4379 | ++*this;
|
---|
4380 | return temp;
|
---|
4381 | }
|
---|
4382 |
|
---|
4383 | const xml_node_iterator& xml_node_iterator::operator--()
|
---|
4384 | {
|
---|
4385 | _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
|
---|
4386 | return *this;
|
---|
4387 | }
|
---|
4388 |
|
---|
4389 | xml_node_iterator xml_node_iterator::operator--(int)
|
---|
4390 | {
|
---|
4391 | xml_node_iterator temp = *this;
|
---|
4392 | --*this;
|
---|
4393 | return temp;
|
---|
4394 | }
|
---|
4395 |
|
---|
4396 | xml_attribute_iterator::xml_attribute_iterator()
|
---|
4397 | {
|
---|
4398 | }
|
---|
4399 |
|
---|
4400 | xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
|
---|
4401 | {
|
---|
4402 | }
|
---|
4403 |
|
---|
4404 | xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
|
---|
4405 | {
|
---|
4406 | }
|
---|
4407 |
|
---|
4408 | bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
|
---|
4409 | {
|
---|
4410 | return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
|
---|
4411 | }
|
---|
4412 |
|
---|
4413 | bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
|
---|
4414 | {
|
---|
4415 | return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
|
---|
4416 | }
|
---|
4417 |
|
---|
4418 | xml_attribute& xml_attribute_iterator::operator*()
|
---|
4419 | {
|
---|
4420 | assert(_wrap._attr);
|
---|
4421 | return _wrap;
|
---|
4422 | }
|
---|
4423 |
|
---|
4424 | xml_attribute* xml_attribute_iterator::operator->()
|
---|
4425 | {
|
---|
4426 | assert(_wrap._attr);
|
---|
4427 | return &_wrap;
|
---|
4428 | }
|
---|
4429 |
|
---|
4430 | const xml_attribute_iterator& xml_attribute_iterator::operator++()
|
---|
4431 | {
|
---|
4432 | assert(_wrap._attr);
|
---|
4433 | _wrap._attr = _wrap._attr->next_attribute;
|
---|
4434 | return *this;
|
---|
4435 | }
|
---|
4436 |
|
---|
4437 | xml_attribute_iterator xml_attribute_iterator::operator++(int)
|
---|
4438 | {
|
---|
4439 | xml_attribute_iterator temp = *this;
|
---|
4440 | ++*this;
|
---|
4441 | return temp;
|
---|
4442 | }
|
---|
4443 |
|
---|
4444 | const xml_attribute_iterator& xml_attribute_iterator::operator--()
|
---|
4445 | {
|
---|
4446 | _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
|
---|
4447 | return *this;
|
---|
4448 | }
|
---|
4449 |
|
---|
4450 | xml_attribute_iterator xml_attribute_iterator::operator--(int)
|
---|
4451 | {
|
---|
4452 | xml_attribute_iterator temp = *this;
|
---|
4453 | --*this;
|
---|
4454 | return temp;
|
---|
4455 | }
|
---|
4456 |
|
---|
4457 | xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
|
---|
4458 | {
|
---|
4459 | }
|
---|
4460 |
|
---|
4461 | xml_parse_result::operator bool() const
|
---|
4462 | {
|
---|
4463 | return status == status_ok;
|
---|
4464 | }
|
---|
4465 |
|
---|
4466 | const char* xml_parse_result::description() const
|
---|
4467 | {
|
---|
4468 | switch (status)
|
---|
4469 | {
|
---|
4470 | case status_ok: return "No error";
|
---|
4471 |
|
---|
4472 | case status_file_not_found: return "File was not found";
|
---|
4473 | case status_io_error: return "Error reading from file/stream";
|
---|
4474 | case status_out_of_memory: return "Could not allocate memory";
|
---|
4475 | case status_internal_error: return "Internal error occurred";
|
---|
4476 |
|
---|
4477 | case status_unrecognized_tag: return "Could not determine tag type";
|
---|
4478 |
|
---|
4479 | case status_bad_pi: return "Error parsing document declaration/processing instruction";
|
---|
4480 | case status_bad_comment: return "Error parsing comment";
|
---|
4481 | case status_bad_cdata: return "Error parsing CDATA section";
|
---|
4482 | case status_bad_doctype: return "Error parsing document type declaration";
|
---|
4483 | case status_bad_pcdata: return "Error parsing PCDATA section";
|
---|
4484 | case status_bad_start_element: return "Error parsing start element tag";
|
---|
4485 | case status_bad_attribute: return "Error parsing element attribute";
|
---|
4486 | case status_bad_end_element: return "Error parsing end element tag";
|
---|
4487 | case status_end_element_mismatch: return "Start-end tags mismatch";
|
---|
4488 |
|
---|
4489 | default: return "Unknown error";
|
---|
4490 | }
|
---|
4491 | }
|
---|
4492 |
|
---|
4493 | xml_document::xml_document(): _buffer(0)
|
---|
4494 | {
|
---|
4495 | create();
|
---|
4496 | }
|
---|
4497 |
|
---|
4498 | xml_document::~xml_document()
|
---|
4499 | {
|
---|
4500 | destroy();
|
---|
4501 | }
|
---|
4502 |
|
---|
4503 | void xml_document::reset()
|
---|
4504 | {
|
---|
4505 | destroy();
|
---|
4506 | create();
|
---|
4507 | }
|
---|
4508 |
|
---|
4509 | void xml_document::reset(const xml_document& proto)
|
---|
4510 | {
|
---|
4511 | reset();
|
---|
4512 |
|
---|
4513 | for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
|
---|
4514 | append_copy(cur);
|
---|
4515 | }
|
---|
4516 |
|
---|
4517 | void xml_document::create()
|
---|
4518 | {
|
---|
4519 | // initialize sentinel page
|
---|
4520 | STATIC_ASSERT(offsetof(xml_memory_page, data) + sizeof(xml_document_struct) + xml_memory_page_alignment <= sizeof(_memory));
|
---|
4521 |
|
---|
4522 | // align upwards to page boundary
|
---|
4523 | void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1));
|
---|
4524 |
|
---|
4525 | // prepare page structure
|
---|
4526 | xml_memory_page* page = xml_memory_page::construct(page_memory);
|
---|
4527 |
|
---|
4528 | page->busy_size = xml_memory_page_size;
|
---|
4529 |
|
---|
4530 | // allocate new root
|
---|
4531 | _root = new (page->data) xml_document_struct(page);
|
---|
4532 | _root->prev_sibling_c = _root;
|
---|
4533 |
|
---|
4534 | // setup sentinel page
|
---|
4535 | page->allocator = static_cast<xml_document_struct*>(_root);
|
---|
4536 | }
|
---|
4537 |
|
---|
4538 | void xml_document::destroy()
|
---|
4539 | {
|
---|
4540 | // destroy static storage
|
---|
4541 | if (_buffer)
|
---|
4542 | {
|
---|
4543 | global_deallocate(_buffer);
|
---|
4544 | _buffer = 0;
|
---|
4545 | }
|
---|
4546 |
|
---|
4547 | // destroy dynamic storage, leave sentinel page (it's in static memory)
|
---|
4548 | if (_root)
|
---|
4549 | {
|
---|
4550 | xml_memory_page* root_page = reinterpret_cast<xml_memory_page*>(_root->header & xml_memory_page_pointer_mask);
|
---|
4551 | assert(root_page && !root_page->prev && !root_page->memory);
|
---|
4552 |
|
---|
4553 | // destroy all pages
|
---|
4554 | for (xml_memory_page* page = root_page->next; page; )
|
---|
4555 | {
|
---|
4556 | xml_memory_page* next = page->next;
|
---|
4557 |
|
---|
4558 | xml_allocator::deallocate_page(page);
|
---|
4559 |
|
---|
4560 | page = next;
|
---|
4561 | }
|
---|
4562 |
|
---|
4563 | // cleanup root page
|
---|
4564 | root_page->allocator = 0;
|
---|
4565 | root_page->next = 0;
|
---|
4566 | root_page->busy_size = root_page->freed_size = 0;
|
---|
4567 |
|
---|
4568 | _root = 0;
|
---|
4569 | }
|
---|
4570 | }
|
---|
4571 |
|
---|
4572 | #ifndef PUGIXML_NO_STL
|
---|
4573 | xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
|
---|
4574 | {
|
---|
4575 | reset();
|
---|
4576 |
|
---|
4577 | return load_stream_impl(*this, stream, options, encoding);
|
---|
4578 | }
|
---|
4579 |
|
---|
4580 | xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
|
---|
4581 | {
|
---|
4582 | reset();
|
---|
4583 |
|
---|
4584 | return load_stream_impl(*this, stream, options, encoding_wchar);
|
---|
4585 | }
|
---|
4586 | #endif
|
---|
4587 |
|
---|
4588 | xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
|
---|
4589 | {
|
---|
4590 | // Force native encoding (skip autodetection)
|
---|
4591 | #ifdef PUGIXML_WCHAR_MODE
|
---|
4592 | xml_encoding encoding = encoding_wchar;
|
---|
4593 | #else
|
---|
4594 | xml_encoding encoding = encoding_utf8;
|
---|
4595 | #endif
|
---|
4596 |
|
---|
4597 | return load_buffer(contents, strlength(contents) * sizeof(char_t), options, encoding);
|
---|
4598 | }
|
---|
4599 |
|
---|
4600 | xml_parse_result xml_document::load_file(const char* path, unsigned int options, xml_encoding encoding)
|
---|
4601 | {
|
---|
4602 | reset();
|
---|
4603 |
|
---|
4604 | FILE* file = fopen(path, "rb");
|
---|
4605 |
|
---|
4606 | return load_file_impl(*this, file, options, encoding);
|
---|
4607 | }
|
---|
4608 |
|
---|
4609 | xml_parse_result xml_document::load_file(const wchar_t* path, unsigned int options, xml_encoding encoding)
|
---|
4610 | {
|
---|
4611 | reset();
|
---|
4612 |
|
---|
4613 | FILE* file = open_file_wide(path, L"rb");
|
---|
4614 |
|
---|
4615 | return load_file_impl(*this, file, options, encoding);
|
---|
4616 | }
|
---|
4617 |
|
---|
4618 | xml_parse_result xml_document::load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own)
|
---|
4619 | {
|
---|
4620 | reset();
|
---|
4621 |
|
---|
4622 | // check input buffer
|
---|
4623 | assert(contents || size == 0);
|
---|
4624 |
|
---|
4625 | // get actual encoding
|
---|
4626 | xml_encoding buffer_encoding = get_buffer_encoding(encoding, contents, size);
|
---|
4627 |
|
---|
4628 | // get private buffer
|
---|
4629 | char_t* buffer = 0;
|
---|
4630 | size_t length = 0;
|
---|
4631 |
|
---|
4632 | if (!convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return make_parse_result(status_out_of_memory);
|
---|
4633 |
|
---|
4634 | // delete original buffer if we performed a conversion
|
---|
4635 | if (own && buffer != contents && contents) global_deallocate(contents);
|
---|
4636 |
|
---|
4637 | // parse
|
---|
4638 | xml_parse_result res = xml_parser::parse(buffer, length, _root, options);
|
---|
4639 |
|
---|
4640 | // remember encoding
|
---|
4641 | res.encoding = buffer_encoding;
|
---|
4642 |
|
---|
4643 | // grab onto buffer if it's our buffer, user is responsible for deallocating contens himself
|
---|
4644 | if (own || buffer != contents) _buffer = buffer;
|
---|
4645 |
|
---|
4646 | return res;
|
---|
4647 | }
|
---|
4648 |
|
---|
4649 | xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
|
---|
4650 | {
|
---|
4651 | return load_buffer_impl(const_cast<void*>(contents), size, options, encoding, false, false);
|
---|
4652 | }
|
---|
4653 |
|
---|
4654 | xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
|
---|
4655 | {
|
---|
4656 | return load_buffer_impl(contents, size, options, encoding, true, false);
|
---|
4657 | }
|
---|
4658 |
|
---|
4659 | xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
|
---|
4660 | {
|
---|
4661 | return load_buffer_impl(contents, size, options, encoding, true, true);
|
---|
4662 | }
|
---|
4663 |
|
---|
4664 | void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
|
---|
4665 | {
|
---|
4666 | if (flags & format_write_bom) write_bom(writer, get_write_encoding(encoding));
|
---|
4667 |
|
---|
4668 | xml_buffered_writer buffered_writer(writer, encoding);
|
---|
4669 |
|
---|
4670 | if (!(flags & format_no_declaration) && !has_declaration(*this))
|
---|
4671 | {
|
---|
4672 | buffered_writer.write(PUGIXML_TEXT("<?xml version=\"1.0\"?>"));
|
---|
4673 | if (!(flags & format_raw)) buffered_writer.write('\n');
|
---|
4674 | }
|
---|
4675 |
|
---|
4676 | node_output(buffered_writer, *this, indent, flags, 0);
|
---|
4677 | }
|
---|
4678 |
|
---|
4679 | #ifndef PUGIXML_NO_STL
|
---|
4680 | void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
|
---|
4681 | {
|
---|
4682 | xml_writer_stream writer(stream);
|
---|
4683 |
|
---|
4684 | save(writer, indent, flags, encoding);
|
---|
4685 | }
|
---|
4686 |
|
---|
4687 | void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
|
---|
4688 | {
|
---|
4689 | xml_writer_stream writer(stream);
|
---|
4690 |
|
---|
4691 | save(writer, indent, flags, encoding_wchar);
|
---|
4692 | }
|
---|
4693 | #endif
|
---|
4694 |
|
---|
4695 | bool xml_document::save_file(const char* path, const char_t* indent, unsigned int flags, xml_encoding encoding) const
|
---|
4696 | {
|
---|
4697 | FILE* file = fopen(path, "wb");
|
---|
4698 | if (!file) return false;
|
---|
4699 |
|
---|
4700 | xml_writer_file writer(file);
|
---|
4701 | save(writer, indent, flags, encoding);
|
---|
4702 |
|
---|
4703 | fclose(file);
|
---|
4704 |
|
---|
4705 | return true;
|
---|
4706 | }
|
---|
4707 |
|
---|
4708 | bool xml_document::save_file(const wchar_t* path, const char_t* indent, unsigned int flags, xml_encoding encoding) const
|
---|
4709 | {
|
---|
4710 | FILE* file = open_file_wide(path, L"wb");
|
---|
4711 | if (!file) return false;
|
---|
4712 |
|
---|
4713 | xml_writer_file writer(file);
|
---|
4714 | save(writer, indent, flags, encoding);
|
---|
4715 |
|
---|
4716 | fclose(file);
|
---|
4717 |
|
---|
4718 | return true;
|
---|
4719 | }
|
---|
4720 |
|
---|
4721 | xml_node xml_document::document_element() const
|
---|
4722 | {
|
---|
4723 | for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
|
---|
4724 | if ((i->header & xml_memory_page_type_mask) + 1 == node_element)
|
---|
4725 | return xml_node(i);
|
---|
4726 |
|
---|
4727 | return xml_node();
|
---|
4728 | }
|
---|
4729 |
|
---|
4730 | #ifndef PUGIXML_NO_STL
|
---|
4731 | std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
|
---|
4732 | {
|
---|
4733 | assert(str);
|
---|
4734 |
|
---|
4735 | return as_utf8_impl(str, wcslen(str));
|
---|
4736 | }
|
---|
4737 |
|
---|
4738 | std::string PUGIXML_FUNCTION as_utf8(const std::wstring& str)
|
---|
4739 | {
|
---|
4740 | return as_utf8_impl(str.c_str(), str.size());
|
---|
4741 | }
|
---|
4742 |
|
---|
4743 | std::wstring PUGIXML_FUNCTION as_wide(const char* str)
|
---|
4744 | {
|
---|
4745 | assert(str);
|
---|
4746 |
|
---|
4747 | return as_wide_impl(str, strlen(str));
|
---|
4748 | }
|
---|
4749 |
|
---|
4750 | std::wstring PUGIXML_FUNCTION as_wide(const std::string& str)
|
---|
4751 | {
|
---|
4752 | return as_wide_impl(str.c_str(), str.size());
|
---|
4753 | }
|
---|
4754 | #endif
|
---|
4755 |
|
---|
4756 | void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
|
---|
4757 | {
|
---|
4758 | global_allocate = allocate;
|
---|
4759 | global_deallocate = deallocate;
|
---|
4760 | }
|
---|
4761 |
|
---|
4762 | allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
|
---|
4763 | {
|
---|
4764 | return global_allocate;
|
---|
4765 | }
|
---|
4766 |
|
---|
4767 | deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
|
---|
4768 | {
|
---|
4769 | return global_deallocate;
|
---|
4770 | }
|
---|
4771 | }
|
---|
4772 |
|
---|
4773 | #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
|
---|
4774 | namespace std
|
---|
4775 | {
|
---|
4776 | // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
|
---|
4777 | std::bidirectional_iterator_tag _Iter_cat(const xml_node_iterator&)
|
---|
4778 | {
|
---|
4779 | return std::bidirectional_iterator_tag();
|
---|
4780 | }
|
---|
4781 |
|
---|
4782 | std::bidirectional_iterator_tag _Iter_cat(const xml_attribute_iterator&)
|
---|
4783 | {
|
---|
4784 | return std::bidirectional_iterator_tag();
|
---|
4785 | }
|
---|
4786 | }
|
---|
4787 | #endif
|
---|
4788 |
|
---|
4789 | #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
|
---|
4790 | namespace std
|
---|
4791 | {
|
---|
4792 | // Workarounds for (non-standard) iterator category detection
|
---|
4793 | std::bidirectional_iterator_tag __iterator_category(const xml_node_iterator&)
|
---|
4794 | {
|
---|
4795 | return std::bidirectional_iterator_tag();
|
---|
4796 | }
|
---|
4797 |
|
---|
4798 | std::bidirectional_iterator_tag __iterator_category(const xml_attribute_iterator&)
|
---|
4799 | {
|
---|
4800 | return std::bidirectional_iterator_tag();
|
---|
4801 | }
|
---|
4802 | }
|
---|
4803 | #endif
|
---|
4804 |
|
---|
4805 | #ifndef PUGIXML_NO_XPATH
|
---|
4806 |
|
---|
4807 | // STL replacements
|
---|
4808 | namespace
|
---|
4809 | {
|
---|
// Function object that compares two values of the same type with operator==.
struct equal_to
{
	template <typename Value> bool operator()(const Value& left, const Value& right) const
	{
		return left == right;
	}
};
|
---|
4817 |
|
---|
// Function object that compares two values of the same type with operator!=.
struct not_equal_to
{
	template <typename Value> bool operator()(const Value& left, const Value& right) const
	{
		return left != right;
	}
};
|
---|
4825 |
|
---|
// Function object that compares two values of the same type with operator<.
struct less
{
	template <typename Value> bool operator()(const Value& left, const Value& right) const
	{
		return left < right;
	}
};
|
---|
4833 |
|
---|
// Function object that compares two values of the same type with operator<=.
struct less_equal
{
	template <typename Value> bool operator()(const Value& left, const Value& right) const
	{
		return left <= right;
	}
};
|
---|
4841 |
|
---|
// Exchanges the values of two objects using one copy and two assignments.
template <typename T> void swap(T& lhs, T& rhs)
{
	T saved = lhs;

	lhs = rhs;
	rhs = saved;
}
|
---|
4848 |
|
---|
// Returns an iterator to the smallest element of [begin, end) according to
// pred (pred(a, b) is true when a orders before b). When several elements are
// equally small the first one is returned.
// For an empty range end is returned; without this check the scan would start
// at begin + 1, i.e. already past end, and read outside of the range.
template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
{
	if (begin == end) return end;

	I result = begin;

	for (I it = begin + 1; it != end; ++it)
		if (pred(*it, *result))
			result = it;

	return result;
}
|
---|
4859 |
|
---|
// Reverses the order of the elements in [begin, end) in place by swapping
// elements from both ends towards the middle.
template <typename I> void reverse(I begin, I end)
{
	while (begin + 1 < end)
	{
		--end;

		swap(*begin, *end);

		++begin;
	}
}
|
---|
4864 |
|
---|
// Collapses every run of consecutive equal elements in [begin, end) to a
// single element, compacting the survivors towards the front.
// Returns the past-the-end iterator of the compacted range.
template <typename I> I unique(I begin, I end)
{
	// skip the leading part that has no adjacent duplicates; nothing has to move there
	while (begin + 1 < end)
	{
		if (*begin != *(begin + 1))
			begin++;
		else
			break;
	}

	// empty input
	if (begin == end) return begin;

	// position of the most recently kept element
	I kept = begin;
	++begin;

	// copy each element that differs from the last kept one right after it
	for (; begin != end; begin++)
	{
		if (*begin != *kept)
		{
			++kept;
			*kept = *begin;
		}
	}

	// kept refers to a live element, so the result is one past it
	return kept + 1;
}
|
---|
4887 |
|
---|
// Copies [begin, end) so that the copy ends at target, processing elements
// from the last one to the first one.
template <typename I> void copy_backwards(I begin, I end, I target)
{
	while (end != begin)
	{
		--end;
		--target;

		*target = *end;
	}
}
|
---|
4892 |
|
---|
// Sorts the non-empty range [begin, end) in place with insertion sort.
// pred(a, b) is true when a orders before b. The last (unnamed) pointer
// argument is only used to deduce the element type T.
template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
{
	assert(begin != end);

	for (I cursor = begin + 1; cursor != end; ++cursor)
	{
		T pending = *cursor;

		if (pred(pending, *begin))
		{
			// new minimum: shift the whole sorted prefix right by one and put it first
			copy_backwards(begin, cursor, cursor + 1);
			*begin = pending;
			continue;
		}

		// *begin does not order after pending, so the scan below stops at begin at the latest
		I gap = cursor;

		for (; pred(pending, *(gap - 1)); gap--)
			*gap = *(gap - 1);

		// drop the element into the gap
		*gap = pending;
	}
}
|
---|
4923 |
|
---|
4924 | // std variant for elements with ==
|
---|
// Three-way partition of [begin, end) around the value at middle.
// On return, [*out_eqbeg, *out_eqend) holds the elements equal (operator==) to
// the pivot value; elements for which pred(element, pivot) holds are placed
// before that range and the remaining ones after it.
template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
{
	I eq_first = middle;
	I eq_last = middle + 1;

	// grow the equal range over neighbours that already match the pivot
	while (eq_first != begin && *(eq_first - 1) == *eq_first) --eq_first;
	while (eq_last != end && *eq_last == *eq_first) ++eq_last;

	// unprocessed elements are [begin, lower_end) on the left and [upper_begin, end) on the right
	I lower_end = eq_first;
	I upper_begin = eq_last;

	for (;;)
	{
		// scan right side until an element that has to go to the left is found
		while (upper_begin != end)
		{
			if (!pred(*eq_first, *upper_begin))
			{
				// not greater than pivot: either equal (absorb into equal range) or misplaced
				if (!(*upper_begin == *eq_first)) break;

				swap(*upper_begin, *eq_last);
				++eq_last;
			}

			++upper_begin;
		}

		// scan left side until an element that has to go to the right is found
		while (lower_end != begin)
		{
			if (!pred(*(lower_end - 1), *eq_first))
			{
				// not less than pivot: either equal (absorb into equal range) or misplaced
				if (!(*eq_first == *(lower_end - 1))) break;

				--eq_first;
				swap(*(lower_end - 1), *eq_first);
			}

			--lower_end;
		}

		const bool right_done = (upper_begin == end);
		const bool left_done = (lower_end == begin);

		// both sides are exhausted: report the equal range
		if (right_done && left_done)
		{
			*out_eqbeg = eq_first;
			*out_eqend = eq_last;
			return;
		}

		if (right_done)
		{
			// only the left side has a misplaced element: slide the equal range one step left
			--lower_end;
			--eq_first;

			if (lower_end != eq_first) swap(*lower_end, *eq_first);

			--eq_last;
			swap(*eq_first, *eq_last);
		}
		else if (left_done)
		{
			// only the right side has a misplaced element: slide the equal range one step right
			if (eq_last != upper_begin) swap(*eq_first, *eq_last);

			++eq_last;

			swap(*upper_begin, *eq_first);
			++upper_begin;
			++eq_first;
		}
		else
		{
			// both sides have a misplaced element: exchange them
			--lower_end;

			swap(*upper_begin, *lower_end);
			++upper_begin;
		}
	}
}
|
---|
4977 |
|
---|
// Orders the three referenced elements with up to three conditional swaps, so
// that afterwards *first, *middle, *last are non-decreasing with respect to pred.
template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
{
	// order the first pair
	if (pred(*middle, *first))
	{
		swap(*middle, *first);
	}

	// move the largest of the three to the end
	if (pred(*last, *middle))
	{
		swap(*last, *middle);
	}

	// the previous swap may have broken the order of the first pair
	if (pred(*middle, *first))
	{
		swap(*middle, *first);
	}
}
|
---|
4984 |
|
---|
// Moves a pivot estimate into *middle. last refers to the final element of
// the range (inclusive). Ranges with last - first <= 40 use a single median
// of three; larger ranges use a median of three medians of three.
template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
{
	// small chunk: one median of three is enough
	if (last - first <= 40)
	{
		median3(first, middle, last, pred);
		return;
	}

	// large chunk: sample three triples and take the median of their medians
	size_t stride = (last - first + 1) / 8;

	median3(first, first + stride, first + 2 * stride, pred);
	median3(middle - stride, middle, middle + stride, pred);
	median3(last - 2 * stride, last - stride, last, pred);
	median3(first + stride, middle, last - stride, pred);
}
|
---|
5003 |
|
---|
// Sorts [begin, end) in place according to pred.
// Ranges longer than 32 elements are split with a three-way partition around
// a median-selected pivot; the smaller side is sorted recursively and the loop
// continues on the larger side. The remaining short range is finished with
// insertion sort.
template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
{
	while (end - begin > 32)
	{
		// pick the pivot and move it to the middle position
		I pivot = begin + (end - begin) / 2;
		median(begin, pivot, end - 1, pred);

		// split into (< pivot) (== pivot) (> pivot)
		I equal_first, equal_last;
		partition(begin, pivot, end, pred, &equal_first, &equal_last);

		const bool left_is_larger = (equal_first - begin > end - equal_last);

		if (left_is_larger)
		{
			// recurse into the smaller right part, keep looping on the left part
			sort(equal_last, end, pred);
			end = equal_first;
		}
		else
		{
			// recurse into the smaller left part, keep looping on the right part
			sort(begin, equal_first, pred);
			begin = equal_last;
		}
	}

	// finish the remaining short range; insertion_sort requires a non-empty range
	if (begin != end) insertion_sort(begin, end, pred, &*begin);
}
|
---|
5033 | }
|
---|
5034 |
|
---|
5035 | // Allocator used for AST and evaluation stacks
|
---|
5036 | namespace
|
---|
5037 | {
|
---|
// A single block in the singly-linked list of memory blocks used by xpath_allocator.
struct xpath_memory_block
{
	// next block in the allocator's list (0 terminates the list)
	xpath_memory_block* next;

	// payload storage; blocks obtained from the heap for larger requests are
	// allocated with more room than this nominal size
	char data[4096];
};
|
---|
5044 |
|
---|
5045 | class xpath_allocator
|
---|
5046 | {
|
---|
5047 | xpath_memory_block* _root;
|
---|
5048 | size_t _root_size;
|
---|
5049 |
|
---|
5050 | public:
|
---|
5051 | #ifdef PUGIXML_NO_EXCEPTIONS
|
---|
5052 | jmp_buf* error_handler;
|
---|
5053 | #endif
|
---|
5054 |
|
---|
5055 | xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size)
|
---|
5056 | {
|
---|
5057 | #ifdef PUGIXML_NO_EXCEPTIONS
|
---|
5058 | error_handler = 0;
|
---|
5059 | #endif
|
---|
5060 | }
|
---|
5061 |
|
---|
5062 | void* allocate_nothrow(size_t size)
|
---|
5063 | {
|
---|
5064 | const size_t block_capacity = sizeof(_root->data);
|
---|
5065 |
|
---|
5066 | // align size so that we're able to store pointers in subsequent blocks
|
---|
5067 | size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
|
---|
5068 |
|
---|
5069 | if (_root_size + size <= block_capacity)
|
---|
5070 | {
|
---|
5071 | void* buf = _root->data + _root_size;
|
---|
5072 | _root_size += size;
|
---|
5073 | return buf;
|
---|
5074 | }
|
---|
5075 | else
|
---|
5076 | {
|
---|
5077 | size_t block_data_size = (size > block_capacity) ? size : block_capacity;
|
---|
5078 | size_t block_size = block_data_size + offsetof(xpath_memory_block, data);
|
---|
5079 |
|
---|
5080 | xpath_memory_block* block = static_cast<xpath_memory_block*>(global_allocate(block_size));
|
---|
5081 | if (!block) return 0;
|
---|
5082 |
|
---|
5083 | block->next = _root;
|
---|
5084 |
|
---|
5085 | _root = block;
|
---|
5086 | _root_size = size;
|
---|
5087 |
|
---|
5088 | return block->data;
|
---|
5089 | }
|
---|
5090 | }
|
---|
5091 |
|
---|
5092 | void* allocate(size_t size)
|
---|
5093 | {
|
---|
5094 | void* result = allocate_nothrow(size);
|
---|
5095 |
|
---|
5096 | if (!result)
|
---|
5097 | {
|
---|
5098 | #ifdef PUGIXML_NO_EXCEPTIONS
|
---|
5099 | assert(error_handler);
|
---|
5100 | longjmp(*error_handler, 1);
|
---|
5101 | #else
|
---|
5102 | throw std::bad_alloc();
|
---|
5103 | #endif
|
---|
5104 | }
|
---|
5105 |
|
---|
5106 | return result;
|
---|
5107 | }
|
---|
5108 |
|
---|
5109 | void* reallocate(void* ptr, size_t old_size, size_t new_size)
|
---|
5110 | {
|
---|
5111 | // align size so that we're able to store pointers in subsequent blocks
|
---|
5112 | old_size = (old_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
|
---|
5113 | new_size = (new_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
|
---|
5114 |
|
---|
5115 | // we can only reallocate the last object
|
---|
5116 | assert(ptr == 0 || static_cast<char*>(ptr) + old_size == _root->data + _root_size);
|
---|
5117 |
|
---|
5118 | // adjust root size so that we have not allocated the object at all
|
---|
5119 | bool only_object = (_root_size == old_size);
|
---|
5120 |
|
---|
5121 | if (ptr) _root_size -= old_size;
|
---|
5122 |
|
---|
5123 | // allocate a new version (this will obviously reuse the memory if possible)
|
---|
5124 | void* result = allocate(new_size);
|
---|
5125 | assert(result);
|
---|
5126 |
|
---|
5127 | // we have a new block
|
---|
5128 | if (result != ptr && ptr)
|
---|
5129 | {
|
---|
5130 | // copy old data
|
---|
5131 | assert(new_size > old_size);
|
---|
5132 | memcpy(result, ptr, old_size);
|
---|
5133 |
|
---|
5134 | // free the previous page if it had no other objects
|
---|
5135 | if (only_object)
|
---|
5136 | {
|
---|
5137 | assert(_root->data == result);
|
---|
5138 | assert(_root->next);
|
---|
5139 |
|
---|
5140 | xpath_memory_block* next = _root->next->next;
|
---|
5141 |
|
---|
5142 | if (next)
|
---|
5143 | {
|
---|
5144 | // deallocate the whole page, unless it was the first one
|
---|
5145 | global_deallocate(_root->next);
|
---|
5146 | _root->next = next;
|
---|
5147 | }
|
---|
5148 | }
|
---|
5149 | }
|
---|
5150 |
|
---|
5151 | return result;
|
---|
5152 | }
|
---|
5153 |
|
---|
5154 | void revert(const xpath_allocator& state)
|
---|
5155 | {
|
---|
5156 | // free all new pages
|
---|
5157 | xpath_memory_block* cur = _root;
|
---|
5158 |
|
---|
5159 | while (cur != state._root)
|
---|
5160 | {
|
---|
5161 | xpath_memory_block* next = cur->next;
|
---|
5162 |
|
---|
5163 | global_deallocate(cur);
|
---|
5164 |
|
---|
5165 | cur = next;
|
---|
5166 | }
|
---|
5167 |
|
---|
5168 | // restore state
|
---|
5169 | _root = state._root;
|
---|
5170 | _root_size = state._root_size;
|
---|
5171 | }
|
---|
5172 |
|
---|
5173 | void release()
|
---|
5174 | {
|
---|
5175 | xpath_memory_block* cur = _root;
|
---|
5176 | assert(cur);
|
---|
5177 |
|
---|
5178 | while (cur->next)
|
---|
5179 | {
|
---|
5180 | xpath_memory_block* next = cur->next;
|
---|
5181 |
|
---|
5182 | global_deallocate(cur);
|
---|
5183 |
|
---|
5184 | cur = next;
|
---|
5185 | }
|
---|
5186 | }
|
---|
5187 | };
|
---|
5188 |
|
---|
5189 | struct xpath_allocator_capture
|
---|
5190 | {
|
---|
5191 | xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
|
---|
5192 | {
|
---|
5193 | }
|
---|
5194 |
|
---|
5195 | ~xpath_allocator_capture()
|
---|
5196 | {
|
---|
5197 | _target->revert(_state);
|
---|
5198 | }
|
---|
5199 |
|
---|
5200 | xpath_allocator* _target;
|
---|
5201 | xpath_allocator _state;
|
---|
5202 | };
|
---|
5203 |
|
---|
// Pair of allocators handed around together: one named for results and one
// named for temporaries.
struct xpath_stack
{
	// allocator designated for results
	xpath_allocator* result;

	// allocator designated for temporaries
	xpath_allocator* temp;
};
|
---|
5209 |
|
---|
5210 | struct xpath_stack_data
|
---|
5211 | {
|
---|
5212 | xpath_memory_block blocks[2];
|
---|
5213 | xpath_allocator result;
|
---|
5214 | xpath_allocator temp;
|
---|
5215 | xpath_stack stack;
|
---|
5216 |
|
---|
5217 | #ifdef PUGIXML_NO_EXCEPTIONS
|
---|
5218 | jmp_buf error_handler;
|
---|
5219 | #endif
|
---|
5220 |
|
---|
5221 | xpath_stack_data(): result(blocks + 0), temp(blocks + 1)
|
---|
5222 | {
|
---|
5223 | blocks[0].next = blocks[1].next = 0;
|
---|
5224 |
|
---|
5225 | stack.result = &result;
|
---|
5226 | stack.temp = &temp;
|
---|
5227 |
|
---|
5228 | #ifdef PUGIXML_NO_EXCEPTIONS
|
---|
5229 | result.error_handler = temp.error_handler = &error_handler;
|
---|
5230 | #endif
|
---|
5231 | }
|
---|
5232 |
|
---|
5233 | ~xpath_stack_data()
|
---|
5234 | {
|
---|
5235 | result.release();
|
---|
5236 | temp.release();
|
---|
5237 | }
|
---|
5238 | };
|
---|
5239 | }
|
---|
5240 |
|
---|
5241 | // String class
|
---|
5242 | namespace
|
---|
5243 | {
|
---|
5244 | class xpath_string
|
---|
5245 | {
|
---|
5246 | const char_t* _buffer;
|
---|
5247 | bool _uses_heap;
|
---|
5248 |
|
---|
5249 | static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
|
---|
5250 | {
|
---|
5251 | char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
|
---|
5252 | assert(result);
|
---|
5253 |
|
---|
5254 | memcpy(result, string, length * sizeof(char_t));
|
---|
5255 | result[length] = 0;
|
---|
5256 |
|
---|
5257 | return result;
|
---|
5258 | }
|
---|
5259 |
|
---|
5260 | static char_t* duplicate_string(const char_t* string, xpath_allocator* alloc)
|
---|
5261 | {
|
---|
5262 | return duplicate_string(string, strlength(string), alloc);
|
---|
5263 | }
|
---|
5264 |
|
---|
5265 | public:
|
---|
5266 | xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false)
|
---|
5267 | {
|
---|
5268 | }
|
---|
5269 |
|
---|
5270 | explicit xpath_string(const char_t* str, xpath_allocator* alloc)
|
---|
5271 | {
|
---|
5272 | bool empty = (*str == 0);
|
---|
5273 |
|
---|
5274 | _buffer = empty ? PUGIXML_TEXT("") : duplicate_string(str, alloc);
|
---|
5275 | _uses_heap = !empty;
|
---|
5276 | }
|
---|
5277 |
|
---|
5278 | explicit xpath_string(const char_t* str, bool use_heap): _buffer(str), _uses_heap(use_heap)
|
---|
5279 | {
|
---|
5280 | }
|
---|
5281 |
|
---|
5282 | xpath_string(const char_t* begin, const char_t* end, xpath_allocator* alloc)
|
---|
5283 | {
|
---|
5284 | assert(begin <= end);
|
---|
5285 |
|
---|
5286 | bool empty = (begin == end);
|
---|
5287 |
|
---|
5288 | _buffer = empty ? PUGIXML_TEXT("") : duplicate_string(begin, static_cast<size_t>(end - begin), alloc);
|
---|
5289 | _uses_heap = !empty;
|
---|
5290 | }
|
---|
5291 |
|
---|
5292 | void append(const xpath_string& o, xpath_allocator* alloc)
|
---|
5293 | {
|
---|
5294 | // skip empty sources
|
---|
5295 | if (!*o._buffer) return;
|
---|
5296 |
|
---|
5297 | // fast append for constant empty target and constant source
|
---|
5298 | if (!*_buffer && !_uses_heap && !o._uses_heap)
|
---|
5299 | {
|
---|
5300 | _buffer = o._buffer;
|
---|
5301 | }
|
---|
5302 | else
|
---|
5303 | {
|
---|
5304 | // need to make heap copy
|
---|
5305 | size_t target_length = strlength(_buffer);
|
---|
5306 | size_t source_length = strlength(o._buffer);
|
---|
5307 | size_t length = target_length + source_length;
|
---|
5308 |
|
---|
5309 | // allocate new buffer
|
---|
5310 | char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (length + 1) * sizeof(char_t)));
|
---|
5311 | assert(result);
|
---|
5312 |
|
---|
5313 | // append first string to the new buffer in case there was no reallocation
|
---|
5314 | if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
|
---|
5315 |
|
---|
5316 | // append second string to the new buffer
|
---|
5317 | memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
|
---|
5318 | result[length] = 0;
|
---|
5319 |
|
---|
5320 | // finalize
|
---|
5321 | _buffer = result;
|
---|
5322 | _uses_heap = true;
|
---|
5323 | }
|
---|
5324 | }
|
---|
5325 |
|
---|
5326 | const char_t* c_str() const
|
---|
5327 | {
|
---|
5328 | return _buffer;
|
---|
5329 | }
|
---|
5330 |
|
---|
5331 | size_t length() const
|
---|
5332 | {
|
---|
5333 | return strlength(_buffer);
|
---|
5334 | }
|
---|
5335 |
|
---|
5336 | char_t* data(xpath_allocator* alloc)
|
---|
5337 | {
|
---|
5338 | // make private heap copy
|
---|
5339 | if (!_uses_heap)
|
---|
5340 | {
|
---|
5341 | _buffer = duplicate_string(_buffer, alloc);
|
---|
5342 | _uses_heap = true;
|
---|
5343 | }
|
---|
5344 |
|
---|
5345 | return const_cast<char_t*>(_buffer);
|
---|
5346 | }
|
---|
5347 |
|
---|
5348 | bool empty() const
|
---|
5349 | {
|
---|
5350 | return *_buffer == 0;
|
---|
5351 | }
|
---|
5352 |
|
---|
5353 | bool operator==(const xpath_string& o) const
|
---|
5354 | {
|
---|
5355 | return strequal(_buffer, o._buffer);
|
---|
5356 | }
|
---|
5357 |
|
---|
5358 | bool operator!=(const xpath_string& o) const
|
---|
5359 | {
|
---|
5360 | return !strequal(_buffer, o._buffer);
|
---|
5361 | }
|
---|
5362 |
|
---|
5363 | bool uses_heap() const
|
---|
5364 | {
|
---|
5365 | return _uses_heap;
|
---|
5366 | }
|
---|
5367 | };
|
---|
5368 |
|
---|
5369 | xpath_string xpath_string_const(const char_t* str)
|
---|
5370 | {
|
---|
5371 | return xpath_string(str, false);
|
---|
5372 | }
|
---|
5373 | }
|
---|
5374 |
|
---|
5375 | namespace
|
---|
5376 | {
|
---|
5377 | bool starts_with(const char_t* string, const char_t* pattern)
|
---|
5378 | {
|
---|
5379 | while (*pattern && *string == *pattern)
|
---|
5380 | {
|
---|
5381 | string++;
|
---|
5382 | pattern++;
|
---|
5383 | }
|
---|
5384 |
|
---|
5385 | return *pattern == 0;
|
---|
5386 | }
|
---|
5387 |
|
---|
5388 | const char_t* find_char(const char_t* s, char_t c)
|
---|
5389 | {
|
---|
5390 | #ifdef PUGIXML_WCHAR_MODE
|
---|
5391 | return wcschr(s, c);
|
---|
5392 | #else
|
---|
5393 | return strchr(s, c);
|
---|
5394 | #endif
|
---|
5395 | }
|
---|
5396 |
|
---|
5397 | const char_t* find_substring(const char_t* s, const char_t* p)
|
---|
5398 | {
|
---|
5399 | #ifdef PUGIXML_WCHAR_MODE
|
---|
5400 | // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
|
---|
5401 | return (*p == 0) ? s : wcsstr(s, p);
|
---|
5402 | #else
|
---|
5403 | return strstr(s, p);
|
---|
5404 | #endif
|
---|
5405 | }
|
---|
5406 |
|
---|
5407 | // Converts symbol to lower case, if it is an ASCII one
|
---|
5408 | char_t tolower_ascii(char_t ch)
|
---|
5409 | {
|
---|
5410 | return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
|
---|
5411 | }
|
---|
5412 |
|
---|
5413 | xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
|
---|
5414 | {
|
---|
5415 | if (na.attribute())
|
---|
5416 | return xpath_string_const(na.attribute().value());
|
---|
5417 | else
|
---|
5418 | {
|
---|
5419 | const xml_node& n = na.node();
|
---|
5420 |
|
---|
5421 | switch (n.type())
|
---|
5422 | {
|
---|
5423 | case node_pcdata:
|
---|
5424 | case node_cdata:
|
---|
5425 | case node_comment:
|
---|
5426 | case node_pi:
|
---|
5427 | return xpath_string_const(n.value());
|
---|
5428 |
|
---|
5429 | case node_document:
|
---|
5430 | case node_element:
|
---|
5431 | {
|
---|
5432 | xpath_string result;
|
---|
5433 |
|
---|
5434 | xml_node cur = n.first_child();
|
---|
5435 |
|
---|
5436 | while (cur && cur != n)
|
---|
5437 | {
|
---|
5438 | if (cur.type() == node_pcdata || cur.type() == node_cdata)
|
---|
5439 | result.append(xpath_string_const(cur.value()), alloc);
|
---|
5440 |
|
---|
5441 | if (cur.first_child())
|
---|
5442 | cur = cur.first_child();
|
---|
5443 | else if (cur.next_sibling())
|
---|
5444 | cur = cur.next_sibling();
|
---|
5445 | else
|
---|
5446 | {
|
---|
5447 | while (!cur.next_sibling() && cur != n)
|
---|
5448 | cur = cur.parent();
|
---|
5449 |
|
---|
5450 | if (cur != n) cur = cur.next_sibling();
|
---|
5451 | }
|
---|
5452 | }
|
---|
5453 |
|
---|
5454 | return result;
|
---|
5455 | }
|
---|
5456 |
|
---|
5457 | default:
|
---|
5458 | return xpath_string();
|
---|
5459 | }
|
---|
5460 | }
|
---|
5461 | }
|
---|
5462 |
|
---|
5463 | unsigned int node_height(xml_node n)
|
---|
5464 | {
|
---|
5465 | unsigned int result = 0;
|
---|
5466 |
|
---|
5467 | while (n)
|
---|
5468 | {
|
---|
5469 | ++result;
|
---|
5470 | n = n.parent();
|
---|
5471 | }
|
---|
5472 |
|
---|
5473 | return result;
|
---|
5474 | }
|
---|
5475 |
|
---|
5476 | bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh)
|
---|
5477 | {
|
---|
5478 | // normalize heights
|
---|
5479 | for (unsigned int i = rh; i < lh; i++) ln = ln.parent();
|
---|
5480 | for (unsigned int j = lh; j < rh; j++) rn = rn.parent();
|
---|
5481 |
|
---|
5482 | // one node is the ancestor of the other
|
---|
5483 | if (ln == rn) return lh < rh;
|
---|
5484 |
|
---|
5485 | // find common ancestor
|
---|
5486 | while (ln.parent() != rn.parent())
|
---|
5487 | {
|
---|
5488 | ln = ln.parent();
|
---|
5489 | rn = rn.parent();
|
---|
5490 | }
|
---|
5491 |
|
---|
5492 | // there is no common ancestor (the shared parent is null), nodes are from different documents
|
---|
5493 | if (!ln.parent()) return ln < rn;
|
---|
5494 |
|
---|
5495 | // determine sibling order
|
---|
5496 | for (; ln; ln = ln.next_sibling())
|
---|
5497 | if (ln == rn)
|
---|
5498 | return true;
|
---|
5499 |
|
---|
5500 | return false;
|
---|
5501 | }
|
---|
5502 |
|
---|
5503 | bool node_is_ancestor(xml_node parent, xml_node node)
|
---|
5504 | {
|
---|
5505 | while (node && node != parent) node = node.parent();
|
---|
5506 |
|
---|
5507 | return parent && node == parent;
|
---|
5508 | }
|
---|
5509 |
|
---|
5510 | const void* document_order(const xpath_node& xnode)
|
---|
5511 | {
|
---|
5512 | xml_node_struct* node = xnode.node().internal_object();
|
---|
5513 |
|
---|
5514 | if (node)
|
---|
5515 | {
|
---|
5516 | if (node->name && (node->header & xml_memory_page_name_allocated_mask) == 0) return node->name;
|
---|
5517 | if (node->value && (node->header & xml_memory_page_value_allocated_mask) == 0) return node->value;
|
---|
5518 | return 0;
|
---|
5519 | }
|
---|
5520 |
|
---|
5521 | xml_attribute_struct* attr = xnode.attribute().internal_object();
|
---|
5522 |
|
---|
5523 | if (attr)
|
---|
5524 | {
|
---|
5525 | if ((attr->header & xml_memory_page_name_allocated_mask) == 0) return attr->name;
|
---|
5526 | if ((attr->header & xml_memory_page_value_allocated_mask) == 0) return attr->value;
|
---|
5527 | return 0;
|
---|
5528 | }
|
---|
5529 |
|
---|
5530 | return 0;
|
---|
5531 | }
|
---|
5532 |
|
---|
5533 | struct document_order_comparator
|
---|
5534 | {
|
---|
5535 | bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
|
---|
5536 | {
|
---|
5537 | // optimized document order based check
|
---|
5538 | const void* lo = document_order(lhs);
|
---|
5539 | const void* ro = document_order(rhs);
|
---|
5540 |
|
---|
5541 | if (lo && ro) return lo < ro;
|
---|
5542 |
|
---|
5543 | // slow comparison
|
---|
5544 | xml_node ln = lhs.node(), rn = rhs.node();
|
---|
5545 |
|
---|
5546 | // compare attributes
|
---|
5547 | if (lhs.attribute() && rhs.attribute())
|
---|
5548 | {
|
---|
5549 | // shared parent
|
---|
5550 | if (lhs.parent() == rhs.parent())
|
---|
5551 | {
|
---|
5552 | // determine sibling order
|
---|
5553 | for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
|
---|
5554 | if (a == rhs.attribute())
|
---|
5555 | return true;
|
---|
5556 |
|
---|
5557 | return false;
|
---|
5558 | }
|
---|
5559 |
|
---|
5560 | // compare attribute parents
|
---|
5561 | ln = lhs.parent();
|
---|
5562 | rn = rhs.parent();
|
---|
5563 | }
|
---|
5564 | else if (lhs.attribute())
|
---|
5565 | {
|
---|
5566 | // attributes go after the parent element
|
---|
5567 | if (lhs.parent() == rhs.node()) return false;
|
---|
5568 |
|
---|
5569 | ln = lhs.parent();
|
---|
5570 | }
|
---|
5571 | else if (rhs.attribute())
|
---|
5572 | {
|
---|
5573 | // attributes go after the parent element
|
---|
5574 | if (rhs.parent() == lhs.node()) return true;
|
---|
5575 |
|
---|
5576 | rn = rhs.parent();
|
---|
5577 | }
|
---|
5578 |
|
---|
5579 | if (ln == rn) return false;
|
---|
5580 |
|
---|
5581 | unsigned int lh = node_height(ln);
|
---|
5582 | unsigned int rh = node_height(rn);
|
---|
5583 |
|
---|
5584 | return node_is_before(ln, lh, rn, rh);
|
---|
5585 | }
|
---|
5586 | };
|
---|
5587 |
|
---|
5588 | struct duplicate_comparator
|
---|
5589 | {
|
---|
5590 | bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
|
---|
5591 | {
|
---|
5592 | if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
|
---|
5593 | else return rhs.attribute() ? false : lhs.node() < rhs.node();
|
---|
5594 | }
|
---|
5595 | };
|
---|
5596 |
|
---|
// Produces a NaN value.
double gen_nan()
{
#if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
    // float looks like an IEEE single here: write the bit pattern 0x7fc00000 through a union.
    // The array size turns into -1 (a compile error) if float and int32_t differ in size.
    union float_bits { float f; int32_t i; };

    float_bits pattern[sizeof(float) == sizeof(int32_t) ? 1 : -1];
    pattern[0].i = 0x7fc00000;

    return pattern[0].f;
#else
    // fallback: 0/0, with volatile keeping the division from being folded away
    const volatile double nought = 0.0;

    return nought / nought;
#endif
}
|
---|
5609 |
|
---|
// Reports whether `value` is a NaN, using whichever facility the toolchain offers.
bool is_nan(double value)
{
#if defined(_MSC_VER) || defined(__BORLANDC__)
    return _isnan(value) != 0;
#elif defined(fpclassify) && defined(FP_NAN)
    return FP_NAN == fpclassify(value);
#else
    // fallback: NaN is the only value that compares unequal to itself
    const volatile double probe = value;

    return probe != probe;
#endif
}
|
---|
5622 |
|
---|
5623 | const char_t* convert_number_to_string_special(double value)
|
---|
5624 | {
|
---|
5625 | #if defined(_MSC_VER) || defined(__BORLANDC__)
|
---|
5626 | if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
|
---|
5627 | if (_isnan(value)) return PUGIXML_TEXT("NaN");
|
---|
5628 | return PUGIXML_TEXT("-Infinity") + (value > 0);
|
---|
5629 | #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
|
---|
5630 | switch (fpclassify(value))
|
---|
5631 | {
|
---|
5632 | case FP_NAN:
|
---|
5633 | return PUGIXML_TEXT("NaN");
|
---|
5634 |
|
---|
5635 | case FP_INFINITE:
|
---|
5636 | return PUGIXML_TEXT("-Infinity") + (value > 0);
|
---|
5637 |
|
---|
5638 | case FP_ZERO:
|
---|
5639 | return PUGIXML_TEXT("0");
|
---|
5640 |
|
---|
5641 | default:
|
---|
5642 | return 0;
|
---|
5643 | }
|
---|
5644 | #else
|
---|
5645 | // fallback
|
---|
5646 | const volatile double v = value;
|
---|
5647 |
|
---|
5648 | if (v == 0) return PUGIXML_TEXT("0");
|
---|
5649 | if (v != v) return PUGIXML_TEXT("NaN");
|
---|
5650 | if (v * 2 == v) return PUGIXML_TEXT("-Infinity") + (value > 0);
|
---|
5651 | return 0;
|
---|
5652 | #endif
|
---|
5653 | }
|
---|
5654 |
|
---|
5655 | bool convert_number_to_boolean(double value)
|
---|
5656 | {
|
---|
5657 | return (value != 0 && !is_nan(value));
|
---|
5658 | }
|
---|
5659 |
|
---|
// Strips the run of '0' characters that ends at `end` (never going below `begin`)
// and writes a terminator at the new end. Note that `*end` itself is written when nothing is stripped.
void truncate_zeros(char* begin, char* end)
{
    char* tail = end;

    for (; tail != begin; --tail)
    {
        if (*(tail - 1) != '0') break;
    }

    *tail = 0;
}
|
---|
5666 |
|
---|
5667 | // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
|
---|
5668 | #if defined(_MSC_VER) && _MSC_VER >= 1400
|
---|
5669 | void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
|
---|
5670 | {
|
---|
5671 | // get base values
|
---|
5672 | int sign, exponent;
|
---|
5673 | _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
|
---|
5674 |
|
---|
5675 | // truncate redundant zeros
|
---|
5676 | truncate_zeros(buffer, buffer + strlen(buffer));
|
---|
5677 |
|
---|
5678 | // fill results
|
---|
5679 | *out_mantissa = buffer;
|
---|
5680 | *out_exponent = exponent;
|
---|
5681 | }
|
---|
5682 | #else
|
---|
5683 | void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
|
---|
5684 | {
|
---|
5685 | // get a scientific notation value with IEEE DBL_DIG decimals
|
---|
5686 | sprintf(buffer, "%.*e", DBL_DIG, value);
|
---|
5687 | assert(strlen(buffer) < buffer_size);
|
---|
5688 | (void)!buffer_size;
|
---|
5689 |
|
---|
5690 | // get the exponent (possibly negative)
|
---|
5691 | char* exponent_string = strchr(buffer, 'e');
|
---|
5692 | assert(exponent_string);
|
---|
5693 |
|
---|
5694 | int exponent = atoi(exponent_string + 1);
|
---|
5695 |
|
---|
5696 | // extract mantissa string: skip sign
|
---|
5697 | char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
|
---|
5698 | assert(mantissa[0] != '0' && mantissa[1] == '.');
|
---|
5699 |
|
---|
5700 | // divide mantissa by 10 to eliminate integer part
|
---|
5701 | mantissa[1] = mantissa[0];
|
---|
5702 | mantissa++;
|
---|
5703 | exponent++;
|
---|
5704 |
|
---|
5705 | // remove extra mantissa digits and zero-terminate mantissa
|
---|
5706 | truncate_zeros(mantissa, exponent_string);
|
---|
5707 |
|
---|
5708 | // fill results
|
---|
5709 | *out_mantissa = mantissa;
|
---|
5710 | *out_exponent = exponent;
|
---|
5711 | }
|
---|
5712 | #endif
|
---|
5713 |
|
---|
5714 | xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
|
---|
5715 | {
|
---|
5716 | // try special number conversion
|
---|
5717 | const char_t* special = convert_number_to_string_special(value);
|
---|
5718 | if (special) return xpath_string_const(special);
|
---|
5719 |
|
---|
5720 | // get mantissa + exponent form
|
---|
5721 | char mantissa_buffer[64];
|
---|
5722 |
|
---|
5723 | char* mantissa;
|
---|
5724 | int exponent;
|
---|
5725 | convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
|
---|
5726 |
|
---|
5727 | // make the number!
|
---|
5728 | char_t result[512];
|
---|
5729 | char_t* s = result;
|
---|
5730 |
|
---|
5731 | // sign
|
---|
5732 | if (value < 0) *s++ = '-';
|
---|
5733 |
|
---|
5734 | // integer part
|
---|
5735 | if (exponent <= 0)
|
---|
5736 | {
|
---|
5737 | *s++ = '0';
|
---|
5738 | }
|
---|
5739 | else
|
---|
5740 | {
|
---|
5741 | while (exponent > 0)
|
---|
5742 | {
|
---|
5743 | assert(*mantissa == 0 || (unsigned)(*mantissa - '0') <= 9);
|
---|
5744 | *s++ = *mantissa ? *mantissa++ : '0';
|
---|
5745 | exponent--;
|
---|
5746 | }
|
---|
5747 | }
|
---|
5748 |
|
---|
5749 | // fractional part
|
---|
5750 | if (*mantissa)
|
---|
5751 | {
|
---|
5752 | // decimal point
|
---|
5753 | *s++ = '.';
|
---|
5754 |
|
---|
5755 | // extra zeroes from negative exponent
|
---|
5756 | while (exponent < 0)
|
---|
5757 | {
|
---|
5758 | *s++ = '0';
|
---|
5759 | exponent++;
|
---|
5760 | }
|
---|
5761 |
|
---|
5762 | // extra mantissa digits
|
---|
5763 | while (*mantissa)
|
---|
5764 | {
|
---|
5765 | assert((unsigned)(*mantissa - '0') <= 9);
|
---|
5766 | *s++ = *mantissa++;
|
---|
5767 | }
|
---|
5768 | }
|
---|
5769 |
|
---|
5770 | // zero-terminate
|
---|
5771 | assert(s < result + sizeof(result) / sizeof(result[0]));
|
---|
5772 | *s = 0;
|
---|
5773 |
|
---|
5774 | return xpath_string(result, alloc);
|
---|
5775 | }
|
---|
5776 |
|
---|
5777 | bool check_string_to_number_format(const char_t* string)
|
---|
5778 | {
|
---|
5779 | // parse leading whitespace
|
---|
5780 | while (IS_CHARTYPE(*string, ct_space)) ++string;
|
---|
5781 |
|
---|
5782 | // parse sign
|
---|
5783 | if (*string == '-') ++string;
|
---|
5784 |
|
---|
5785 | if (!*string) return false;
|
---|
5786 |
|
---|
5787 | // if there is no integer part, there should be a decimal part with at least one digit
|
---|
5788 | if (!IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !IS_CHARTYPEX(string[1], ctx_digit))) return false;
|
---|
5789 |
|
---|
5790 | // parse integer part
|
---|
5791 | while (IS_CHARTYPEX(*string, ctx_digit)) ++string;
|
---|
5792 |
|
---|
5793 | // parse decimal part
|
---|
5794 | if (*string == '.')
|
---|
5795 | {
|
---|
5796 | ++string;
|
---|
5797 |
|
---|
5798 | while (IS_CHARTYPEX(*string, ctx_digit)) ++string;
|
---|
5799 | }
|
---|
5800 |
|
---|
5801 | // parse trailing whitespace
|
---|
5802 | while (IS_CHARTYPE(*string, ct_space)) ++string;
|
---|
5803 |
|
---|
5804 | return *string == 0;
|
---|
5805 | }
|
---|
5806 |
|
---|
5807 | double convert_string_to_number(const char_t* string)
|
---|
5808 | {
|
---|
5809 | // check string format
|
---|
5810 | if (!check_string_to_number_format(string)) return gen_nan();
|
---|
5811 |
|
---|
5812 | // parse string
|
---|
5813 | #ifdef PUGIXML_WCHAR_MODE
|
---|
5814 | return wcstod(string, 0);
|
---|
5815 | #else
|
---|
5816 | return atof(string);
|
---|
5817 | #endif
|
---|
5818 | }
|
---|
5819 |
|
---|
5820 | bool convert_string_to_number(const char_t* begin, const char_t* end, double* out_result)
|
---|
5821 | {
|
---|
5822 | char_t buffer[32];
|
---|
5823 |
|
---|
5824 | size_t length = static_cast<size_t>(end - begin);
|
---|
5825 | char_t* scratch = buffer;
|
---|
5826 |
|
---|
5827 | if (length >= sizeof(buffer) / sizeof(buffer[0]))
|
---|
5828 | {
|
---|
5829 | // need to make dummy on-heap copy
|
---|
5830 | scratch = static_cast<char_t*>(global_allocate((length + 1) * sizeof(char_t)));
|
---|
5831 | if (!scratch) return false;
|
---|
5832 | }
|
---|
5833 |
|
---|
5834 | // copy string to zero-terminated buffer and perform conversion
|
---|
5835 | memcpy(scratch, begin, length * sizeof(char_t));
|
---|
5836 | scratch[length] = 0;
|
---|
5837 |
|
---|
5838 | *out_result = convert_string_to_number(scratch);
|
---|
5839 |
|
---|
5840 | // free dummy buffer
|
---|
5841 | if (scratch != buffer) global_deallocate(scratch);
|
---|
5842 |
|
---|
5843 | return true;
|
---|
5844 | }
|
---|
5845 |
|
---|
// Rounds to the nearest integer, with halves going towards positive infinity.
double round_nearest(double value)
{
    const double shifted = value + 0.5;

    return floor(shifted);
}
|
---|
5850 |
|
---|
// Same as round_nearest, except that inputs in [-0.5, -0] produce negative zero.
double round_nearest_nzero(double value)
{
    // ceil keeps the sign of zero: -0 for negative inputs in the range, +0 for +0
    const bool rounds_to_zero = value >= -0.5 && value <= 0;

    if (rounds_to_zero) return ceil(value);

    return floor(value + 0.5);
}
|
---|
5857 |
|
---|
5858 | const char_t* qualified_name(const xpath_node& node)
|
---|
5859 | {
|
---|
5860 | return node.attribute() ? node.attribute().name() : node.node().name();
|
---|
5861 | }
|
---|
5862 |
|
---|
5863 | const char_t* local_name(const xpath_node& node)
|
---|
5864 | {
|
---|
5865 | const char_t* name = qualified_name(node);
|
---|
5866 | const char_t* p = find_char(name, ':');
|
---|
5867 |
|
---|
5868 | return p ? p + 1 : name;
|
---|
5869 | }
|
---|
5870 |
|
---|
5871 | struct namespace_uri_predicate
|
---|
5872 | {
|
---|
5873 | const char_t* prefix;
|
---|
5874 | size_t prefix_length;
|
---|
5875 |
|
---|
5876 | namespace_uri_predicate(const char_t* name)
|
---|
5877 | {
|
---|
5878 | const char_t* pos = find_char(name, ':');
|
---|
5879 |
|
---|
5880 | prefix = pos ? name : 0;
|
---|
5881 | prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
|
---|
5882 | }
|
---|
5883 |
|
---|
5884 | bool operator()(const xml_attribute& a) const
|
---|
5885 | {
|
---|
5886 | const char_t* name = a.name();
|
---|
5887 |
|
---|
5888 | if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
|
---|
5889 |
|
---|
5890 | return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
|
---|
5891 | }
|
---|
5892 | };
|
---|
5893 |
|
---|
5894 | const char_t* namespace_uri(const xml_node& node)
|
---|
5895 | {
|
---|
5896 | namespace_uri_predicate pred = node.name();
|
---|
5897 |
|
---|
5898 | xml_node p = node;
|
---|
5899 |
|
---|
5900 | while (p)
|
---|
5901 | {
|
---|
5902 | xml_attribute a = p.find_attribute(pred);
|
---|
5903 |
|
---|
5904 | if (a) return a.value();
|
---|
5905 |
|
---|
5906 | p = p.parent();
|
---|
5907 | }
|
---|
5908 |
|
---|
5909 | return PUGIXML_TEXT("");
|
---|
5910 | }
|
---|
5911 |
|
---|
5912 | const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent)
|
---|
5913 | {
|
---|
5914 | namespace_uri_predicate pred = attr.name();
|
---|
5915 |
|
---|
5916 | // Default namespace does not apply to attributes
|
---|
5917 | if (!pred.prefix) return PUGIXML_TEXT("");
|
---|
5918 |
|
---|
5919 | xml_node p = parent;
|
---|
5920 |
|
---|
5921 | while (p)
|
---|
5922 | {
|
---|
5923 | xml_attribute a = p.find_attribute(pred);
|
---|
5924 |
|
---|
5925 | if (a) return a.value();
|
---|
5926 |
|
---|
5927 | p = p.parent();
|
---|
5928 | }
|
---|
5929 |
|
---|
5930 | return PUGIXML_TEXT("");
|
---|
5931 | }
|
---|
5932 |
|
---|
5933 | const char_t* namespace_uri(const xpath_node& node)
|
---|
5934 | {
|
---|
5935 | return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
|
---|
5936 | }
|
---|
5937 |
|
---|
5938 | void normalize_space(char_t* buffer)
|
---|
5939 | {
|
---|
5940 | char_t* write = buffer;
|
---|
5941 |
|
---|
5942 | for (char_t* it = buffer; *it; )
|
---|
5943 | {
|
---|
5944 | char_t ch = *it++;
|
---|
5945 |
|
---|
5946 | if (IS_CHARTYPE(ch, ct_space))
|
---|
5947 | {
|
---|
5948 | // replace whitespace sequence with single space
|
---|
5949 | while (IS_CHARTYPE(*it, ct_space)) it++;
|
---|
5950 |
|
---|
5951 | // avoid leading spaces
|
---|
5952 | if (write != buffer) *write++ = ' ';
|
---|
5953 | }
|
---|
5954 | else *write++ = ch;
|
---|
5955 | }
|
---|
5956 |
|
---|
5957 | // remove trailing space
|
---|
5958 | if (write != buffer && IS_CHARTYPE(write[-1], ct_space)) write--;
|
---|
5959 |
|
---|
5960 | // zero-terminate
|
---|
5961 | *write = 0;
|
---|
5962 | }
|
---|
5963 |
|
---|
5964 | void translate(char_t* buffer, const char_t* from, const char_t* to)
|
---|
5965 | {
|
---|
5966 | size_t to_length = strlength(to);
|
---|
5967 |
|
---|
5968 | char_t* write = buffer;
|
---|
5969 |
|
---|
5970 | while (*buffer)
|
---|
5971 | {
|
---|
5972 | DMC_VOLATILE char_t ch = *buffer++;
|
---|
5973 |
|
---|
5974 | const char_t* pos = find_char(from, ch);
|
---|
5975 |
|
---|
5976 | if (!pos)
|
---|
5977 | *write++ = ch; // do not process
|
---|
5978 | else if (static_cast<size_t>(pos - from) < to_length)
|
---|
5979 | *write++ = to[pos - from]; // replace
|
---|
5980 | }
|
---|
5981 |
|
---|
5982 | // zero-terminate
|
---|
5983 | *write = 0;
|
---|
5984 | }
|
---|
5985 |
|
---|
5986 | struct xpath_variable_boolean: xpath_variable
|
---|
5987 | {
|
---|
5988 | xpath_variable_boolean(): value(false)
|
---|
5989 | {
|
---|
5990 | }
|
---|
5991 |
|
---|
5992 | bool value;
|
---|
5993 | char_t name[1];
|
---|
5994 | };
|
---|
5995 |
|
---|
5996 | struct xpath_variable_number: xpath_variable
|
---|
5997 | {
|
---|
5998 | xpath_variable_number(): value(0)
|
---|
5999 | {
|
---|
6000 | }
|
---|
6001 |
|
---|
6002 | double value;
|
---|
6003 | char_t name[1];
|
---|
6004 | };
|
---|
6005 |
|
---|
6006 | struct xpath_variable_string: xpath_variable
|
---|
6007 | {
|
---|
6008 | xpath_variable_string(): value(0)
|
---|
6009 | {
|
---|
6010 | }
|
---|
6011 |
|
---|
6012 | ~xpath_variable_string()
|
---|
6013 | {
|
---|
6014 | if (value) global_deallocate(value);
|
---|
6015 | }
|
---|
6016 |
|
---|
6017 | char_t* value;
|
---|
6018 | char_t name[1];
|
---|
6019 | };
|
---|
6020 |
|
---|
6021 | struct xpath_variable_node_set: xpath_variable
|
---|
6022 | {
|
---|
6023 | xpath_node_set value;
|
---|
6024 | char_t name[1];
|
---|
6025 | };
|
---|
6026 |
|
---|
6027 | const xpath_node_set dummy_node_set;
|
---|
6028 |
|
---|
6029 | unsigned int hash_string(const char_t* str)
|
---|
6030 | {
|
---|
6031 | // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
|
---|
6032 | unsigned int result = 0;
|
---|
6033 |
|
---|
6034 | while (*str)
|
---|
6035 | {
|
---|
6036 | result += static_cast<unsigned int>(*str++);
|
---|
6037 | result += result << 10;
|
---|
6038 | result ^= result >> 6;
|
---|
6039 | }
|
---|
6040 |
|
---|
6041 | result += result << 3;
|
---|
6042 | result ^= result >> 11;
|
---|
6043 | result += result << 15;
|
---|
6044 |
|
---|
6045 | return result;
|
---|
6046 | }
|
---|
6047 |
|
---|
6048 | template <typename T> T* new_xpath_variable(const char_t* name)
|
---|
6049 | {
|
---|
6050 | size_t length = strlength(name);
|
---|
6051 | if (length == 0) return 0; // empty variable names are invalid
|
---|
6052 |
|
---|
6053 | // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
|
---|
6054 | void* memory = global_allocate(sizeof(T) + length * sizeof(char_t));
|
---|
6055 | if (!memory) return 0;
|
---|
6056 |
|
---|
6057 | T* result = new (memory) T();
|
---|
6058 |
|
---|
6059 | memcpy(result->name, name, (length + 1) * sizeof(char_t));
|
---|
6060 |
|
---|
6061 | return result;
|
---|
6062 | }
|
---|
6063 |
|
---|
6064 | xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
|
---|
6065 | {
|
---|
6066 | switch (type)
|
---|
6067 | {
|
---|
6068 | case xpath_type_node_set:
|
---|
6069 | return new_xpath_variable<xpath_variable_node_set>(name);
|
---|
6070 |
|
---|
6071 | case xpath_type_number:
|
---|
6072 | return new_xpath_variable<xpath_variable_number>(name);
|
---|
6073 |
|
---|
6074 | case xpath_type_string:
|
---|
6075 | return new_xpath_variable<xpath_variable_string>(name);
|
---|
6076 |
|
---|
6077 | case xpath_type_boolean:
|
---|
6078 | return new_xpath_variable<xpath_variable_boolean>(name);
|
---|
6079 |
|
---|
6080 | default:
|
---|
6081 | return 0;
|
---|
6082 | }
|
---|
6083 | }
|
---|
6084 |
|
---|
// Counterpart of new_xpath_variable<T>: runs the destructor explicitly (the object was
// created with placement new) and then returns the memory via global_deallocate.
template <typename T> void delete_xpath_variable(T* var)
{
    var->~T();

    global_deallocate(var);
}
|
---|
6090 |
|
---|
6091 | void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
|
---|
6092 | {
|
---|
6093 | switch (type)
|
---|
6094 | {
|
---|
6095 | case xpath_type_node_set:
|
---|
6096 | delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
|
---|
6097 | break;
|
---|
6098 |
|
---|
6099 | case xpath_type_number:
|
---|
6100 | delete_xpath_variable(static_cast<xpath_variable_number*>(var));
|
---|
6101 | break;
|
---|
6102 |
|
---|
6103 | case xpath_type_string:
|
---|
6104 | delete_xpath_variable(static_cast<xpath_variable_string*>(var));
|
---|
6105 | break;
|
---|
6106 |
|
---|
6107 | case xpath_type_boolean:
|
---|
6108 | delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
|
---|
6109 | break;
|
---|
6110 |
|
---|
6111 | default:
|
---|
6112 | assert(!"Invalid variable type");
|
---|
6113 | }
|
---|
6114 | }
|
---|
6115 |
|
---|
6116 | xpath_variable* get_variable(xpath_variable_set* set, const char_t* begin, const char_t* end)
|
---|
6117 | {
|
---|
6118 | char_t buffer[32];
|
---|
6119 |
|
---|
6120 | size_t length = static_cast<size_t>(end - begin);
|
---|
6121 | char_t* scratch = buffer;
|
---|
6122 |
|
---|
6123 | if (length >= sizeof(buffer) / sizeof(buffer[0]))
|
---|
6124 | {
|
---|
6125 | // need to make dummy on-heap copy
|
---|
6126 | scratch = static_cast<char_t*>(global_allocate((length + 1) * sizeof(char_t)));
|
---|
6127 | if (!scratch) return 0;
|
---|
6128 | }
|
---|
6129 |
|
---|
6130 | // copy string to zero-terminated buffer and perform lookup
|
---|
6131 | memcpy(scratch, begin, length * sizeof(char_t));
|
---|
6132 | scratch[length] = 0;
|
---|
6133 |
|
---|
6134 | xpath_variable* result = set->get(scratch);
|
---|
6135 |
|
---|
6136 | // free dummy buffer
|
---|
6137 | if (scratch != buffer) global_deallocate(scratch);
|
---|
6138 |
|
---|
6139 | return result;
|
---|
6140 | }
|
---|
6141 | }
|
---|
6142 |
|
---|
6143 | // Internal node set class
|
---|
6144 | namespace
|
---|
6145 | {
|
---|
6146 | xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
|
---|
6147 | {
|
---|
6148 | xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
|
---|
6149 |
|
---|
6150 | if (type == xpath_node_set::type_unsorted)
|
---|
6151 | {
|
---|
6152 | sort(begin, end, document_order_comparator());
|
---|
6153 |
|
---|
6154 | type = xpath_node_set::type_sorted;
|
---|
6155 | }
|
---|
6156 |
|
---|
6157 | if (type != order) reverse(begin, end);
|
---|
6158 |
|
---|
6159 | return order;
|
---|
6160 | }
|
---|
6161 |
|
---|
6162 | xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
|
---|
6163 | {
|
---|
6164 | if (begin == end) return xpath_node();
|
---|
6165 |
|
---|
6166 | switch (type)
|
---|
6167 | {
|
---|
6168 | case xpath_node_set::type_sorted:
|
---|
6169 | return *begin;
|
---|
6170 |
|
---|
6171 | case xpath_node_set::type_sorted_reverse:
|
---|
6172 | return *(end - 1);
|
---|
6173 |
|
---|
6174 | case xpath_node_set::type_unsorted:
|
---|
6175 | return *min_element(begin, end, document_order_comparator());
|
---|
6176 |
|
---|
6177 | default:
|
---|
6178 | assert(!"Invalid node set type");
|
---|
6179 | return xpath_node();
|
---|
6180 | }
|
---|
6181 | }
|
---|
6182 | class xpath_node_set_raw
|
---|
6183 | {
|
---|
6184 | xpath_node_set::type_t _type;
|
---|
6185 |
|
---|
6186 | xpath_node* _begin;
|
---|
6187 | xpath_node* _end;
|
---|
6188 | xpath_node* _eos;
|
---|
6189 |
|
---|
6190 | public:
|
---|
6191 | xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
|
---|
6192 | {
|
---|
6193 | }
|
---|
6194 |
|
---|
6195 | xpath_node* begin() const
|
---|
6196 | {
|
---|
6197 | return _begin;
|
---|
6198 | }
|
---|
6199 |
|
---|
6200 | xpath_node* end() const
|
---|
6201 | {
|
---|
6202 | return _end;
|
---|
6203 | }
|
---|
6204 |
|
---|
6205 | bool empty() const
|
---|
6206 | {
|
---|
6207 | return _begin == _end;
|
---|
6208 | }
|
---|
6209 |
|
---|
6210 | size_t size() const
|
---|
6211 | {
|
---|
6212 | return static_cast<size_t>(_end - _begin);
|
---|
6213 | }
|
---|
6214 |
|
---|
6215 | xpath_node first() const
|
---|
6216 | {
|
---|
6217 | return xpath_first(_begin, _end, _type);
|
---|
6218 | }
|
---|
6219 |
|
---|
6220 | void push_back(const xpath_node& node, xpath_allocator* alloc)
|
---|
6221 | {
|
---|
6222 | if (_end == _eos)
|
---|
6223 | {
|
---|
6224 | size_t capacity = static_cast<size_t>(_eos - _begin);
|
---|
6225 |
|
---|
6226 | // get new capacity (1.5x rule)
|
---|
6227 | size_t new_capacity = capacity + capacity / 2 + 1;
|
---|
6228 |
|
---|
6229 | // reallocate the old array or allocate a new one
|
---|
6230 | xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
|
---|
6231 | assert(data);
|
---|
6232 |
|
---|
6233 | // finalize
|
---|
6234 | _begin = data;
|
---|
6235 | _end = data + capacity;
|
---|
6236 | _eos = data + new_capacity;
|
---|
6237 | }
|
---|
6238 |
|
---|
6239 | *_end++ = node;
|
---|
6240 | }
|
---|
6241 |
|
---|
6242 | void append(const xpath_node* begin, const xpath_node* end, xpath_allocator* alloc)
|
---|
6243 | {
|
---|
6244 | size_t size = static_cast<size_t>(_end - _begin);
|
---|
6245 | size_t capacity = static_cast<size_t>(_eos - _begin);
|
---|
6246 | size_t count = static_cast<size_t>(end - begin);
|
---|
6247 |
|
---|
6248 | if (size + count > capacity)
|
---|
6249 | {
|
---|
6250 | // reallocate the old array or allocate a new one
|
---|
6251 | xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size + count) * sizeof(xpath_node)));
|
---|
6252 | assert(data);
|
---|
6253 |
|
---|
6254 | // finalize
|
---|
6255 | _begin = data;
|
---|
6256 | _end = data + size;
|
---|
6257 | _eos = data + size + count;
|
---|
6258 | }
|
---|
6259 |
|
---|
6260 | memcpy(_end, begin, count * sizeof(xpath_node));
|
---|
6261 | _end += count;
|
---|
6262 | }
|
---|
6263 |
|
---|
6264 | void sort_do()
|
---|
6265 | {
|
---|
6266 | _type = xpath_sort(_begin, _end, _type, false);
|
---|
6267 | }
|
---|
6268 |
|
---|
6269 | void truncate(xpath_node* pos)
|
---|
6270 | {
|
---|
6271 | assert(_begin <= pos && pos <= _end);
|
---|
6272 |
|
---|
6273 | _end = pos;
|
---|
6274 | }
|
---|
6275 |
|
---|
6276 | void remove_duplicates()
|
---|
6277 | {
|
---|
6278 | if (_type == xpath_node_set::type_unsorted)
|
---|
6279 | sort(_begin, _end, duplicate_comparator());
|
---|
6280 |
|
---|
6281 | _end = unique(_begin, _end);
|
---|
6282 | }
|
---|
6283 |
|
---|
6284 | xpath_node_set::type_t type() const
|
---|
6285 | {
|
---|
6286 | return _type;
|
---|
6287 | }
|
---|
6288 |
|
---|
6289 | void set_type(xpath_node_set::type_t type)
|
---|
6290 | {
|
---|
6291 | _type = type;
|
---|
6292 | }
|
---|
6293 | };
|
---|
6294 | }
|
---|
6295 |
|
---|
6296 | namespace
|
---|
6297 | {
|
---|
6298 | struct xpath_context
|
---|
6299 | {
|
---|
6300 | xpath_node n;
|
---|
6301 | size_t position, size;
|
---|
6302 |
|
---|
6303 | xpath_context(const xpath_node& n, size_t position, size_t size): n(n), position(position), size(size)
|
---|
6304 | {
|
---|
6305 | }
|
---|
6306 | };
|
---|
6307 |
|
---|
// Token kinds produced by xpath_lexer::next().
enum lexeme_t
{
	lex_none = 0, // unrecognized input (e.g. a lone '!' or ':', or an unterminated string literal)
	lex_equal, // =
	lex_not_equal, // !=
	lex_less, // <
	lex_greater, // >
	lex_less_or_equal, // <=
	lex_greater_or_equal, // >=
	lex_plus, // +
	lex_minus, // -
	lex_multiply, // *
	lex_union, // |
	lex_var_ref, // $name or $prefix:name; contents exclude the '$'
	lex_open_brace, // (
	lex_close_brace, // )
	lex_quoted_string, // "..." or '...'; contents exclude the quotes
	lex_number, // digits with an optional fractional part, or '.' followed by digits
	lex_slash, // /
	lex_double_slash, // //
	lex_open_square_brace, // [
	lex_close_square_brace, // ]
	lex_string, // name, prefix:name or prefix:*
	lex_comma, // ,
	lex_axis_attribute, // @
	lex_dot, // .
	lex_double_dot, // ..
	lex_double_colon, // ::
	lex_eof // end of the query string
};
|
---|
6338 |
|
---|
6339 | struct xpath_lexer_string
|
---|
6340 | {
|
---|
6341 | const char_t* begin;
|
---|
6342 | const char_t* end;
|
---|
6343 |
|
---|
6344 | xpath_lexer_string(): begin(0), end(0)
|
---|
6345 | {
|
---|
6346 | }
|
---|
6347 |
|
---|
6348 | bool operator==(const char_t* other) const
|
---|
6349 | {
|
---|
6350 | size_t length = static_cast<size_t>(end - begin);
|
---|
6351 |
|
---|
6352 | return strequalrange(other, begin, length);
|
---|
6353 | }
|
---|
6354 | };
|
---|
6355 |
|
---|
6356 | class xpath_lexer
|
---|
6357 | {
|
---|
6358 | const char_t* _cur;
|
---|
6359 | const char_t* _cur_lexeme_pos;
|
---|
6360 | xpath_lexer_string _cur_lexeme_contents;
|
---|
6361 |
|
---|
6362 | lexeme_t _cur_lexeme;
|
---|
6363 |
|
---|
6364 | public:
|
---|
6365 | explicit xpath_lexer(const char_t* query): _cur(query)
|
---|
6366 | {
|
---|
6367 | next();
|
---|
6368 | }
|
---|
6369 |
|
---|
6370 | const char_t* state() const
|
---|
6371 | {
|
---|
6372 | return _cur;
|
---|
6373 | }
|
---|
6374 |
|
---|
6375 | void next()
|
---|
6376 | {
|
---|
6377 | const char_t* cur = _cur;
|
---|
6378 |
|
---|
6379 | while (IS_CHARTYPE(*cur, ct_space)) ++cur;
|
---|
6380 |
|
---|
6381 | // save lexeme position for error reporting
|
---|
6382 | _cur_lexeme_pos = cur;
|
---|
6383 |
|
---|
6384 | switch (*cur)
|
---|
6385 | {
|
---|
6386 | case 0:
|
---|
6387 | _cur_lexeme = lex_eof;
|
---|
6388 | break;
|
---|
6389 |
|
---|
6390 | case '>':
|
---|
6391 | if (*(cur+1) == '=')
|
---|
6392 | {
|
---|
6393 | cur += 2;
|
---|
6394 | _cur_lexeme = lex_greater_or_equal;
|
---|
6395 | }
|
---|
6396 | else
|
---|
6397 | {
|
---|
6398 | cur += 1;
|
---|
6399 | _cur_lexeme = lex_greater;
|
---|
6400 | }
|
---|
6401 | break;
|
---|
6402 |
|
---|
6403 | case '<':
|
---|
6404 | if (*(cur+1) == '=')
|
---|
6405 | {
|
---|
6406 | cur += 2;
|
---|
6407 | _cur_lexeme = lex_less_or_equal;
|
---|
6408 | }
|
---|
6409 | else
|
---|
6410 | {
|
---|
6411 | cur += 1;
|
---|
6412 | _cur_lexeme = lex_less;
|
---|
6413 | }
|
---|
6414 | break;
|
---|
6415 |
|
---|
6416 | case '!':
|
---|
6417 | if (*(cur+1) == '=')
|
---|
6418 | {
|
---|
6419 | cur += 2;
|
---|
6420 | _cur_lexeme = lex_not_equal;
|
---|
6421 | }
|
---|
6422 | else
|
---|
6423 | {
|
---|
6424 | _cur_lexeme = lex_none;
|
---|
6425 | }
|
---|
6426 | break;
|
---|
6427 |
|
---|
6428 | case '=':
|
---|
6429 | cur += 1;
|
---|
6430 | _cur_lexeme = lex_equal;
|
---|
6431 |
|
---|
6432 | break;
|
---|
6433 |
|
---|
6434 | case '+':
|
---|
6435 | cur += 1;
|
---|
6436 | _cur_lexeme = lex_plus;
|
---|
6437 |
|
---|
6438 | break;
|
---|
6439 |
|
---|
6440 | case '-':
|
---|
6441 | cur += 1;
|
---|
6442 | _cur_lexeme = lex_minus;
|
---|
6443 |
|
---|
6444 | break;
|
---|
6445 |
|
---|
6446 | case '*':
|
---|
6447 | cur += 1;
|
---|
6448 | _cur_lexeme = lex_multiply;
|
---|
6449 |
|
---|
6450 | break;
|
---|
6451 |
|
---|
6452 | case '|':
|
---|
6453 | cur += 1;
|
---|
6454 | _cur_lexeme = lex_union;
|
---|
6455 |
|
---|
6456 | break;
|
---|
6457 |
|
---|
6458 | case '$':
|
---|
6459 | cur += 1;
|
---|
6460 |
|
---|
6461 | if (IS_CHARTYPEX(*cur, ctx_start_symbol))
|
---|
6462 | {
|
---|
6463 | _cur_lexeme_contents.begin = cur;
|
---|
6464 |
|
---|
6465 | while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
|
---|
6466 |
|
---|
6467 | if (cur[0] == ':' && IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
|
---|
6468 | {
|
---|
6469 | cur++; // :
|
---|
6470 |
|
---|
6471 | while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
|
---|
6472 | }
|
---|
6473 |
|
---|
6474 | _cur_lexeme_contents.end = cur;
|
---|
6475 |
|
---|
6476 | _cur_lexeme = lex_var_ref;
|
---|
6477 | }
|
---|
6478 | else
|
---|
6479 | {
|
---|
6480 | _cur_lexeme = lex_none;
|
---|
6481 | }
|
---|
6482 |
|
---|
6483 | break;
|
---|
6484 |
|
---|
6485 | case '(':
|
---|
6486 | cur += 1;
|
---|
6487 | _cur_lexeme = lex_open_brace;
|
---|
6488 |
|
---|
6489 | break;
|
---|
6490 |
|
---|
6491 | case ')':
|
---|
6492 | cur += 1;
|
---|
6493 | _cur_lexeme = lex_close_brace;
|
---|
6494 |
|
---|
6495 | break;
|
---|
6496 |
|
---|
6497 | case '[':
|
---|
6498 | cur += 1;
|
---|
6499 | _cur_lexeme = lex_open_square_brace;
|
---|
6500 |
|
---|
6501 | break;
|
---|
6502 |
|
---|
6503 | case ']':
|
---|
6504 | cur += 1;
|
---|
6505 | _cur_lexeme = lex_close_square_brace;
|
---|
6506 |
|
---|
6507 | break;
|
---|
6508 |
|
---|
6509 | case ',':
|
---|
6510 | cur += 1;
|
---|
6511 | _cur_lexeme = lex_comma;
|
---|
6512 |
|
---|
6513 | break;
|
---|
6514 |
|
---|
6515 | case '/':
|
---|
6516 | if (*(cur+1) == '/')
|
---|
6517 | {
|
---|
6518 | cur += 2;
|
---|
6519 | _cur_lexeme = lex_double_slash;
|
---|
6520 | }
|
---|
6521 | else
|
---|
6522 | {
|
---|
6523 | cur += 1;
|
---|
6524 | _cur_lexeme = lex_slash;
|
---|
6525 | }
|
---|
6526 | break;
|
---|
6527 |
|
---|
6528 | case '.':
|
---|
6529 | if (*(cur+1) == '.')
|
---|
6530 | {
|
---|
6531 | cur += 2;
|
---|
6532 | _cur_lexeme = lex_double_dot;
|
---|
6533 | }
|
---|
6534 | else if (IS_CHARTYPEX(*(cur+1), ctx_digit))
|
---|
6535 | {
|
---|
6536 | _cur_lexeme_contents.begin = cur; // .
|
---|
6537 |
|
---|
6538 | ++cur;
|
---|
6539 |
|
---|
6540 | while (IS_CHARTYPEX(*cur, ctx_digit)) cur++;
|
---|
6541 |
|
---|
6542 | _cur_lexeme_contents.end = cur;
|
---|
6543 |
|
---|
6544 | _cur_lexeme = lex_number;
|
---|
6545 | }
|
---|
6546 | else
|
---|
6547 | {
|
---|
6548 | cur += 1;
|
---|
6549 | _cur_lexeme = lex_dot;
|
---|
6550 | }
|
---|
6551 | break;
|
---|
6552 |
|
---|
6553 | case '@':
|
---|
6554 | cur += 1;
|
---|
6555 | _cur_lexeme = lex_axis_attribute;
|
---|
6556 |
|
---|
6557 | break;
|
---|
6558 |
|
---|
6559 | case '"':
|
---|
6560 | case '\'':
|
---|
6561 | {
|
---|
6562 | char_t terminator = *cur;
|
---|
6563 |
|
---|
6564 | ++cur;
|
---|
6565 |
|
---|
6566 | _cur_lexeme_contents.begin = cur;
|
---|
6567 | while (*cur && *cur != terminator) cur++;
|
---|
6568 | _cur_lexeme_contents.end = cur;
|
---|
6569 |
|
---|
6570 | if (!*cur)
|
---|
6571 | _cur_lexeme = lex_none;
|
---|
6572 | else
|
---|
6573 | {
|
---|
6574 | cur += 1;
|
---|
6575 | _cur_lexeme = lex_quoted_string;
|
---|
6576 | }
|
---|
6577 |
|
---|
6578 | break;
|
---|
6579 | }
|
---|
6580 |
|
---|
6581 | case ':':
|
---|
6582 | if (*(cur+1) == ':')
|
---|
6583 | {
|
---|
6584 | cur += 2;
|
---|
6585 | _cur_lexeme = lex_double_colon;
|
---|
6586 | }
|
---|
6587 | else
|
---|
6588 | {
|
---|
6589 | _cur_lexeme = lex_none;
|
---|
6590 | }
|
---|
6591 | break;
|
---|
6592 |
|
---|
6593 | default:
|
---|
6594 | if (IS_CHARTYPEX(*cur, ctx_digit))
|
---|
6595 | {
|
---|
6596 | _cur_lexeme_contents.begin = cur;
|
---|
6597 |
|
---|
6598 | while (IS_CHARTYPEX(*cur, ctx_digit)) cur++;
|
---|
6599 |
|
---|
6600 | if (*cur == '.')
|
---|
6601 | {
|
---|
6602 | cur++;
|
---|
6603 |
|
---|
6604 | while (IS_CHARTYPEX(*cur, ctx_digit)) cur++;
|
---|
6605 | }
|
---|
6606 |
|
---|
6607 | _cur_lexeme_contents.end = cur;
|
---|
6608 |
|
---|
6609 | _cur_lexeme = lex_number;
|
---|
6610 | }
|
---|
6611 | else if (IS_CHARTYPEX(*cur, ctx_start_symbol))
|
---|
6612 | {
|
---|
6613 | _cur_lexeme_contents.begin = cur;
|
---|
6614 |
|
---|
6615 | while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
|
---|
6616 |
|
---|
6617 | if (cur[0] == ':')
|
---|
6618 | {
|
---|
6619 | if (cur[1] == '*') // namespace test ncname:*
|
---|
6620 | {
|
---|
6621 | cur += 2; // :*
|
---|
6622 | }
|
---|
6623 | else if (IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
|
---|
6624 | {
|
---|
6625 | cur++; // :
|
---|
6626 |
|
---|
6627 | while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
|
---|
6628 | }
|
---|
6629 | }
|
---|
6630 |
|
---|
6631 | _cur_lexeme_contents.end = cur;
|
---|
6632 |
|
---|
6633 | _cur_lexeme = lex_string;
|
---|
6634 | }
|
---|
6635 | else
|
---|
6636 | {
|
---|
6637 | _cur_lexeme = lex_none;
|
---|
6638 | }
|
---|
6639 | }
|
---|
6640 |
|
---|
6641 | _cur = cur;
|
---|
6642 | }
|
---|
6643 |
|
---|
6644 | lexeme_t current() const
|
---|
6645 | {
|
---|
6646 | return _cur_lexeme;
|
---|
6647 | }
|
---|
6648 |
|
---|
6649 | const char_t* current_pos() const
|
---|
6650 | {
|
---|
6651 | return _cur_lexeme_pos;
|
---|
6652 | }
|
---|
6653 |
|
---|
6654 | const xpath_lexer_string& contents() const
|
---|
6655 | {
|
---|
6656 | assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
|
---|
6657 |
|
---|
6658 | return _cur_lexeme_contents;
|
---|
6659 | }
|
---|
6660 | };
|
---|
6661 |
|
---|
// Kinds of XPath AST node. In the comments below, "left", "right" and "third"
// name the operand expressions of the node.
enum ast_type_t
{
	ast_op_or, // left or right
	ast_op_and, // left and right
	ast_op_equal, // left = right
	ast_op_not_equal, // left != right
	ast_op_less, // left < right
	ast_op_greater, // left > right
	ast_op_less_or_equal, // left <= right
	ast_op_greater_or_equal, // left >= right
	ast_op_add, // left + right
	ast_op_subtract, // left - right
	ast_op_multiply, // left * right
	ast_op_divide, // left / right
	ast_op_mod, // left % right
	ast_op_negate, // - left (unary minus; NOTE(review): operand slot is not visible here - confirm)
	ast_op_union, // left | right
	ast_predicate, // apply predicate to set; next points to next predicate
	ast_filter, // select * from left where right
	ast_filter_posinv, // select * from left where right; proximity position invariant
	ast_string_constant, // string constant
	ast_number_constant, // number constant
	ast_variable, // variable
	ast_func_last, // last()
	ast_func_position, // position()
	ast_func_count, // count(left)
	ast_func_id, // id(left)
	ast_func_local_name_0, // local-name()
	ast_func_local_name_1, // local-name(left)
	ast_func_namespace_uri_0, // namespace-uri()
	ast_func_namespace_uri_1, // namespace-uri(left)
	ast_func_name_0, // name()
	ast_func_name_1, // name(left)
	ast_func_string_0, // string()
	ast_func_string_1, // string(left)
	ast_func_concat, // concat(left, right, siblings)
	ast_func_starts_with, // starts-with(left, right)
	ast_func_contains, // contains(left, right)
	ast_func_substring_before, // substring-before(left, right)
	ast_func_substring_after, // substring-after(left, right)
	ast_func_substring_2, // substring(left, right)
	ast_func_substring_3, // substring(left, right, third)
	ast_func_string_length_0, // string-length()
	ast_func_string_length_1, // string-length(left)
	ast_func_normalize_space_0, // normalize-space()
	ast_func_normalize_space_1, // normalize-space(left)
	ast_func_translate, // translate(left, right, third)
	ast_func_boolean, // boolean(left)
	ast_func_not, // not(left)
	ast_func_true, // true()
	ast_func_false, // false()
	ast_func_lang, // lang(left)
	ast_func_number_0, // number()
	ast_func_number_1, // number(left)
	ast_func_sum, // sum(left)
	ast_func_floor, // floor(left)
	ast_func_ceiling, // ceiling(left)
	ast_func_round, // round(left)
	ast_step, // process set left with step
	ast_step_root // select root node
};
|
---|
6723 |
|
---|
// Axes of a location step; xpath_ast_node::step_fill switches on these values
// to decide which nodes relative to the context node are visited.
enum axis_t
{
	axis_ancestor,
	axis_ancestor_or_self,
	axis_attribute,
	axis_child,
	axis_descendant,
	axis_descendant_or_self,
	axis_following,
	axis_following_sibling,
	axis_namespace,
	axis_parent,
	axis_preceding,
	axis_preceding_sibling,
	axis_self
};
|
---|
6740 |
|
---|
// Node tests of a location step, as interpreted by the xpath_ast_node::step_push overloads.
enum nodetest_t
{
	nodetest_none, // not handled by step_push (the node overload asserts on it)
	nodetest_name, // element or attribute whose name equals the stored node test string
	nodetest_type_node, // any node or attribute
	nodetest_type_comment, // comment nodes
	nodetest_type_pi, // processing instruction nodes
	nodetest_type_text, // pcdata and cdata nodes
	nodetest_pi, // processing instruction whose name equals the stored node test string
	nodetest_all, // any element, or any attribute
	nodetest_all_in_namespace // element or attribute whose name starts with the stored node test string
};
|
---|
6753 |
|
---|
// Wraps an axis_t value in a type so it can be passed as a tag argument;
// xpath_ast_node::step_fill reads it back through T::axis.
template <axis_t N> struct axis_to_type
{
	static const axis_t axis;
};

// Out-of-class definition of the static member; its value is the template argument.
template <axis_t N> const axis_t axis_to_type<N>::axis = N;
|
---|
6760 |
|
---|
6761 | class xpath_ast_node
|
---|
6762 | {
|
---|
6763 | private:
|
---|
6764 | // node type
|
---|
6765 | char _type;
|
---|
6766 | char _rettype;
|
---|
6767 |
|
---|
6768 | // for ast_step / ast_predicate
|
---|
6769 | char _axis;
|
---|
6770 | char _test;
|
---|
6771 |
|
---|
6772 | // tree node structure
|
---|
6773 | xpath_ast_node* _left;
|
---|
6774 | xpath_ast_node* _right;
|
---|
6775 | xpath_ast_node* _next;
|
---|
6776 |
|
---|
6777 | union
|
---|
6778 | {
|
---|
6779 | // value for ast_string_constant
|
---|
6780 | const char_t* string;
|
---|
6781 | // value for ast_number_constant
|
---|
6782 | double number;
|
---|
6783 | // variable for ast_variable
|
---|
6784 | xpath_variable* variable;
|
---|
6785 | // node test for ast_step (node name/namespace/node type/pi target)
|
---|
6786 | const char_t* nodetest;
|
---|
6787 | } _data;
|
---|
6788 |
|
---|
6789 | xpath_ast_node(const xpath_ast_node&);
|
---|
6790 | xpath_ast_node& operator=(const xpath_ast_node&);
|
---|
6791 |
|
---|
6792 | template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
|
---|
6793 | {
|
---|
6794 | xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
|
---|
6795 |
|
---|
6796 | if (lt != xpath_type_node_set && rt != xpath_type_node_set)
|
---|
6797 | {
|
---|
6798 | if (lt == xpath_type_boolean || rt == xpath_type_boolean)
|
---|
6799 | return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
|
---|
6800 | else if (lt == xpath_type_number || rt == xpath_type_number)
|
---|
6801 | return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
|
---|
6802 | else if (lt == xpath_type_string || rt == xpath_type_string)
|
---|
6803 | {
|
---|
6804 | xpath_allocator_capture cr(stack.result);
|
---|
6805 |
|
---|
6806 | xpath_string ls = lhs->eval_string(c, stack);
|
---|
6807 | xpath_string rs = rhs->eval_string(c, stack);
|
---|
6808 |
|
---|
6809 | return comp(ls, rs);
|
---|
6810 | }
|
---|
6811 | }
|
---|
6812 | else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
|
---|
6813 | {
|
---|
6814 | xpath_allocator_capture cr(stack.result);
|
---|
6815 |
|
---|
6816 | xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
|
---|
6817 | xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
|
---|
6818 |
|
---|
6819 | for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
|
---|
6820 | for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
|
---|
6821 | {
|
---|
6822 | xpath_allocator_capture cri(stack.result);
|
---|
6823 |
|
---|
6824 | if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
|
---|
6825 | return true;
|
---|
6826 | }
|
---|
6827 |
|
---|
6828 | return false;
|
---|
6829 | }
|
---|
6830 | else
|
---|
6831 | {
|
---|
6832 | if (lt == xpath_type_node_set)
|
---|
6833 | {
|
---|
6834 | swap(lhs, rhs);
|
---|
6835 | swap(lt, rt);
|
---|
6836 | }
|
---|
6837 |
|
---|
6838 | if (lt == xpath_type_boolean)
|
---|
6839 | return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
|
---|
6840 | else if (lt == xpath_type_number)
|
---|
6841 | {
|
---|
6842 | xpath_allocator_capture cr(stack.result);
|
---|
6843 |
|
---|
6844 | double l = lhs->eval_number(c, stack);
|
---|
6845 | xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
|
---|
6846 |
|
---|
6847 | for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
|
---|
6848 | {
|
---|
6849 | xpath_allocator_capture cri(stack.result);
|
---|
6850 |
|
---|
6851 | if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
|
---|
6852 | return true;
|
---|
6853 | }
|
---|
6854 |
|
---|
6855 | return false;
|
---|
6856 | }
|
---|
6857 | else if (lt == xpath_type_string)
|
---|
6858 | {
|
---|
6859 | xpath_allocator_capture cr(stack.result);
|
---|
6860 |
|
---|
6861 | xpath_string l = lhs->eval_string(c, stack);
|
---|
6862 | xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
|
---|
6863 |
|
---|
6864 | for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
|
---|
6865 | {
|
---|
6866 | xpath_allocator_capture cri(stack.result);
|
---|
6867 |
|
---|
6868 | if (comp(l, string_value(*ri, stack.result)))
|
---|
6869 | return true;
|
---|
6870 | }
|
---|
6871 |
|
---|
6872 | return false;
|
---|
6873 | }
|
---|
6874 | }
|
---|
6875 |
|
---|
6876 | assert(!"Wrong types");
|
---|
6877 | return false;
|
---|
6878 | }
|
---|
6879 |
|
---|
6880 | template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
|
---|
6881 | {
|
---|
6882 | xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
|
---|
6883 |
|
---|
6884 | if (lt != xpath_type_node_set && rt != xpath_type_node_set)
|
---|
6885 | return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
|
---|
6886 | else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
|
---|
6887 | {
|
---|
6888 | xpath_allocator_capture cr(stack.result);
|
---|
6889 |
|
---|
6890 | xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
|
---|
6891 | xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
|
---|
6892 |
|
---|
6893 | for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
|
---|
6894 | {
|
---|
6895 | xpath_allocator_capture cri(stack.result);
|
---|
6896 |
|
---|
6897 | double l = convert_string_to_number(string_value(*li, stack.result).c_str());
|
---|
6898 |
|
---|
6899 | for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
|
---|
6900 | {
|
---|
6901 | xpath_allocator_capture crii(stack.result);
|
---|
6902 |
|
---|
6903 | if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
|
---|
6904 | return true;
|
---|
6905 | }
|
---|
6906 | }
|
---|
6907 |
|
---|
6908 | return false;
|
---|
6909 | }
|
---|
6910 | else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
|
---|
6911 | {
|
---|
6912 | xpath_allocator_capture cr(stack.result);
|
---|
6913 |
|
---|
6914 | double l = lhs->eval_number(c, stack);
|
---|
6915 | xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
|
---|
6916 |
|
---|
6917 | for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
|
---|
6918 | {
|
---|
6919 | xpath_allocator_capture cri(stack.result);
|
---|
6920 |
|
---|
6921 | if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
|
---|
6922 | return true;
|
---|
6923 | }
|
---|
6924 |
|
---|
6925 | return false;
|
---|
6926 | }
|
---|
6927 | else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
|
---|
6928 | {
|
---|
6929 | xpath_allocator_capture cr(stack.result);
|
---|
6930 |
|
---|
6931 | xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
|
---|
6932 | double r = rhs->eval_number(c, stack);
|
---|
6933 |
|
---|
6934 | for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
|
---|
6935 | {
|
---|
6936 | xpath_allocator_capture cri(stack.result);
|
---|
6937 |
|
---|
6938 | if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
|
---|
6939 | return true;
|
---|
6940 | }
|
---|
6941 |
|
---|
6942 | return false;
|
---|
6943 | }
|
---|
6944 | else
|
---|
6945 | {
|
---|
6946 | assert(!"Wrong types");
|
---|
6947 | return false;
|
---|
6948 | }
|
---|
6949 | }
|
---|
6950 |
|
---|
6951 | void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
|
---|
6952 | {
|
---|
6953 | assert(ns.size() >= first);
|
---|
6954 |
|
---|
6955 | size_t i = 1;
|
---|
6956 | size_t size = ns.size() - first;
|
---|
6957 |
|
---|
6958 | xpath_node* last = ns.begin() + first;
|
---|
6959 |
|
---|
6960 | // remove_if... or well, sort of
|
---|
6961 | for (xpath_node* it = last; it != ns.end(); ++it, ++i)
|
---|
6962 | {
|
---|
6963 | xpath_context c(*it, i, size);
|
---|
6964 |
|
---|
6965 | if (expr->rettype() == xpath_type_number)
|
---|
6966 | {
|
---|
6967 | if (expr->eval_number(c, stack) == i)
|
---|
6968 | *last++ = *it;
|
---|
6969 | }
|
---|
6970 | else if (expr->eval_boolean(c, stack))
|
---|
6971 | *last++ = *it;
|
---|
6972 | }
|
---|
6973 |
|
---|
6974 | ns.truncate(last);
|
---|
6975 | }
|
---|
6976 |
|
---|
6977 | void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack)
|
---|
6978 | {
|
---|
6979 | if (ns.size() == first) return;
|
---|
6980 |
|
---|
6981 | for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
|
---|
6982 | {
|
---|
6983 | apply_predicate(ns, first, pred->_left, stack);
|
---|
6984 | }
|
---|
6985 | }
|
---|
6986 |
|
---|
6987 | void step_push(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& parent, xpath_allocator* alloc)
|
---|
6988 | {
|
---|
6989 | if (!a) return;
|
---|
6990 |
|
---|
6991 | const char_t* name = a.name();
|
---|
6992 |
|
---|
6993 | // There are no attribute nodes corresponding to attributes that declare namespaces
|
---|
6994 | // That is, "xmlns:..." or "xmlns"
|
---|
6995 | if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return;
|
---|
6996 |
|
---|
6997 | switch (_test)
|
---|
6998 | {
|
---|
6999 | case nodetest_name:
|
---|
7000 | if (strequal(name, _data.nodetest)) ns.push_back(xpath_node(a, parent), alloc);
|
---|
7001 | break;
|
---|
7002 |
|
---|
7003 | case nodetest_type_node:
|
---|
7004 | case nodetest_all:
|
---|
7005 | ns.push_back(xpath_node(a, parent), alloc);
|
---|
7006 | break;
|
---|
7007 |
|
---|
7008 | case nodetest_all_in_namespace:
|
---|
7009 | if (starts_with(name, _data.nodetest))
|
---|
7010 | ns.push_back(xpath_node(a, parent), alloc);
|
---|
7011 | break;
|
---|
7012 |
|
---|
7013 | default:
|
---|
7014 | ;
|
---|
7015 | }
|
---|
7016 | }
|
---|
7017 |
|
---|
7018 | void step_push(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc)
|
---|
7019 | {
|
---|
7020 | if (!n) return;
|
---|
7021 |
|
---|
7022 | switch (_test)
|
---|
7023 | {
|
---|
7024 | case nodetest_name:
|
---|
7025 | if (n.type() == node_element && strequal(n.name(), _data.nodetest)) ns.push_back(n, alloc);
|
---|
7026 | break;
|
---|
7027 |
|
---|
7028 | case nodetest_type_node:
|
---|
7029 | ns.push_back(n, alloc);
|
---|
7030 | break;
|
---|
7031 |
|
---|
7032 | case nodetest_type_comment:
|
---|
7033 | if (n.type() == node_comment)
|
---|
7034 | ns.push_back(n, alloc);
|
---|
7035 | break;
|
---|
7036 |
|
---|
7037 | case nodetest_type_text:
|
---|
7038 | if (n.type() == node_pcdata || n.type() == node_cdata)
|
---|
7039 | ns.push_back(n, alloc);
|
---|
7040 | break;
|
---|
7041 |
|
---|
7042 | case nodetest_type_pi:
|
---|
7043 | if (n.type() == node_pi)
|
---|
7044 | ns.push_back(n, alloc);
|
---|
7045 | break;
|
---|
7046 |
|
---|
7047 | case nodetest_pi:
|
---|
7048 | if (n.type() == node_pi && strequal(n.name(), _data.nodetest))
|
---|
7049 | ns.push_back(n, alloc);
|
---|
7050 | break;
|
---|
7051 |
|
---|
7052 | case nodetest_all:
|
---|
7053 | if (n.type() == node_element)
|
---|
7054 | ns.push_back(n, alloc);
|
---|
7055 | break;
|
---|
7056 |
|
---|
7057 | case nodetest_all_in_namespace:
|
---|
7058 | if (n.type() == node_element && starts_with(n.name(), _data.nodetest))
|
---|
7059 | ns.push_back(n, alloc);
|
---|
7060 | break;
|
---|
7061 |
|
---|
7062 | default:
|
---|
7063 | assert(!"Unknown axis");
|
---|
7064 | }
|
---|
7065 | }
|
---|
7066 |
|
---|
7067 | template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc, T)
|
---|
7068 | {
|
---|
7069 | const axis_t axis = T::axis;
|
---|
7070 |
|
---|
7071 | switch (axis)
|
---|
7072 | {
|
---|
7073 | case axis_attribute:
|
---|
7074 | {
|
---|
7075 | for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute())
|
---|
7076 | step_push(ns, a, n, alloc);
|
---|
7077 |
|
---|
7078 | break;
|
---|
7079 | }
|
---|
7080 |
|
---|
7081 | case axis_child:
|
---|
7082 | {
|
---|
7083 | for (xml_node c = n.first_child(); c; c = c.next_sibling())
|
---|
7084 | step_push(ns, c, alloc);
|
---|
7085 |
|
---|
7086 | break;
|
---|
7087 | }
|
---|
7088 |
|
---|
7089 | case axis_descendant:
|
---|
7090 | case axis_descendant_or_self:
|
---|
7091 | {
|
---|
7092 | if (axis == axis_descendant_or_self)
|
---|
7093 | step_push(ns, n, alloc);
|
---|
7094 |
|
---|
7095 | xml_node cur = n.first_child();
|
---|
7096 |
|
---|
7097 | while (cur && cur != n)
|
---|
7098 | {
|
---|
7099 | step_push(ns, cur, alloc);
|
---|
7100 |
|
---|
7101 | if (cur.first_child())
|
---|
7102 | cur = cur.first_child();
|
---|
7103 | else if (cur.next_sibling())
|
---|
7104 | cur = cur.next_sibling();
|
---|
7105 | else
|
---|
7106 | {
|
---|
7107 | while (!cur.next_sibling() && cur != n)
|
---|
7108 | cur = cur.parent();
|
---|
7109 |
|
---|
7110 | if (cur != n) cur = cur.next_sibling();
|
---|
7111 | }
|
---|
7112 | }
|
---|
7113 |
|
---|
7114 | break;
|
---|
7115 | }
|
---|
7116 |
|
---|
7117 | case axis_following_sibling:
|
---|
7118 | {
|
---|
7119 | for (xml_node c = n.next_sibling(); c; c = c.next_sibling())
|
---|
7120 | step_push(ns, c, alloc);
|
---|
7121 |
|
---|
7122 | break;
|
---|
7123 | }
|
---|
7124 |
|
---|
7125 | case axis_preceding_sibling:
|
---|
7126 | {
|
---|
7127 | for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling())
|
---|
7128 | step_push(ns, c, alloc);
|
---|
7129 |
|
---|
7130 | break;
|
---|
7131 | }
|
---|
7132 |
|
---|
7133 | case axis_following:
|
---|
7134 | {
|
---|
7135 | xml_node cur = n;
|
---|
7136 |
|
---|
7137 | // exit from this node so that we don't include descendants
|
---|
7138 | while (cur && !cur.next_sibling()) cur = cur.parent();
|
---|
7139 | cur = cur.next_sibling();
|
---|
7140 |
|
---|
7141 | for (;;)
|
---|
7142 | {
|
---|
7143 | step_push(ns, cur, alloc);
|
---|
7144 |
|
---|
7145 | if (cur.first_child())
|
---|
7146 | cur = cur.first_child();
|
---|
7147 | else if (cur.next_sibling())
|
---|
7148 | cur = cur.next_sibling();
|
---|
7149 | else
|
---|
7150 | {
|
---|
7151 | while (cur && !cur.next_sibling()) cur = cur.parent();
|
---|
7152 | cur = cur.next_sibling();
|
---|
7153 |
|
---|
7154 | if (!cur) break;
|
---|
7155 | }
|
---|
7156 | }
|
---|
7157 |
|
---|
7158 | break;
|
---|
7159 | }
|
---|
7160 |
|
---|
7161 | case axis_preceding:
|
---|
7162 | {
|
---|
7163 | xml_node cur = n;
|
---|
7164 |
|
---|
7165 | while (cur && !cur.previous_sibling()) cur = cur.parent();
|
---|
7166 | cur = cur.previous_sibling();
|
---|
7167 |
|
---|
7168 | for (;;)
|
---|
7169 | {
|
---|
7170 | if (cur.last_child())
|
---|
7171 | cur = cur.last_child();
|
---|
7172 | else
|
---|
7173 | {
|
---|
7174 | // leaf node, can't be ancestor
|
---|
7175 | step_push(ns, cur, alloc);
|
---|
7176 |
|
---|
7177 | if (cur.previous_sibling())
|
---|
7178 | cur = cur.previous_sibling();
|
---|
7179 | else
|
---|
7180 | {
|
---|
7181 | do
|
---|
7182 | {
|
---|
7183 | cur = cur.parent();
|
---|
7184 | if (!cur) break;
|
---|
7185 |
|
---|
7186 | if (!node_is_ancestor(cur, n)) step_push(ns, cur, alloc);
|
---|
7187 | }
|
---|
7188 | while (!cur.previous_sibling());
|
---|
7189 |
|
---|
7190 | cur = cur.previous_sibling();
|
---|
7191 |
|
---|
7192 | if (!cur) break;
|
---|
7193 | }
|
---|
7194 | }
|
---|
7195 | }
|
---|
7196 |
|
---|
7197 | break;
|
---|
7198 | }
|
---|
7199 |
|
---|
7200 | case axis_ancestor:
|
---|
7201 | case axis_ancestor_or_self:
|
---|
7202 | {
|
---|
7203 | if (axis == axis_ancestor_or_self)
|
---|
7204 | step_push(ns, n, alloc);
|
---|
7205 |
|
---|
7206 | xml_node cur = n.parent();
|
---|
7207 |
|
---|
7208 | while (cur)
|
---|
7209 | {
|
---|
7210 | step_push(ns, cur, alloc);
|
---|
7211 |
|
---|
7212 | cur = cur.parent();
|
---|
7213 | }
|
---|
7214 |
|
---|
7215 | break;
|
---|
7216 | }
|
---|
7217 |
|
---|
7218 | case axis_self:
|
---|
7219 | {
|
---|
7220 | step_push(ns, n, alloc);
|
---|
7221 |
|
---|
7222 | break;
|
---|
7223 | }
|
---|
7224 |
|
---|
7225 | case axis_parent:
|
---|
7226 | {
|
---|
7227 | if (n.parent()) step_push(ns, n.parent(), alloc);
|
---|
7228 |
|
---|
7229 | break;
|
---|
7230 | }
|
---|
7231 |
|
---|
7232 | default:
|
---|
7233 | assert(!"Unimplemented axis");
|
---|
7234 | }
|
---|
7235 | }
|
---|
7236 |
|
---|
7237 | template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& p, xpath_allocator* alloc, T v)
|
---|
7238 | {
|
---|
7239 | const axis_t axis = T::axis;
|
---|
7240 |
|
---|
7241 | switch (axis)
|
---|
7242 | {
|
---|
7243 | case axis_ancestor:
|
---|
7244 | case axis_ancestor_or_self:
|
---|
7245 | {
|
---|
7246 | if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
|
---|
7247 | step_push(ns, a, p, alloc);
|
---|
7248 |
|
---|
7249 | xml_node cur = p;
|
---|
7250 |
|
---|
7251 | while (cur)
|
---|
7252 | {
|
---|
7253 | step_push(ns, cur, alloc);
|
---|
7254 |
|
---|
7255 | cur = cur.parent();
|
---|
7256 | }
|
---|
7257 |
|
---|
7258 | break;
|
---|
7259 | }
|
---|
7260 |
|
---|
7261 | case axis_descendant_or_self:
|
---|
7262 | case axis_self:
|
---|
7263 | {
|
---|
7264 | if (_test == nodetest_type_node) // reject attributes based on principal node type test
|
---|
7265 | step_push(ns, a, p, alloc);
|
---|
7266 |
|
---|
7267 | break;
|
---|
7268 | }
|
---|
7269 |
|
---|
7270 | case axis_following:
|
---|
7271 | {
|
---|
7272 | xml_node cur = p;
|
---|
7273 |
|
---|
7274 | for (;;)
|
---|
7275 | {
|
---|
7276 | if (cur.first_child())
|
---|
7277 | cur = cur.first_child();
|
---|
7278 | else if (cur.next_sibling())
|
---|
7279 | cur = cur.next_sibling();
|
---|
7280 | else
|
---|
7281 | {
|
---|
7282 | while (cur && !cur.next_sibling()) cur = cur.parent();
|
---|
7283 | cur = cur.next_sibling();
|
---|
7284 |
|
---|
7285 | if (!cur) break;
|
---|
7286 | }
|
---|
7287 |
|
---|
7288 | step_push(ns, cur, alloc);
|
---|
7289 | }
|
---|
7290 |
|
---|
7291 | break;
|
---|
7292 | }
|
---|
7293 |
|
---|
7294 | case axis_parent:
|
---|
7295 | {
|
---|
7296 | step_push(ns, p, alloc);
|
---|
7297 |
|
---|
7298 | break;
|
---|
7299 | }
|
---|
7300 |
|
---|
7301 | case axis_preceding:
|
---|
7302 | {
|
---|
7303 | // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
|
---|
7304 | step_fill(ns, p, alloc, v);
|
---|
7305 | break;
|
---|
7306 | }
|
---|
7307 |
|
---|
7308 | default:
|
---|
7309 | assert(!"Unimplemented axis");
|
---|
7310 | }
|
---|
7311 | }
|
---|
7312 |
|
---|
7313 | template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, T v)
|
---|
7314 | {
|
---|
7315 | const axis_t axis = T::axis;
|
---|
7316 | bool attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
|
---|
7317 |
|
---|
7318 | xpath_node_set_raw ns;
|
---|
7319 | ns.set_type((axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling) ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted);
|
---|
7320 |
|
---|
7321 | if (_left)
|
---|
7322 | {
|
---|
7323 | xpath_node_set_raw s = _left->eval_node_set(c, stack);
|
---|
7324 |
|
---|
7325 | // self axis preserves the original order
|
---|
7326 | if (axis == axis_self) ns.set_type(s.type());
|
---|
7327 |
|
---|
7328 | for (const xpath_node* it = s.begin(); it != s.end(); ++it)
|
---|
7329 | {
|
---|
7330 | size_t size = ns.size();
|
---|
7331 |
|
---|
7332 | // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
|
---|
7333 | if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
|
---|
7334 |
|
---|
7335 | if (it->node())
|
---|
7336 | step_fill(ns, it->node(), stack.result, v);
|
---|
7337 | else if (attributes)
|
---|
7338 | step_fill(ns, it->attribute(), it->parent(), stack.result, v);
|
---|
7339 |
|
---|
7340 | apply_predicates(ns, size, stack);
|
---|
7341 | }
|
---|
7342 | }
|
---|
7343 | else
|
---|
7344 | {
|
---|
7345 | if (c.n.node())
|
---|
7346 | step_fill(ns, c.n.node(), stack.result, v);
|
---|
7347 | else if (attributes)
|
---|
7348 | step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, v);
|
---|
7349 |
|
---|
7350 | apply_predicates(ns, 0, stack);
|
---|
7351 | }
|
---|
7352 |
|
---|
7353 | // child, attribute and self axes always generate unique set of nodes
|
---|
7354 | // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
|
---|
7355 | if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
|
---|
7356 | ns.remove_duplicates();
|
---|
7357 |
|
---|
7358 | return ns;
|
---|
7359 | }
|
---|
7360 |
|
---|
7361 | public:
|
---|
7362 | xpath_ast_node(ast_type_t type, xpath_value_type rettype, const char_t* value):
|
---|
7363 | _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(0), _right(0), _next(0)
|
---|
7364 | {
|
---|
7365 | assert(type == ast_string_constant);
|
---|
7366 | _data.string = value;
|
---|
7367 | }
|
---|
7368 |
|
---|
7369 | xpath_ast_node(ast_type_t type, xpath_value_type rettype, double value):
|
---|
7370 | _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(0), _right(0), _next(0)
|
---|
7371 | {
|
---|
7372 | assert(type == ast_number_constant);
|
---|
7373 | _data.number = value;
|
---|
7374 | }
|
---|
7375 |
|
---|
7376 | xpath_ast_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value):
|
---|
7377 | _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(0), _right(0), _next(0)
|
---|
7378 | {
|
---|
7379 | assert(type == ast_variable);
|
---|
7380 | _data.variable = value;
|
---|
7381 | }
|
---|
7382 |
|
---|
7383 | xpath_ast_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
|
---|
7384 | _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(left), _right(right), _next(0)
|
---|
7385 | {
|
---|
7386 | }
|
---|
7387 |
|
---|
7388 | xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
|
---|
7389 | _type((char)type), _rettype(xpath_type_node_set), _axis((char)axis), _test((char)test), _left(left), _right(0), _next(0)
|
---|
7390 | {
|
---|
7391 | _data.nodetest = contents;
|
---|
7392 | }
|
---|
7393 |
|
---|
7394 | void set_next(xpath_ast_node* value)
|
---|
7395 | {
|
---|
7396 | _next = value;
|
---|
7397 | }
|
---|
7398 |
|
---|
7399 | void set_right(xpath_ast_node* value)
|
---|
7400 | {
|
---|
7401 | _right = value;
|
---|
7402 | }
|
---|
7403 |
|
---|
7404 | bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
|
---|
7405 | {
|
---|
7406 | switch (_type)
|
---|
7407 | {
|
---|
7408 | case ast_op_or:
|
---|
7409 | return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
|
---|
7410 |
|
---|
7411 | case ast_op_and:
|
---|
7412 | return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
|
---|
7413 |
|
---|
7414 | case ast_op_equal:
|
---|
7415 | return compare_eq(_left, _right, c, stack, equal_to());
|
---|
7416 |
|
---|
7417 | case ast_op_not_equal:
|
---|
7418 | return compare_eq(_left, _right, c, stack, not_equal_to());
|
---|
7419 |
|
---|
7420 | case ast_op_less:
|
---|
7421 | return compare_rel(_left, _right, c, stack, less());
|
---|
7422 |
|
---|
7423 | case ast_op_greater:
|
---|
7424 | return compare_rel(_right, _left, c, stack, less());
|
---|
7425 |
|
---|
7426 | case ast_op_less_or_equal:
|
---|
7427 | return compare_rel(_left, _right, c, stack, less_equal());
|
---|
7428 |
|
---|
7429 | case ast_op_greater_or_equal:
|
---|
7430 | return compare_rel(_right, _left, c, stack, less_equal());
|
---|
7431 |
|
---|
7432 | case ast_func_starts_with:
|
---|
7433 | {
|
---|
7434 | xpath_allocator_capture cr(stack.result);
|
---|
7435 |
|
---|
7436 | xpath_string lr = _left->eval_string(c, stack);
|
---|
7437 | xpath_string rr = _right->eval_string(c, stack);
|
---|
7438 |
|
---|
7439 | return starts_with(lr.c_str(), rr.c_str());
|
---|
7440 | }
|
---|
7441 |
|
---|
7442 | case ast_func_contains:
|
---|
7443 | {
|
---|
7444 | xpath_allocator_capture cr(stack.result);
|
---|
7445 |
|
---|
7446 | xpath_string lr = _left->eval_string(c, stack);
|
---|
7447 | xpath_string rr = _right->eval_string(c, stack);
|
---|
7448 |
|
---|
7449 | return find_substring(lr.c_str(), rr.c_str()) != 0;
|
---|
7450 | }
|
---|
7451 |
|
---|
7452 | case ast_func_boolean:
|
---|
7453 | return _left->eval_boolean(c, stack);
|
---|
7454 |
|
---|
7455 | case ast_func_not:
|
---|
7456 | return !_left->eval_boolean(c, stack);
|
---|
7457 |
|
---|
7458 | case ast_func_true:
|
---|
7459 | return true;
|
---|
7460 |
|
---|
7461 | case ast_func_false:
|
---|
7462 | return false;
|
---|
7463 |
|
---|
7464 | case ast_func_lang:
|
---|
7465 | {
|
---|
7466 | if (c.n.attribute()) return false;
|
---|
7467 |
|
---|
7468 | xpath_allocator_capture cr(stack.result);
|
---|
7469 |
|
---|
7470 | xpath_string lang = _left->eval_string(c, stack);
|
---|
7471 |
|
---|
7472 | for (xml_node n = c.n.node(); n; n = n.parent())
|
---|
7473 | {
|
---|
7474 | xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
|
---|
7475 |
|
---|
7476 | if (a)
|
---|
7477 | {
|
---|
7478 | const char_t* value = a.value();
|
---|
7479 |
|
---|
7480 | // strnicmp / strncasecmp is not portable
|
---|
7481 | for (const char_t* lit = lang.c_str(); *lit; ++lit)
|
---|
7482 | {
|
---|
7483 | if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
|
---|
7484 | ++value;
|
---|
7485 | }
|
---|
7486 |
|
---|
7487 | return *value == 0 || *value == '-';
|
---|
7488 | }
|
---|
7489 | }
|
---|
7490 |
|
---|
7491 | return false;
|
---|
7492 | }
|
---|
7493 |
|
---|
7494 | case ast_variable:
|
---|
7495 | {
|
---|
7496 | assert(_rettype == _data.variable->type());
|
---|
7497 |
|
---|
7498 | if (_rettype == xpath_type_boolean)
|
---|
7499 | return _data.variable->get_boolean();
|
---|
7500 |
|
---|
7501 | // fallthrough to type conversion
|
---|
7502 | }
|
---|
7503 |
|
---|
7504 | default:
|
---|
7505 | {
|
---|
7506 | switch (_rettype)
|
---|
7507 | {
|
---|
7508 | case xpath_type_number:
|
---|
7509 | return convert_number_to_boolean(eval_number(c, stack));
|
---|
7510 |
|
---|
7511 | case xpath_type_string:
|
---|
7512 | {
|
---|
7513 | xpath_allocator_capture cr(stack.result);
|
---|
7514 |
|
---|
7515 | return !eval_string(c, stack).empty();
|
---|
7516 | }
|
---|
7517 |
|
---|
7518 | case xpath_type_node_set:
|
---|
7519 | {
|
---|
7520 | xpath_allocator_capture cr(stack.result);
|
---|
7521 |
|
---|
7522 | return !eval_node_set(c, stack).empty();
|
---|
7523 | }
|
---|
7524 |
|
---|
7525 | default:
|
---|
7526 | assert(!"Wrong expression for return type boolean");
|
---|
7527 | return false;
|
---|
7528 | }
|
---|
7529 | }
|
---|
7530 | }
|
---|
7531 | }
|
---|
7532 |
|
---|
7533 | double eval_number(const xpath_context& c, const xpath_stack& stack)
|
---|
7534 | {
|
---|
7535 | switch (_type)
|
---|
7536 | {
|
---|
7537 | case ast_op_add:
|
---|
7538 | return _left->eval_number(c, stack) + _right->eval_number(c, stack);
|
---|
7539 |
|
---|
7540 | case ast_op_subtract:
|
---|
7541 | return _left->eval_number(c, stack) - _right->eval_number(c, stack);
|
---|
7542 |
|
---|
7543 | case ast_op_multiply:
|
---|
7544 | return _left->eval_number(c, stack) * _right->eval_number(c, stack);
|
---|
7545 |
|
---|
7546 | case ast_op_divide:
|
---|
7547 | return _left->eval_number(c, stack) / _right->eval_number(c, stack);
|
---|
7548 |
|
---|
7549 | case ast_op_mod:
|
---|
7550 | return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
|
---|
7551 |
|
---|
7552 | case ast_op_negate:
|
---|
7553 | return -_left->eval_number(c, stack);
|
---|
7554 |
|
---|
7555 | case ast_number_constant:
|
---|
7556 | return _data.number;
|
---|
7557 |
|
---|
7558 | case ast_func_last:
|
---|
7559 | return (double)c.size;
|
---|
7560 |
|
---|
7561 | case ast_func_position:
|
---|
7562 | return (double)c.position;
|
---|
7563 |
|
---|
7564 | case ast_func_count:
|
---|
7565 | {
|
---|
7566 | xpath_allocator_capture cr(stack.result);
|
---|
7567 |
|
---|
7568 | return (double)_left->eval_node_set(c, stack).size();
|
---|
7569 | }
|
---|
7570 |
|
---|
7571 | case ast_func_string_length_0:
|
---|
7572 | {
|
---|
7573 | xpath_allocator_capture cr(stack.result);
|
---|
7574 |
|
---|
7575 | return (double)string_value(c.n, stack.result).length();
|
---|
7576 | }
|
---|
7577 |
|
---|
7578 | case ast_func_string_length_1:
|
---|
7579 | {
|
---|
7580 | xpath_allocator_capture cr(stack.result);
|
---|
7581 |
|
---|
7582 | return (double)_left->eval_string(c, stack).length();
|
---|
7583 | }
|
---|
7584 |
|
---|
7585 | case ast_func_number_0:
|
---|
7586 | {
|
---|
7587 | xpath_allocator_capture cr(stack.result);
|
---|
7588 |
|
---|
7589 | return convert_string_to_number(string_value(c.n, stack.result).c_str());
|
---|
7590 | }
|
---|
7591 |
|
---|
7592 | case ast_func_number_1:
|
---|
7593 | return _left->eval_number(c, stack);
|
---|
7594 |
|
---|
7595 | case ast_func_sum:
|
---|
7596 | {
|
---|
7597 | xpath_allocator_capture cr(stack.result);
|
---|
7598 |
|
---|
7599 | double r = 0;
|
---|
7600 |
|
---|
7601 | xpath_node_set_raw ns = _left->eval_node_set(c, stack);
|
---|
7602 |
|
---|
7603 | for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
|
---|
7604 | {
|
---|
7605 | xpath_allocator_capture cri(stack.result);
|
---|
7606 |
|
---|
7607 | r += convert_string_to_number(string_value(*it, stack.result).c_str());
|
---|
7608 | }
|
---|
7609 |
|
---|
7610 | return r;
|
---|
7611 | }
|
---|
7612 |
|
---|
7613 | case ast_func_floor:
|
---|
7614 | {
|
---|
7615 | double r = _left->eval_number(c, stack);
|
---|
7616 |
|
---|
7617 | return r == r ? floor(r) : r;
|
---|
7618 | }
|
---|
7619 |
|
---|
7620 | case ast_func_ceiling:
|
---|
7621 | {
|
---|
7622 | double r = _left->eval_number(c, stack);
|
---|
7623 |
|
---|
7624 | return r == r ? ceil(r) : r;
|
---|
7625 | }
|
---|
7626 |
|
---|
7627 | case ast_func_round:
|
---|
7628 | return round_nearest_nzero(_left->eval_number(c, stack));
|
---|
7629 |
|
---|
7630 | case ast_variable:
|
---|
7631 | {
|
---|
7632 | assert(_rettype == _data.variable->type());
|
---|
7633 |
|
---|
7634 | if (_rettype == xpath_type_number)
|
---|
7635 | return _data.variable->get_number();
|
---|
7636 |
|
---|
7637 | // fallthrough to type conversion
|
---|
7638 | }
|
---|
7639 |
|
---|
7640 | default:
|
---|
7641 | {
|
---|
7642 | switch (_rettype)
|
---|
7643 | {
|
---|
7644 | case xpath_type_boolean:
|
---|
7645 | return eval_boolean(c, stack) ? 1 : 0;
|
---|
7646 |
|
---|
7647 | case xpath_type_string:
|
---|
7648 | {
|
---|
7649 | xpath_allocator_capture cr(stack.result);
|
---|
7650 |
|
---|
7651 | return convert_string_to_number(eval_string(c, stack).c_str());
|
---|
7652 | }
|
---|
7653 |
|
---|
7654 | case xpath_type_node_set:
|
---|
7655 | {
|
---|
7656 | xpath_allocator_capture cr(stack.result);
|
---|
7657 |
|
---|
7658 | return convert_string_to_number(eval_string(c, stack).c_str());
|
---|
7659 | }
|
---|
7660 |
|
---|
7661 | default:
|
---|
7662 | assert(!"Wrong expression for return type number");
|
---|
7663 | return 0;
|
---|
7664 | }
|
---|
7665 |
|
---|
7666 | }
|
---|
7667 | }
|
---|
7668 | }
|
---|
7669 |
|
---|
7670 | xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
|
---|
7671 | {
|
---|
7672 | assert(_type == ast_func_concat);
|
---|
7673 |
|
---|
7674 | xpath_allocator_capture ct(stack.temp);
|
---|
7675 |
|
---|
7676 | // count the string number
|
---|
7677 | size_t count = 1;
|
---|
7678 | for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
|
---|
7679 |
|
---|
7680 | // gather all strings
|
---|
7681 | xpath_string static_buffer[4];
|
---|
7682 | xpath_string* buffer = static_buffer;
|
---|
7683 |
|
---|
7684 | // allocate on-heap for large concats
|
---|
7685 | if (count > sizeof(static_buffer) / sizeof(static_buffer[0]))
|
---|
7686 | {
|
---|
7687 | buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
|
---|
7688 | assert(buffer);
|
---|
7689 | }
|
---|
7690 |
|
---|
7691 | // evaluate all strings to temporary stack
|
---|
7692 | xpath_stack swapped_stack = {stack.temp, stack.result};
|
---|
7693 |
|
---|
7694 | buffer[0] = _left->eval_string(c, swapped_stack);
|
---|
7695 |
|
---|
7696 | size_t pos = 1;
|
---|
7697 | for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
|
---|
7698 | assert(pos == count);
|
---|
7699 |
|
---|
7700 | // get total length
|
---|
7701 | size_t length = 0;
|
---|
7702 | for (size_t i = 0; i < count; ++i) length += buffer[i].length();
|
---|
7703 |
|
---|
7704 | // create final string
|
---|
7705 | char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
|
---|
7706 | assert(result);
|
---|
7707 |
|
---|
7708 | char_t* ri = result;
|
---|
7709 |
|
---|
7710 | for (size_t j = 0; j < count; ++j)
|
---|
7711 | for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
|
---|
7712 | *ri++ = *bi;
|
---|
7713 |
|
---|
7714 | *ri = 0;
|
---|
7715 |
|
---|
7716 | return xpath_string(result, true);
|
---|
7717 | }
|
---|
7718 |
|
---|
7719 | xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
|
---|
7720 | {
|
---|
7721 | switch (_type)
|
---|
7722 | {
|
---|
7723 | case ast_string_constant:
|
---|
7724 | return xpath_string_const(_data.string);
|
---|
7725 |
|
---|
7726 | case ast_func_local_name_0:
|
---|
7727 | {
|
---|
7728 | xpath_node na = c.n;
|
---|
7729 |
|
---|
7730 | return xpath_string_const(local_name(na));
|
---|
7731 | }
|
---|
7732 |
|
---|
7733 | case ast_func_local_name_1:
|
---|
7734 | {
|
---|
7735 | xpath_allocator_capture cr(stack.result);
|
---|
7736 |
|
---|
7737 | xpath_node_set_raw ns = _left->eval_node_set(c, stack);
|
---|
7738 | xpath_node na = ns.first();
|
---|
7739 |
|
---|
7740 | return xpath_string_const(local_name(na));
|
---|
7741 | }
|
---|
7742 |
|
---|
7743 | case ast_func_name_0:
|
---|
7744 | {
|
---|
7745 | xpath_node na = c.n;
|
---|
7746 |
|
---|
7747 | return xpath_string_const(qualified_name(na));
|
---|
7748 | }
|
---|
7749 |
|
---|
7750 | case ast_func_name_1:
|
---|
7751 | {
|
---|
7752 | xpath_allocator_capture cr(stack.result);
|
---|
7753 |
|
---|
7754 | xpath_node_set_raw ns = _left->eval_node_set(c, stack);
|
---|
7755 | xpath_node na = ns.first();
|
---|
7756 |
|
---|
7757 | return xpath_string_const(qualified_name(na));
|
---|
7758 | }
|
---|
7759 |
|
---|
7760 | case ast_func_namespace_uri_0:
|
---|
7761 | {
|
---|
7762 | xpath_node na = c.n;
|
---|
7763 |
|
---|
7764 | return xpath_string_const(namespace_uri(na));
|
---|
7765 | }
|
---|
7766 |
|
---|
7767 | case ast_func_namespace_uri_1:
|
---|
7768 | {
|
---|
7769 | xpath_allocator_capture cr(stack.result);
|
---|
7770 |
|
---|
7771 | xpath_node_set_raw ns = _left->eval_node_set(c, stack);
|
---|
7772 | xpath_node na = ns.first();
|
---|
7773 |
|
---|
7774 | return xpath_string_const(namespace_uri(na));
|
---|
7775 | }
|
---|
7776 |
|
---|
7777 | case ast_func_string_0:
|
---|
7778 | return string_value(c.n, stack.result);
|
---|
7779 |
|
---|
7780 | case ast_func_string_1:
|
---|
7781 | return _left->eval_string(c, stack);
|
---|
7782 |
|
---|
7783 | case ast_func_concat:
|
---|
7784 | return eval_string_concat(c, stack);
|
---|
7785 |
|
---|
7786 | case ast_func_substring_before:
|
---|
7787 | {
|
---|
7788 | xpath_allocator_capture cr(stack.temp);
|
---|
7789 |
|
---|
7790 | xpath_stack swapped_stack = {stack.temp, stack.result};
|
---|
7791 |
|
---|
7792 | xpath_string s = _left->eval_string(c, swapped_stack);
|
---|
7793 | xpath_string p = _right->eval_string(c, swapped_stack);
|
---|
7794 |
|
---|
7795 | const char_t* pos = find_substring(s.c_str(), p.c_str());
|
---|
7796 |
|
---|
7797 | return pos ? xpath_string(s.c_str(), pos, stack.result) : xpath_string();
|
---|
7798 | }
|
---|
7799 |
|
---|
7800 | case ast_func_substring_after:
|
---|
7801 | {
|
---|
7802 | xpath_allocator_capture cr(stack.temp);
|
---|
7803 |
|
---|
7804 | xpath_stack swapped_stack = {stack.temp, stack.result};
|
---|
7805 |
|
---|
7806 | xpath_string s = _left->eval_string(c, swapped_stack);
|
---|
7807 | xpath_string p = _right->eval_string(c, swapped_stack);
|
---|
7808 |
|
---|
7809 | const char_t* pos = find_substring(s.c_str(), p.c_str());
|
---|
7810 | if (!pos) return xpath_string();
|
---|
7811 |
|
---|
7812 | const char_t* result = pos + p.length();
|
---|
7813 |
|
---|
7814 | return s.uses_heap() ? xpath_string(result, stack.result) : xpath_string_const(result);
|
---|
7815 | }
|
---|
7816 |
|
---|
7817 | case ast_func_substring_2:
|
---|
7818 | {
|
---|
7819 | xpath_allocator_capture cr(stack.temp);
|
---|
7820 |
|
---|
7821 | xpath_stack swapped_stack = {stack.temp, stack.result};
|
---|
7822 |
|
---|
7823 | xpath_string s = _left->eval_string(c, swapped_stack);
|
---|
7824 | size_t s_length = s.length();
|
---|
7825 |
|
---|
7826 | double first = round_nearest(_right->eval_number(c, stack));
|
---|
7827 |
|
---|
7828 | if (is_nan(first)) return xpath_string(); // NaN
|
---|
7829 | else if (first >= s_length + 1) return xpath_string();
|
---|
7830 |
|
---|
7831 | size_t pos = first < 1 ? 1 : (size_t)first;
|
---|
7832 | assert(1 <= pos && pos <= s_length + 1);
|
---|
7833 |
|
---|
7834 | const char_t* rbegin = s.c_str() + (pos - 1);
|
---|
7835 |
|
---|
7836 | return s.uses_heap() ? xpath_string(rbegin, stack.result) : xpath_string_const(rbegin);
|
---|
7837 | }
|
---|
7838 |
|
---|
7839 | case ast_func_substring_3:
|
---|
7840 | {
|
---|
7841 | xpath_allocator_capture cr(stack.temp);
|
---|
7842 |
|
---|
7843 | xpath_stack swapped_stack = {stack.temp, stack.result};
|
---|
7844 |
|
---|
7845 | xpath_string s = _left->eval_string(c, swapped_stack);
|
---|
7846 | size_t s_length = s.length();
|
---|
7847 |
|
---|
7848 | double first = round_nearest(_right->eval_number(c, stack));
|
---|
7849 | double last = first + round_nearest(_right->_next->eval_number(c, stack));
|
---|
7850 |
|
---|
7851 | if (is_nan(first) || is_nan(last)) return xpath_string();
|
---|
7852 | else if (first >= s_length + 1) return xpath_string();
|
---|
7853 | else if (first >= last) return xpath_string();
|
---|
7854 | else if (last < 1) return xpath_string();
|
---|
7855 |
|
---|
7856 | size_t pos = first < 1 ? 1 : (size_t)first;
|
---|
7857 | size_t end = last >= s_length + 1 ? s_length + 1 : (size_t)last;
|
---|
7858 |
|
---|
7859 | assert(1 <= pos && pos <= end && end <= s_length + 1);
|
---|
7860 | const char_t* rbegin = s.c_str() + (pos - 1);
|
---|
7861 | const char_t* rend = s.c_str() + (end - 1);
|
---|
7862 |
|
---|
7863 | return (end == s_length + 1 && !s.uses_heap()) ? xpath_string_const(rbegin) : xpath_string(rbegin, rend, stack.result);
|
---|
7864 | }
|
---|
7865 |
|
---|
7866 | case ast_func_normalize_space_0:
|
---|
7867 | {
|
---|
7868 | xpath_string s = string_value(c.n, stack.result);
|
---|
7869 |
|
---|
7870 | normalize_space(s.data(stack.result));
|
---|
7871 |
|
---|
7872 | return s;
|
---|
7873 | }
|
---|
7874 |
|
---|
7875 | case ast_func_normalize_space_1:
|
---|
7876 | {
|
---|
7877 | xpath_string s = _left->eval_string(c, stack);
|
---|
7878 |
|
---|
7879 | normalize_space(s.data(stack.result));
|
---|
7880 |
|
---|
7881 | return s;
|
---|
7882 | }
|
---|
7883 |
|
---|
7884 | case ast_func_translate:
|
---|
7885 | {
|
---|
7886 | xpath_allocator_capture cr(stack.temp);
|
---|
7887 |
|
---|
7888 | xpath_stack swapped_stack = {stack.temp, stack.result};
|
---|
7889 |
|
---|
7890 | xpath_string s = _left->eval_string(c, stack);
|
---|
7891 | xpath_string from = _right->eval_string(c, swapped_stack);
|
---|
7892 | xpath_string to = _right->_next->eval_string(c, swapped_stack);
|
---|
7893 |
|
---|
7894 | translate(s.data(stack.result), from.c_str(), to.c_str());
|
---|
7895 |
|
---|
7896 | return s;
|
---|
7897 | }
|
---|
7898 |
|
---|
7899 | case ast_variable:
|
---|
7900 | {
|
---|
7901 | assert(_rettype == _data.variable->type());
|
---|
7902 |
|
---|
7903 | if (_rettype == xpath_type_string)
|
---|
7904 | return xpath_string_const(_data.variable->get_string());
|
---|
7905 |
|
---|
7906 | // fallthrough to type conversion
|
---|
7907 | }
|
---|
7908 |
|
---|
7909 | default:
|
---|
7910 | {
|
---|
7911 | switch (_rettype)
|
---|
7912 | {
|
---|
7913 | case xpath_type_boolean:
|
---|
7914 | return xpath_string_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
|
---|
7915 |
|
---|
7916 | case xpath_type_number:
|
---|
7917 | return convert_number_to_string(eval_number(c, stack), stack.result);
|
---|
7918 |
|
---|
7919 | case xpath_type_node_set:
|
---|
7920 | {
|
---|
7921 | xpath_allocator_capture cr(stack.temp);
|
---|
7922 |
|
---|
7923 | xpath_stack swapped_stack = {stack.temp, stack.result};
|
---|
7924 |
|
---|
7925 | xpath_node_set_raw ns = eval_node_set(c, swapped_stack);
|
---|
7926 | return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
|
---|
7927 | }
|
---|
7928 |
|
---|
7929 | default:
|
---|
7930 | assert(!"Wrong expression for return type string");
|
---|
7931 | return xpath_string();
|
---|
7932 | }
|
---|
7933 | }
|
---|
7934 | }
|
---|
7935 | }
|
---|
7936 |
|
---|
7937 | xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack)
|
---|
7938 | {
|
---|
7939 | switch (_type)
|
---|
7940 | {
|
---|
7941 | case ast_op_union:
|
---|
7942 | {
|
---|
7943 | xpath_allocator_capture cr(stack.temp);
|
---|
7944 |
|
---|
7945 | xpath_stack swapped_stack = {stack.temp, stack.result};
|
---|
7946 |
|
---|
7947 | xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack);
|
---|
7948 | xpath_node_set_raw rs = _right->eval_node_set(c, stack);
|
---|
7949 |
|
---|
7950 | // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
|
---|
7951 | rs.set_type(xpath_node_set::type_unsorted);
|
---|
7952 |
|
---|
7953 | rs.append(ls.begin(), ls.end(), stack.result);
|
---|
7954 | rs.remove_duplicates();
|
---|
7955 |
|
---|
7956 | return rs;
|
---|
7957 | }
|
---|
7958 |
|
---|
7959 | case ast_filter:
|
---|
7960 | case ast_filter_posinv:
|
---|
7961 | {
|
---|
7962 | xpath_node_set_raw set = _left->eval_node_set(c, stack);
|
---|
7963 |
|
---|
7964 | // either expression is a number or it contains position() call; sort by document order
|
---|
7965 | if (_type == ast_filter) set.sort_do();
|
---|
7966 |
|
---|
7967 | apply_predicate(set, 0, _right, stack);
|
---|
7968 |
|
---|
7969 | return set;
|
---|
7970 | }
|
---|
7971 |
|
---|
7972 | case ast_func_id:
|
---|
7973 | return xpath_node_set_raw();
|
---|
7974 |
|
---|
7975 | case ast_step:
|
---|
7976 | {
|
---|
7977 | switch (_axis)
|
---|
7978 | {
|
---|
7979 | case axis_ancestor:
|
---|
7980 | return step_do(c, stack, axis_to_type<axis_ancestor>());
|
---|
7981 |
|
---|
7982 | case axis_ancestor_or_self:
|
---|
7983 | return step_do(c, stack, axis_to_type<axis_ancestor_or_self>());
|
---|
7984 |
|
---|
7985 | case axis_attribute:
|
---|
7986 | return step_do(c, stack, axis_to_type<axis_attribute>());
|
---|
7987 |
|
---|
7988 | case axis_child:
|
---|
7989 | return step_do(c, stack, axis_to_type<axis_child>());
|
---|
7990 |
|
---|
7991 | case axis_descendant:
|
---|
7992 | return step_do(c, stack, axis_to_type<axis_descendant>());
|
---|
7993 |
|
---|
7994 | case axis_descendant_or_self:
|
---|
7995 | return step_do(c, stack, axis_to_type<axis_descendant_or_self>());
|
---|
7996 |
|
---|
7997 | case axis_following:
|
---|
7998 | return step_do(c, stack, axis_to_type<axis_following>());
|
---|
7999 |
|
---|
8000 | case axis_following_sibling:
|
---|
8001 | return step_do(c, stack, axis_to_type<axis_following_sibling>());
|
---|
8002 |
|
---|
8003 | case axis_namespace:
|
---|
8004 | // namespaced axis is not supported
|
---|
8005 | return xpath_node_set_raw();
|
---|
8006 |
|
---|
8007 | case axis_parent:
|
---|
8008 | return step_do(c, stack, axis_to_type<axis_parent>());
|
---|
8009 |
|
---|
8010 | case axis_preceding:
|
---|
8011 | return step_do(c, stack, axis_to_type<axis_preceding>());
|
---|
8012 |
|
---|
8013 | case axis_preceding_sibling:
|
---|
8014 | return step_do(c, stack, axis_to_type<axis_preceding_sibling>());
|
---|
8015 |
|
---|
8016 | case axis_self:
|
---|
8017 | return step_do(c, stack, axis_to_type<axis_self>());
|
---|
8018 | }
|
---|
8019 | }
|
---|
8020 |
|
---|
8021 | case ast_step_root:
|
---|
8022 | {
|
---|
8023 | assert(!_right); // root step can't have any predicates
|
---|
8024 |
|
---|
8025 | xpath_node_set_raw ns;
|
---|
8026 |
|
---|
8027 | ns.set_type(xpath_node_set::type_sorted);
|
---|
8028 |
|
---|
8029 | if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
|
---|
8030 | else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
|
---|
8031 |
|
---|
8032 | return ns;
|
---|
8033 | }
|
---|
8034 |
|
---|
8035 | case ast_variable:
|
---|
8036 | {
|
---|
8037 | assert(_rettype == _data.variable->type());
|
---|
8038 |
|
---|
8039 | if (_rettype == xpath_type_node_set)
|
---|
8040 | {
|
---|
8041 | const xpath_node_set& s = _data.variable->get_node_set();
|
---|
8042 |
|
---|
8043 | xpath_node_set_raw ns;
|
---|
8044 |
|
---|
8045 | ns.set_type(s.type());
|
---|
8046 | ns.append(s.begin(), s.end(), stack.result);
|
---|
8047 |
|
---|
8048 | return ns;
|
---|
8049 | }
|
---|
8050 |
|
---|
8051 | // fallthrough to type conversion
|
---|
8052 | }
|
---|
8053 |
|
---|
8054 | default:
|
---|
8055 | assert(!"Wrong expression for return type node set");
|
---|
8056 | return xpath_node_set_raw();
|
---|
8057 | }
|
---|
8058 | }
|
---|
8059 |
|
---|
8060 | bool is_posinv()
|
---|
8061 | {
|
---|
8062 | switch (_type)
|
---|
8063 | {
|
---|
8064 | case ast_func_position:
|
---|
8065 | return false;
|
---|
8066 |
|
---|
8067 | case ast_string_constant:
|
---|
8068 | case ast_number_constant:
|
---|
8069 | case ast_variable:
|
---|
8070 | return true;
|
---|
8071 |
|
---|
8072 | case ast_step:
|
---|
8073 | case ast_step_root:
|
---|
8074 | return true;
|
---|
8075 |
|
---|
8076 | case ast_predicate:
|
---|
8077 | case ast_filter:
|
---|
8078 | case ast_filter_posinv:
|
---|
8079 | return true;
|
---|
8080 |
|
---|
8081 | default:
|
---|
8082 | if (_left && !_left->is_posinv()) return false;
|
---|
8083 |
|
---|
8084 | for (xpath_ast_node* n = _right; n; n = n->_next)
|
---|
8085 | if (!n->is_posinv()) return false;
|
---|
8086 |
|
---|
8087 | return true;
|
---|
8088 | }
|
---|
8089 | }
|
---|
8090 |
|
---|
8091 | xpath_value_type rettype() const
|
---|
8092 | {
|
---|
8093 | return static_cast<xpath_value_type>(_rettype);
|
---|
8094 | }
|
---|
8095 | };
|
---|
8096 |
|
---|
8097 | struct xpath_parser
|
---|
8098 | {
|
---|
8099 | xpath_allocator* _alloc;
|
---|
8100 | xpath_lexer _lexer;
|
---|
8101 |
|
---|
8102 | const char_t* _query;
|
---|
8103 | xpath_variable_set* _variables;
|
---|
8104 |
|
---|
8105 | xpath_parse_result* _result;
|
---|
8106 |
|
---|
8107 | #ifdef PUGIXML_NO_EXCEPTIONS
|
---|
8108 | jmp_buf _error_handler;
|
---|
8109 | #endif
|
---|
8110 |
|
---|
8111 | void throw_error(const char* message)
|
---|
8112 | {
|
---|
8113 | _result->error = message;
|
---|
8114 | _result->offset = _lexer.current_pos() - _query;
|
---|
8115 |
|
---|
8116 | #ifdef PUGIXML_NO_EXCEPTIONS
|
---|
8117 | longjmp(_error_handler, 1);
|
---|
8118 | #else
|
---|
8119 | throw xpath_exception(*_result);
|
---|
8120 | #endif
|
---|
8121 | }
|
---|
8122 |
|
---|
// Reports an allocation failure: std::bad_alloc when exceptions are enabled,
// otherwise an "Out of memory" parse error routed through throw_error.
void throw_error_oom()
{
#ifndef PUGIXML_NO_EXCEPTIONS
	throw std::bad_alloc();
#else
	throw_error("Out of memory");
#endif
}
|
---|
8131 |
|
---|
8132 | void* alloc_node()
|
---|
8133 | {
|
---|
8134 | void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node));
|
---|
8135 |
|
---|
8136 | if (!result) throw_error_oom();
|
---|
8137 |
|
---|
8138 | return result;
|
---|
8139 | }
|
---|
8140 |
|
---|
8141 | const char_t* alloc_string(const xpath_lexer_string& value)
|
---|
8142 | {
|
---|
8143 | if (value.begin)
|
---|
8144 | {
|
---|
8145 | size_t length = static_cast<size_t>(value.end - value.begin);
|
---|
8146 |
|
---|
8147 | char_t* c = static_cast<char_t*>(_alloc->allocate_nothrow((length + 1) * sizeof(char_t)));
|
---|
8148 | if (!c) throw_error_oom();
|
---|
8149 |
|
---|
8150 | memcpy(c, value.begin, length * sizeof(char_t));
|
---|
8151 | c[length] = 0;
|
---|
8152 |
|
---|
8153 | return c;
|
---|
8154 | }
|
---|
8155 | else return 0;
|
---|
8156 | }
|
---|
8157 |
|
---|
8158 | xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2])
|
---|
8159 | {
|
---|
8160 | assert(argc <= 1);
|
---|
8161 |
|
---|
8162 | if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
|
---|
8163 |
|
---|
8164 | return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]);
|
---|
8165 | }
|
---|
8166 |
|
---|
8167 | xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
|
---|
8168 | {
|
---|
8169 | switch (name.begin[0])
|
---|
8170 | {
|
---|
8171 | case 'b':
|
---|
8172 | if (name == PUGIXML_TEXT("boolean") && argc == 1)
|
---|
8173 | return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
|
---|
8174 |
|
---|
8175 | break;
|
---|
8176 |
|
---|
8177 | case 'c':
|
---|
8178 | if (name == PUGIXML_TEXT("count") && argc == 1)
|
---|
8179 | {
|
---|
8180 | if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
|
---|
8181 | return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
|
---|
8182 | }
|
---|
8183 | else if (name == PUGIXML_TEXT("contains") && argc == 2)
|
---|
8184 | return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_string, args[0], args[1]);
|
---|
8185 | else if (name == PUGIXML_TEXT("concat") && argc >= 2)
|
---|
8186 | return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
|
---|
8187 | else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
|
---|
8188 | return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
|
---|
8189 |
|
---|
8190 | break;
|
---|
8191 |
|
---|
8192 | case 'f':
|
---|
8193 | if (name == PUGIXML_TEXT("false") && argc == 0)
|
---|
8194 | return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
|
---|
8195 | else if (name == PUGIXML_TEXT("floor") && argc == 1)
|
---|
8196 | return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
|
---|
8197 |
|
---|
8198 | break;
|
---|
8199 |
|
---|
8200 | case 'i':
|
---|
8201 | if (name == PUGIXML_TEXT("id") && argc == 1)
|
---|
8202 | return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
|
---|
8203 |
|
---|
8204 | break;
|
---|
8205 |
|
---|
8206 | case 'l':
|
---|
8207 | if (name == PUGIXML_TEXT("last") && argc == 0)
|
---|
8208 | return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
|
---|
8209 | else if (name == PUGIXML_TEXT("lang") && argc == 1)
|
---|
8210 | return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
|
---|
8211 | else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
|
---|
8212 | return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
|
---|
8213 |
|
---|
8214 | break;
|
---|
8215 |
|
---|
8216 | case 'n':
|
---|
8217 | if (name == PUGIXML_TEXT("name") && argc <= 1)
|
---|
8218 | return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
|
---|
8219 | else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
|
---|
8220 | return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
|
---|
8221 | else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
|
---|
8222 | return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
|
---|
8223 | else if (name == PUGIXML_TEXT("not") && argc == 1)
|
---|
8224 | return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
|
---|
8225 | else if (name == PUGIXML_TEXT("number") && argc <= 1)
|
---|
8226 | return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
|
---|
8227 |
|
---|
8228 | break;
|
---|
8229 |
|
---|
8230 | case 'p':
|
---|
8231 | if (name == PUGIXML_TEXT("position") && argc == 0)
|
---|
8232 | return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
|
---|
8233 |
|
---|
8234 | break;
|
---|
8235 |
|
---|
8236 | case 'r':
|
---|
8237 | if (name == PUGIXML_TEXT("round") && argc == 1)
|
---|
8238 | return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
|
---|
8239 |
|
---|
8240 | break;
|
---|
8241 |
|
---|
8242 | case 's':
|
---|
8243 | if (name == PUGIXML_TEXT("string") && argc <= 1)
|
---|
8244 | return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
|
---|
8245 | else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
|
---|
8246 | return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_string, args[0]);
|
---|
8247 | else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
|
---|
8248 | return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
|
---|
8249 | else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
|
---|
8250 | return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
|
---|
8251 | else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
|
---|
8252 | return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
|
---|
8253 | else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
|
---|
8254 | return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
|
---|
8255 | else if (name == PUGIXML_TEXT("sum") && argc == 1)
|
---|
8256 | {
|
---|
8257 | if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
|
---|
8258 | return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
|
---|
8259 | }
|
---|
8260 |
|
---|
8261 | break;
|
---|
8262 |
|
---|
8263 | case 't':
|
---|
8264 | if (name == PUGIXML_TEXT("translate") && argc == 3)
|
---|
8265 | return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
|
---|
8266 | else if (name == PUGIXML_TEXT("true") && argc == 0)
|
---|
8267 | return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
|
---|
8268 |
|
---|
8269 | break;
|
---|
8270 | }
|
---|
8271 |
|
---|
8272 | throw_error("Unrecognized function or wrong parameter count");
|
---|
8273 |
|
---|
8274 | return 0;
|
---|
8275 | }
|
---|
8276 |
|
---|
8277 | axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
|
---|
8278 | {
|
---|
8279 | specified = true;
|
---|
8280 |
|
---|
8281 | switch (name.begin[0])
|
---|
8282 | {
|
---|
8283 | case 'a':
|
---|
8284 | if (name == PUGIXML_TEXT("ancestor"))
|
---|
8285 | return axis_ancestor;
|
---|
8286 | else if (name == PUGIXML_TEXT("ancestor-or-self"))
|
---|
8287 | return axis_ancestor_or_self;
|
---|
8288 | else if (name == PUGIXML_TEXT("attribute"))
|
---|
8289 | return axis_attribute;
|
---|
8290 |
|
---|
8291 | break;
|
---|
8292 |
|
---|
8293 | case 'c':
|
---|
8294 | if (name == PUGIXML_TEXT("child"))
|
---|
8295 | return axis_child;
|
---|
8296 |
|
---|
8297 | break;
|
---|
8298 |
|
---|
8299 | case 'd':
|
---|
8300 | if (name == PUGIXML_TEXT("descendant"))
|
---|
8301 | return axis_descendant;
|
---|
8302 | else if (name == PUGIXML_TEXT("descendant-or-self"))
|
---|
8303 | return axis_descendant_or_self;
|
---|
8304 |
|
---|
8305 | break;
|
---|
8306 |
|
---|
8307 | case 'f':
|
---|
8308 | if (name == PUGIXML_TEXT("following"))
|
---|
8309 | return axis_following;
|
---|
8310 | else if (name == PUGIXML_TEXT("following-sibling"))
|
---|
8311 | return axis_following_sibling;
|
---|
8312 |
|
---|
8313 | break;
|
---|
8314 |
|
---|
8315 | case 'n':
|
---|
8316 | if (name == PUGIXML_TEXT("namespace"))
|
---|
8317 | return axis_namespace;
|
---|
8318 |
|
---|
8319 | break;
|
---|
8320 |
|
---|
8321 | case 'p':
|
---|
8322 | if (name == PUGIXML_TEXT("parent"))
|
---|
8323 | return axis_parent;
|
---|
8324 | else if (name == PUGIXML_TEXT("preceding"))
|
---|
8325 | return axis_preceding;
|
---|
8326 | else if (name == PUGIXML_TEXT("preceding-sibling"))
|
---|
8327 | return axis_preceding_sibling;
|
---|
8328 |
|
---|
8329 | break;
|
---|
8330 |
|
---|
8331 | case 's':
|
---|
8332 | if (name == PUGIXML_TEXT("self"))
|
---|
8333 | return axis_self;
|
---|
8334 |
|
---|
8335 | break;
|
---|
8336 | }
|
---|
8337 |
|
---|
8338 | specified = false;
|
---|
8339 | return axis_child;
|
---|
8340 | }
|
---|
8341 |
|
---|
8342 | nodetest_t parse_node_test_type(const xpath_lexer_string& name)
|
---|
8343 | {
|
---|
8344 | switch (name.begin[0])
|
---|
8345 | {
|
---|
8346 | case 'c':
|
---|
8347 | if (name == PUGIXML_TEXT("comment"))
|
---|
8348 | return nodetest_type_comment;
|
---|
8349 |
|
---|
8350 | break;
|
---|
8351 |
|
---|
8352 | case 'n':
|
---|
8353 | if (name == PUGIXML_TEXT("node"))
|
---|
8354 | return nodetest_type_node;
|
---|
8355 |
|
---|
8356 | break;
|
---|
8357 |
|
---|
8358 | case 'p':
|
---|
8359 | if (name == PUGIXML_TEXT("processing-instruction"))
|
---|
8360 | return nodetest_type_pi;
|
---|
8361 |
|
---|
8362 | break;
|
---|
8363 |
|
---|
8364 | case 't':
|
---|
8365 | if (name == PUGIXML_TEXT("text"))
|
---|
8366 | return nodetest_type_text;
|
---|
8367 |
|
---|
8368 | break;
|
---|
8369 | }
|
---|
8370 |
|
---|
8371 | return nodetest_none;
|
---|
8372 | }
|
---|
8373 |
|
---|
8374 | // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
|
---|
8375 | xpath_ast_node* parse_primary_expression()
|
---|
8376 | {
|
---|
8377 | switch (_lexer.current())
|
---|
8378 | {
|
---|
8379 | case lex_var_ref:
|
---|
8380 | {
|
---|
8381 | xpath_lexer_string name = _lexer.contents();
|
---|
8382 |
|
---|
8383 | if (!_variables)
|
---|
8384 | throw_error("Unknown variable: variable set is not provided");
|
---|
8385 |
|
---|
8386 | xpath_variable* var = get_variable(_variables, name.begin, name.end);
|
---|
8387 |
|
---|
8388 | if (!var)
|
---|
8389 | throw_error("Unknown variable: variable set does not contain the given name");
|
---|
8390 |
|
---|
8391 | _lexer.next();
|
---|
8392 |
|
---|
8393 | return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var);
|
---|
8394 | }
|
---|
8395 |
|
---|
8396 | case lex_open_brace:
|
---|
8397 | {
|
---|
8398 | _lexer.next();
|
---|
8399 |
|
---|
8400 | xpath_ast_node* n = parse_expression();
|
---|
8401 |
|
---|
8402 | if (_lexer.current() != lex_close_brace)
|
---|
8403 | throw_error("Unmatched braces");
|
---|
8404 |
|
---|
8405 | _lexer.next();
|
---|
8406 |
|
---|
8407 | return n;
|
---|
8408 | }
|
---|
8409 |
|
---|
8410 | case lex_quoted_string:
|
---|
8411 | {
|
---|
8412 | const char_t* value = alloc_string(_lexer.contents());
|
---|
8413 |
|
---|
8414 | xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);
|
---|
8415 | _lexer.next();
|
---|
8416 |
|
---|
8417 | return n;
|
---|
8418 | }
|
---|
8419 |
|
---|
8420 | case lex_number:
|
---|
8421 | {
|
---|
8422 | double value = 0;
|
---|
8423 |
|
---|
8424 | if (!convert_string_to_number(_lexer.contents().begin, _lexer.contents().end, &value))
|
---|
8425 | throw_error_oom();
|
---|
8426 |
|
---|
8427 | xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
|
---|
8428 | _lexer.next();
|
---|
8429 |
|
---|
8430 | return n;
|
---|
8431 | }
|
---|
8432 |
|
---|
8433 | case lex_string:
|
---|
8434 | {
|
---|
8435 | xpath_ast_node* args[2] = {0};
|
---|
8436 | size_t argc = 0;
|
---|
8437 |
|
---|
8438 | xpath_lexer_string function = _lexer.contents();
|
---|
8439 | _lexer.next();
|
---|
8440 |
|
---|
8441 | xpath_ast_node* last_arg = 0;
|
---|
8442 |
|
---|
8443 | if (_lexer.current() != lex_open_brace)
|
---|
8444 | throw_error("Unrecognized function call");
|
---|
8445 | _lexer.next();
|
---|
8446 |
|
---|
8447 | if (_lexer.current() != lex_close_brace)
|
---|
8448 | args[argc++] = parse_expression();
|
---|
8449 |
|
---|
8450 | while (_lexer.current() != lex_close_brace)
|
---|
8451 | {
|
---|
8452 | if (_lexer.current() != lex_comma)
|
---|
8453 | throw_error("No comma between function arguments");
|
---|
8454 | _lexer.next();
|
---|
8455 |
|
---|
8456 | xpath_ast_node* n = parse_expression();
|
---|
8457 |
|
---|
8458 | if (argc < 2) args[argc] = n;
|
---|
8459 | else last_arg->set_next(n);
|
---|
8460 |
|
---|
8461 | argc++;
|
---|
8462 | last_arg = n;
|
---|
8463 | }
|
---|
8464 |
|
---|
8465 | _lexer.next();
|
---|
8466 |
|
---|
8467 | return parse_function(function, argc, args);
|
---|
8468 | }
|
---|
8469 |
|
---|
8470 | default:
|
---|
8471 | throw_error("Unrecognizable primary expression");
|
---|
8472 |
|
---|
8473 | return 0;
|
---|
8474 | }
|
---|
8475 | }
|
---|
8476 |
|
---|
8477 | // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
|
---|
8478 | // Predicate ::= '[' PredicateExpr ']'
|
---|
8479 | // PredicateExpr ::= Expr
|
---|
8480 | xpath_ast_node* parse_filter_expression()
|
---|
8481 | {
|
---|
8482 | xpath_ast_node* n = parse_primary_expression();
|
---|
8483 |
|
---|
8484 | while (_lexer.current() == lex_open_square_brace)
|
---|
8485 | {
|
---|
8486 | _lexer.next();
|
---|
8487 |
|
---|
8488 | xpath_ast_node* expr = parse_expression();
|
---|
8489 |
|
---|
8490 | if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set");
|
---|
8491 |
|
---|
8492 | bool posinv = expr->rettype() != xpath_type_number && expr->is_posinv();
|
---|
8493 |
|
---|
8494 | n = new (alloc_node()) xpath_ast_node(posinv ? ast_filter_posinv : ast_filter, xpath_type_node_set, n, expr);
|
---|
8495 |
|
---|
8496 | if (_lexer.current() != lex_close_square_brace)
|
---|
8497 | throw_error("Unmatched square brace");
|
---|
8498 |
|
---|
8499 | _lexer.next();
|
---|
8500 | }
|
---|
8501 |
|
---|
8502 | return n;
|
---|
8503 | }
|
---|
8504 |
|
---|
8505 | // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
|
---|
8506 | // AxisSpecifier ::= AxisName '::' | '@'?
|
---|
8507 | // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
|
---|
8508 | // NameTest ::= '*' | NCName ':' '*' | QName
|
---|
8509 | // AbbreviatedStep ::= '.' | '..'
|
---|
8510 | xpath_ast_node* parse_step(xpath_ast_node* set)
|
---|
8511 | {
|
---|
8512 | if (set && set->rettype() != xpath_type_node_set)
|
---|
8513 | throw_error("Step has to be applied to node set");
|
---|
8514 |
|
---|
8515 | bool axis_specified = false;
|
---|
8516 | axis_t axis = axis_child; // implied child axis
|
---|
8517 |
|
---|
8518 | if (_lexer.current() == lex_axis_attribute)
|
---|
8519 | {
|
---|
8520 | axis = axis_attribute;
|
---|
8521 | axis_specified = true;
|
---|
8522 |
|
---|
8523 | _lexer.next();
|
---|
8524 | }
|
---|
8525 | else if (_lexer.current() == lex_dot)
|
---|
8526 | {
|
---|
8527 | _lexer.next();
|
---|
8528 |
|
---|
8529 | return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
|
---|
8530 | }
|
---|
8531 | else if (_lexer.current() == lex_double_dot)
|
---|
8532 | {
|
---|
8533 | _lexer.next();
|
---|
8534 |
|
---|
8535 | return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
|
---|
8536 | }
|
---|
8537 |
|
---|
8538 | nodetest_t nt_type = nodetest_none;
|
---|
8539 | xpath_lexer_string nt_name;
|
---|
8540 |
|
---|
8541 | if (_lexer.current() == lex_string)
|
---|
8542 | {
|
---|
8543 | // node name test
|
---|
8544 | nt_name = _lexer.contents();
|
---|
8545 | _lexer.next();
|
---|
8546 |
|
---|
8547 | // was it an axis name?
|
---|
8548 | if (_lexer.current() == lex_double_colon)
|
---|
8549 | {
|
---|
8550 | // parse axis name
|
---|
8551 | if (axis_specified) throw_error("Two axis specifiers in one step");
|
---|
8552 |
|
---|
8553 | axis = parse_axis_name(nt_name, axis_specified);
|
---|
8554 |
|
---|
8555 | if (!axis_specified) throw_error("Unknown axis");
|
---|
8556 |
|
---|
8557 | // read actual node test
|
---|
8558 | _lexer.next();
|
---|
8559 |
|
---|
8560 | if (_lexer.current() == lex_multiply)
|
---|
8561 | {
|
---|
8562 | nt_type = nodetest_all;
|
---|
8563 | nt_name = xpath_lexer_string();
|
---|
8564 | _lexer.next();
|
---|
8565 | }
|
---|
8566 | else if (_lexer.current() == lex_string)
|
---|
8567 | {
|
---|
8568 | nt_name = _lexer.contents();
|
---|
8569 | _lexer.next();
|
---|
8570 | }
|
---|
8571 | else throw_error("Unrecognized node test");
|
---|
8572 | }
|
---|
8573 |
|
---|
8574 | if (nt_type == nodetest_none)
|
---|
8575 | {
|
---|
8576 | // node type test or processing-instruction
|
---|
8577 | if (_lexer.current() == lex_open_brace)
|
---|
8578 | {
|
---|
8579 | _lexer.next();
|
---|
8580 |
|
---|
8581 | if (_lexer.current() == lex_close_brace)
|
---|
8582 | {
|
---|
8583 | _lexer.next();
|
---|
8584 |
|
---|
8585 | nt_type = parse_node_test_type(nt_name);
|
---|
8586 |
|
---|
8587 | if (nt_type == nodetest_none) throw_error("Unrecognized node type");
|
---|
8588 |
|
---|
8589 | nt_name = xpath_lexer_string();
|
---|
8590 | }
|
---|
8591 | else if (nt_name == PUGIXML_TEXT("processing-instruction"))
|
---|
8592 | {
|
---|
8593 | if (_lexer.current() != lex_quoted_string)
|
---|
8594 | throw_error("Only literals are allowed as arguments to processing-instruction()");
|
---|
8595 |
|
---|
8596 | nt_type = nodetest_pi;
|
---|
8597 | nt_name = _lexer.contents();
|
---|
8598 | _lexer.next();
|
---|
8599 |
|
---|
8600 | if (_lexer.current() != lex_close_brace)
|
---|
8601 | throw_error("Unmatched brace near processing-instruction()");
|
---|
8602 | _lexer.next();
|
---|
8603 | }
|
---|
8604 | else
|
---|
8605 | throw_error("Unmatched brace near node type test");
|
---|
8606 |
|
---|
8607 | }
|
---|
8608 | // QName or NCName:*
|
---|
8609 | else
|
---|
8610 | {
|
---|
8611 | if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
|
---|
8612 | {
|
---|
8613 | nt_name.end--; // erase *
|
---|
8614 |
|
---|
8615 | nt_type = nodetest_all_in_namespace;
|
---|
8616 | }
|
---|
8617 | else nt_type = nodetest_name;
|
---|
8618 | }
|
---|
8619 | }
|
---|
8620 | }
|
---|
8621 | else if (_lexer.current() == lex_multiply)
|
---|
8622 | {
|
---|
8623 | nt_type = nodetest_all;
|
---|
8624 | _lexer.next();
|
---|
8625 | }
|
---|
8626 | else throw_error("Unrecognized node test");
|
---|
8627 |
|
---|
8628 | xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));
|
---|
8629 |
|
---|
8630 | xpath_ast_node* last = 0;
|
---|
8631 |
|
---|
8632 | while (_lexer.current() == lex_open_square_brace)
|
---|
8633 | {
|
---|
8634 | _lexer.next();
|
---|
8635 |
|
---|
8636 | xpath_ast_node* expr = parse_expression();
|
---|
8637 |
|
---|
8638 | xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr);
|
---|
8639 |
|
---|
8640 | if (_lexer.current() != lex_close_square_brace)
|
---|
8641 | throw_error("Unmatched square brace");
|
---|
8642 | _lexer.next();
|
---|
8643 |
|
---|
8644 | if (last) last->set_next(pred);
|
---|
8645 | else n->set_right(pred);
|
---|
8646 |
|
---|
8647 | last = pred;
|
---|
8648 | }
|
---|
8649 |
|
---|
8650 | return n;
|
---|
8651 | }
|
---|
8652 |
|
---|
8653 | // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
|
---|
8654 | xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
|
---|
8655 | {
|
---|
8656 | xpath_ast_node* n = parse_step(set);
|
---|
8657 |
|
---|
8658 | while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
|
---|
8659 | {
|
---|
8660 | lexeme_t l = _lexer.current();
|
---|
8661 | _lexer.next();
|
---|
8662 |
|
---|
8663 | if (l == lex_double_slash)
|
---|
8664 | n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
|
---|
8665 |
|
---|
8666 | n = parse_step(n);
|
---|
8667 | }
|
---|
8668 |
|
---|
8669 | return n;
|
---|
8670 | }
|
---|
8671 |
|
---|
8672 | // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
|
---|
8673 | // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
|
---|
8674 | xpath_ast_node* parse_location_path()
|
---|
8675 | {
|
---|
8676 | if (_lexer.current() == lex_slash)
|
---|
8677 | {
|
---|
8678 | _lexer.next();
|
---|
8679 |
|
---|
8680 | xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
|
---|
8681 |
|
---|
8682 | // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
|
---|
8683 | lexeme_t l = _lexer.current();
|
---|
8684 |
|
---|
8685 | if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
|
---|
8686 | return parse_relative_location_path(n);
|
---|
8687 | else
|
---|
8688 | return n;
|
---|
8689 | }
|
---|
8690 | else if (_lexer.current() == lex_double_slash)
|
---|
8691 | {
|
---|
8692 | _lexer.next();
|
---|
8693 |
|
---|
8694 | xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
|
---|
8695 | n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
|
---|
8696 |
|
---|
8697 | return parse_relative_location_path(n);
|
---|
8698 | }
|
---|
8699 |
|
---|
8700 | // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
|
---|
8701 | return parse_relative_location_path(0);
|
---|
8702 | }
|
---|
8703 |
|
---|
8704 | // PathExpr ::= LocationPath
|
---|
8705 | // | FilterExpr
|
---|
8706 | // | FilterExpr '/' RelativeLocationPath
|
---|
8707 | // | FilterExpr '//' RelativeLocationPath
|
---|
8708 | xpath_ast_node* parse_path_expression()
|
---|
8709 | {
|
---|
8710 | // Clarification.
|
---|
8711 | // PathExpr begins with either LocationPath or FilterExpr.
|
---|
8712 | // FilterExpr begins with PrimaryExpr
|
---|
8713 | // PrimaryExpr begins with '$' in case of it being a variable reference,
|
---|
8714 | // '(' in case of it being an expression, string literal, number constant or
|
---|
8715 | // function call.
|
---|
8716 |
|
---|
8717 | if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
|
---|
8718 | _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
|
---|
8719 | _lexer.current() == lex_string)
|
---|
8720 | {
|
---|
8721 | if (_lexer.current() == lex_string)
|
---|
8722 | {
|
---|
8723 | // This is either a function call, or not - if not, we shall proceed with location path
|
---|
8724 | const char_t* state = _lexer.state();
|
---|
8725 |
|
---|
8726 | while (IS_CHARTYPE(*state, ct_space)) ++state;
|
---|
8727 |
|
---|
8728 | if (*state != '(') return parse_location_path();
|
---|
8729 |
|
---|
8730 | // This looks like a function call; however this still can be a node-test. Check it.
|
---|
8731 | if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();
|
---|
8732 | }
|
---|
8733 |
|
---|
8734 | xpath_ast_node* n = parse_filter_expression();
|
---|
8735 |
|
---|
8736 | if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
|
---|
8737 | {
|
---|
8738 | lexeme_t l = _lexer.current();
|
---|
8739 | _lexer.next();
|
---|
8740 |
|
---|
8741 | if (l == lex_double_slash)
|
---|
8742 | {
|
---|
8743 | if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
|
---|
8744 |
|
---|
8745 | n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
|
---|
8746 | }
|
---|
8747 |
|
---|
8748 | // select from location path
|
---|
8749 | return parse_relative_location_path(n);
|
---|
8750 | }
|
---|
8751 |
|
---|
8752 | return n;
|
---|
8753 | }
|
---|
8754 | else return parse_location_path();
|
---|
8755 | }
|
---|
8756 |
|
---|
8757 | // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
|
---|
8758 | xpath_ast_node* parse_union_expression()
|
---|
8759 | {
|
---|
8760 | xpath_ast_node* n = parse_path_expression();
|
---|
8761 |
|
---|
8762 | while (_lexer.current() == lex_union)
|
---|
8763 | {
|
---|
8764 | _lexer.next();
|
---|
8765 |
|
---|
8766 | xpath_ast_node* expr = parse_union_expression();
|
---|
8767 |
|
---|
8768 | if (n->rettype() != xpath_type_node_set || expr->rettype() != xpath_type_node_set)
|
---|
8769 | throw_error("Union operator has to be applied to node sets");
|
---|
8770 |
|
---|
8771 | n = new (alloc_node()) xpath_ast_node(ast_op_union, xpath_type_node_set, n, expr);
|
---|
8772 | }
|
---|
8773 |
|
---|
8774 | return n;
|
---|
8775 | }
|
---|
8776 |
|
---|
8777 | // UnaryExpr ::= UnionExpr | '-' UnaryExpr
|
---|
8778 | xpath_ast_node* parse_unary_expression()
|
---|
8779 | {
|
---|
8780 | if (_lexer.current() == lex_minus)
|
---|
8781 | {
|
---|
8782 | _lexer.next();
|
---|
8783 |
|
---|
8784 | xpath_ast_node* expr = parse_unary_expression();
|
---|
8785 |
|
---|
8786 | return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
|
---|
8787 | }
|
---|
8788 | else return parse_union_expression();
|
---|
8789 | }
|
---|
8790 |
|
---|
8791 | // MultiplicativeExpr ::= UnaryExpr
|
---|
8792 | // | MultiplicativeExpr '*' UnaryExpr
|
---|
8793 | // | MultiplicativeExpr 'div' UnaryExpr
|
---|
8794 | // | MultiplicativeExpr 'mod' UnaryExpr
|
---|
8795 | xpath_ast_node* parse_multiplicative_expression()
|
---|
8796 | {
|
---|
8797 | xpath_ast_node* n = parse_unary_expression();
|
---|
8798 |
|
---|
8799 | while (_lexer.current() == lex_multiply || (_lexer.current() == lex_string &&
|
---|
8800 | (_lexer.contents() == PUGIXML_TEXT("mod") || _lexer.contents() == PUGIXML_TEXT("div"))))
|
---|
8801 | {
|
---|
8802 | ast_type_t op = _lexer.current() == lex_multiply ? ast_op_multiply :
|
---|
8803 | _lexer.contents().begin[0] == 'd' ? ast_op_divide : ast_op_mod;
|
---|
8804 | _lexer.next();
|
---|
8805 |
|
---|
8806 | xpath_ast_node* expr = parse_unary_expression();
|
---|
8807 |
|
---|
8808 | n = new (alloc_node()) xpath_ast_node(op, xpath_type_number, n, expr);
|
---|
8809 | }
|
---|
8810 |
|
---|
8811 | return n;
|
---|
8812 | }
|
---|
8813 |
|
---|
8814 | // AdditiveExpr ::= MultiplicativeExpr
|
---|
8815 | // | AdditiveExpr '+' MultiplicativeExpr
|
---|
8816 | // | AdditiveExpr '-' MultiplicativeExpr
|
---|
8817 | xpath_ast_node* parse_additive_expression()
|
---|
8818 | {
|
---|
8819 | xpath_ast_node* n = parse_multiplicative_expression();
|
---|
8820 |
|
---|
8821 | while (_lexer.current() == lex_plus || _lexer.current() == lex_minus)
|
---|
8822 | {
|
---|
8823 | lexeme_t l = _lexer.current();
|
---|
8824 |
|
---|
8825 | _lexer.next();
|
---|
8826 |
|
---|
8827 | xpath_ast_node* expr = parse_multiplicative_expression();
|
---|
8828 |
|
---|
8829 | n = new (alloc_node()) xpath_ast_node(l == lex_plus ? ast_op_add : ast_op_subtract, xpath_type_number, n, expr);
|
---|
8830 | }
|
---|
8831 |
|
---|
8832 | return n;
|
---|
8833 | }
|
---|
8834 |
|
---|
8835 | // RelationalExpr ::= AdditiveExpr
|
---|
8836 | // | RelationalExpr '<' AdditiveExpr
|
---|
8837 | // | RelationalExpr '>' AdditiveExpr
|
---|
8838 | // | RelationalExpr '<=' AdditiveExpr
|
---|
8839 | // | RelationalExpr '>=' AdditiveExpr
|
---|
8840 | xpath_ast_node* parse_relational_expression()
|
---|
8841 | {
|
---|
8842 | xpath_ast_node* n = parse_additive_expression();
|
---|
8843 |
|
---|
8844 | while (_lexer.current() == lex_less || _lexer.current() == lex_less_or_equal ||
|
---|
8845 | _lexer.current() == lex_greater || _lexer.current() == lex_greater_or_equal)
|
---|
8846 | {
|
---|
8847 | lexeme_t l = _lexer.current();
|
---|
8848 | _lexer.next();
|
---|
8849 |
|
---|
8850 | xpath_ast_node* expr = parse_additive_expression();
|
---|
8851 |
|
---|
8852 | n = new (alloc_node()) xpath_ast_node(l == lex_less ? ast_op_less : l == lex_greater ? ast_op_greater :
|
---|
8853 | l == lex_less_or_equal ? ast_op_less_or_equal : ast_op_greater_or_equal, xpath_type_boolean, n, expr);
|
---|
8854 | }
|
---|
8855 |
|
---|
8856 | return n;
|
---|
8857 | }
|
---|
8858 |
|
---|
8859 | // EqualityExpr ::= RelationalExpr
|
---|
8860 | // | EqualityExpr '=' RelationalExpr
|
---|
8861 | // | EqualityExpr '!=' RelationalExpr
|
---|
8862 | xpath_ast_node* parse_equality_expression()
|
---|
8863 | {
|
---|
8864 | xpath_ast_node* n = parse_relational_expression();
|
---|
8865 |
|
---|
8866 | while (_lexer.current() == lex_equal || _lexer.current() == lex_not_equal)
|
---|
8867 | {
|
---|
8868 | lexeme_t l = _lexer.current();
|
---|
8869 |
|
---|
8870 | _lexer.next();
|
---|
8871 |
|
---|
8872 | xpath_ast_node* expr = parse_relational_expression();
|
---|
8873 |
|
---|
8874 | n = new (alloc_node()) xpath_ast_node(l == lex_equal ? ast_op_equal : ast_op_not_equal, xpath_type_boolean, n, expr);
|
---|
8875 | }
|
---|
8876 |
|
---|
8877 | return n;
|
---|
8878 | }
|
---|
8879 |
|
---|
8880 | // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
|
---|
8881 | xpath_ast_node* parse_and_expression()
|
---|
8882 | {
|
---|
8883 | xpath_ast_node* n = parse_equality_expression();
|
---|
8884 |
|
---|
8885 | while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("and"))
|
---|
8886 | {
|
---|
8887 | _lexer.next();
|
---|
8888 |
|
---|
8889 | xpath_ast_node* expr = parse_equality_expression();
|
---|
8890 |
|
---|
8891 | n = new (alloc_node()) xpath_ast_node(ast_op_and, xpath_type_boolean, n, expr);
|
---|
8892 | }
|
---|
8893 |
|
---|
8894 | return n;
|
---|
8895 | }
|
---|
8896 |
|
---|
8897 | // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
|
---|
8898 | xpath_ast_node* parse_or_expression()
|
---|
8899 | {
|
---|
8900 | xpath_ast_node* n = parse_and_expression();
|
---|
8901 |
|
---|
8902 | while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("or"))
|
---|
8903 | {
|
---|
8904 | _lexer.next();
|
---|
8905 |
|
---|
8906 | xpath_ast_node* expr = parse_and_expression();
|
---|
8907 |
|
---|
8908 | n = new (alloc_node()) xpath_ast_node(ast_op_or, xpath_type_boolean, n, expr);
|
---|
8909 | }
|
---|
8910 |
|
---|
8911 | return n;
|
---|
8912 | }
|
---|
8913 |
|
---|
8914 | // Expr ::= OrExpr
|
---|
8915 | xpath_ast_node* parse_expression()
|
---|
8916 | {
|
---|
8917 | return parse_or_expression();
|
---|
8918 | }
|
---|
8919 |
|
---|
8920 | xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)
|
---|
8921 | {
|
---|
8922 | }
|
---|
8923 |
|
---|
8924 | xpath_ast_node* parse()
|
---|
8925 | {
|
---|
8926 | xpath_ast_node* result = parse_expression();
|
---|
8927 |
|
---|
8928 | if (_lexer.current() != lex_eof)
|
---|
8929 | {
|
---|
8930 | // there are still unparsed tokens left, error
|
---|
8931 | throw_error("Incorrect query");
|
---|
8932 | }
|
---|
8933 |
|
---|
8934 | return result;
|
---|
8935 | }
|
---|
8936 |
|
---|
8937 | static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
|
---|
8938 | {
|
---|
8939 | xpath_parser parser(query, variables, alloc, result);
|
---|
8940 |
|
---|
8941 | #ifdef PUGIXML_NO_EXCEPTIONS
|
---|
8942 | int error = setjmp(parser._error_handler);
|
---|
8943 |
|
---|
8944 | return (error == 0) ? parser.parse() : 0;
|
---|
8945 | #else
|
---|
8946 | return parser.parse();
|
---|
8947 | #endif
|
---|
8948 | }
|
---|
8949 | };
|
---|
8950 |
|
---|
8951 | struct xpath_query_impl
|
---|
8952 | {
|
---|
8953 | static xpath_query_impl* create()
|
---|
8954 | {
|
---|
8955 | void* memory = global_allocate(sizeof(xpath_query_impl));
|
---|
8956 |
|
---|
8957 | return new (memory) xpath_query_impl();
|
---|
8958 | }
|
---|
8959 |
|
---|
8960 | static void destroy(void* ptr)
|
---|
8961 | {
|
---|
8962 | if (!ptr) return;
|
---|
8963 |
|
---|
8964 | // free all allocated pages
|
---|
8965 | static_cast<xpath_query_impl*>(ptr)->alloc.release();
|
---|
8966 |
|
---|
8967 | // free allocator memory (with the first page)
|
---|
8968 | global_deallocate(ptr);
|
---|
8969 | }
|
---|
8970 |
|
---|
8971 | xpath_query_impl(): root(0), alloc(&block)
|
---|
8972 | {
|
---|
8973 | block.next = 0;
|
---|
8974 | }
|
---|
8975 |
|
---|
8976 | xpath_ast_node* root;
|
---|
8977 | xpath_allocator alloc;
|
---|
8978 | xpath_memory_block block;
|
---|
8979 | };
|
---|
8980 |
|
---|
8981 | xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd)
|
---|
8982 | {
|
---|
8983 | if (!impl) return xpath_string();
|
---|
8984 |
|
---|
8985 | #ifdef PUGIXML_NO_EXCEPTIONS
|
---|
8986 | if (setjmp(sd.error_handler)) return xpath_string();
|
---|
8987 | #endif
|
---|
8988 |
|
---|
8989 | xpath_context c(n, 1, 1);
|
---|
8990 |
|
---|
8991 | return impl->root->eval_string(c, sd.stack);
|
---|
8992 | }
|
---|
8993 | }
|
---|
8994 |
|
---|
8995 | namespace pugi
|
---|
8996 | {
|
---|
8997 | #ifndef PUGIXML_NO_EXCEPTIONS
|
---|
8998 | xpath_exception::xpath_exception(const xpath_parse_result& result): _result(result)
|
---|
8999 | {
|
---|
9000 | assert(result.error);
|
---|
9001 | }
|
---|
9002 |
|
---|
9003 | const char* xpath_exception::what() const throw()
|
---|
9004 | {
|
---|
9005 | return _result.error;
|
---|
9006 | }
|
---|
9007 |
|
---|
9008 | const xpath_parse_result& xpath_exception::result() const
|
---|
9009 | {
|
---|
9010 | return _result;
|
---|
9011 | }
|
---|
9012 | #endif
|
---|
9013 |
|
---|
9014 | xpath_node::xpath_node()
|
---|
9015 | {
|
---|
9016 | }
|
---|
9017 |
|
---|
9018 | xpath_node::xpath_node(const xml_node& node): _node(node)
|
---|
9019 | {
|
---|
9020 | }
|
---|
9021 |
|
---|
9022 | xpath_node::xpath_node(const xml_attribute& attribute, const xml_node& parent): _node(attribute ? parent : xml_node()), _attribute(attribute)
|
---|
9023 | {
|
---|
9024 | }
|
---|
9025 |
|
---|
9026 | xml_node xpath_node::node() const
|
---|
9027 | {
|
---|
9028 | return _attribute ? xml_node() : _node;
|
---|
9029 | }
|
---|
9030 |
|
---|
9031 | xml_attribute xpath_node::attribute() const
|
---|
9032 | {
|
---|
9033 | return _attribute;
|
---|
9034 | }
|
---|
9035 |
|
---|
9036 | xml_node xpath_node::parent() const
|
---|
9037 | {
|
---|
9038 | return _attribute ? _node : _node.parent();
|
---|
9039 | }
|
---|
9040 |
|
---|
9041 | xpath_node::operator xpath_node::unspecified_bool_type() const
|
---|
9042 | {
|
---|
9043 | return (_node || _attribute) ? &xpath_node::_node : 0;
|
---|
9044 | }
|
---|
9045 |
|
---|
9046 | bool xpath_node::operator!() const
|
---|
9047 | {
|
---|
9048 | return !(_node || _attribute);
|
---|
9049 | }
|
---|
9050 |
|
---|
9051 | bool xpath_node::operator==(const xpath_node& n) const
|
---|
9052 | {
|
---|
9053 | return _node == n._node && _attribute == n._attribute;
|
---|
9054 | }
|
---|
9055 |
|
---|
9056 | bool xpath_node::operator!=(const xpath_node& n) const
|
---|
9057 | {
|
---|
9058 | return _node != n._node || _attribute != n._attribute;
|
---|
9059 | }
|
---|
9060 |
|
---|
9061 | #ifdef __BORLANDC__
|
---|
9062 | bool operator&&(const xpath_node& lhs, bool rhs)
|
---|
9063 | {
|
---|
9064 | return (bool)lhs && rhs;
|
---|
9065 | }
|
---|
9066 |
|
---|
9067 | bool operator||(const xpath_node& lhs, bool rhs)
|
---|
9068 | {
|
---|
9069 | return (bool)lhs || rhs;
|
---|
9070 | }
|
---|
9071 | #endif
|
---|
9072 |
|
---|
9073 | void xpath_node_set::_assign(const_iterator begin, const_iterator end)
|
---|
9074 | {
|
---|
9075 | assert(begin <= end);
|
---|
9076 |
|
---|
9077 | size_t size = static_cast<size_t>(end - begin);
|
---|
9078 |
|
---|
9079 | if (size <= 1)
|
---|
9080 | {
|
---|
9081 | // deallocate old buffer
|
---|
9082 | if (_begin != &_storage) global_deallocate(_begin);
|
---|
9083 |
|
---|
9084 | // use internal buffer
|
---|
9085 | if (begin != end) _storage = *begin;
|
---|
9086 |
|
---|
9087 | _begin = &_storage;
|
---|
9088 | _end = &_storage + size;
|
---|
9089 | }
|
---|
9090 | else
|
---|
9091 | {
|
---|
9092 | // make heap copy
|
---|
9093 | xpath_node* storage = static_cast<xpath_node*>(global_allocate(size * sizeof(xpath_node)));
|
---|
9094 |
|
---|
9095 | if (!storage)
|
---|
9096 | {
|
---|
9097 | #ifdef PUGIXML_NO_EXCEPTIONS
|
---|
9098 | return;
|
---|
9099 | #else
|
---|
9100 | throw std::bad_alloc();
|
---|
9101 | #endif
|
---|
9102 | }
|
---|
9103 |
|
---|
9104 | memcpy(storage, begin, size * sizeof(xpath_node));
|
---|
9105 |
|
---|
9106 | // deallocate old buffer
|
---|
9107 | if (_begin != &_storage) global_deallocate(_begin);
|
---|
9108 |
|
---|
9109 | // finalize
|
---|
9110 | _begin = storage;
|
---|
9111 | _end = storage + size;
|
---|
9112 | }
|
---|
9113 | }
|
---|
9114 |
|
---|
9115 | xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
|
---|
9116 | {
|
---|
9117 | }
|
---|
9118 |
|
---|
9119 | xpath_node_set::xpath_node_set(const_iterator begin, const_iterator end, type_t type): _type(type), _begin(&_storage), _end(&_storage)
|
---|
9120 | {
|
---|
9121 | _assign(begin, end);
|
---|
9122 | }
|
---|
9123 |
|
---|
9124 | xpath_node_set::~xpath_node_set()
|
---|
9125 | {
|
---|
9126 | if (_begin != &_storage) global_deallocate(_begin);
|
---|
9127 | }
|
---|
9128 |
|
---|
9129 | xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(ns._type), _begin(&_storage), _end(&_storage)
|
---|
9130 | {
|
---|
9131 | _assign(ns._begin, ns._end);
|
---|
9132 | }
|
---|
9133 |
|
---|
9134 | xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
|
---|
9135 | {
|
---|
9136 | if (this == &ns) return *this;
|
---|
9137 |
|
---|
9138 | _type = ns._type;
|
---|
9139 | _assign(ns._begin, ns._end);
|
---|
9140 |
|
---|
9141 | return *this;
|
---|
9142 | }
|
---|
9143 |
|
---|
9144 | xpath_node_set::type_t xpath_node_set::type() const
|
---|
9145 | {
|
---|
9146 | return _type;
|
---|
9147 | }
|
---|
9148 |
|
---|
9149 | size_t xpath_node_set::size() const
|
---|
9150 | {
|
---|
9151 | return _end - _begin;
|
---|
9152 | }
|
---|
9153 |
|
---|
9154 | bool xpath_node_set::empty() const
|
---|
9155 | {
|
---|
9156 | return _begin == _end;
|
---|
9157 | }
|
---|
9158 |
|
---|
9159 | const xpath_node& xpath_node_set::operator[](size_t index) const
|
---|
9160 | {
|
---|
9161 | assert(index < size());
|
---|
9162 | return _begin[index];
|
---|
9163 | }
|
---|
9164 |
|
---|
9165 | xpath_node_set::const_iterator xpath_node_set::begin() const
|
---|
9166 | {
|
---|
9167 | return _begin;
|
---|
9168 | }
|
---|
9169 |
|
---|
9170 | xpath_node_set::const_iterator xpath_node_set::end() const
|
---|
9171 | {
|
---|
9172 | return _end;
|
---|
9173 | }
|
---|
9174 |
|
---|
9175 | void xpath_node_set::sort(bool reverse)
|
---|
9176 | {
|
---|
9177 | _type = xpath_sort(_begin, _end, _type, reverse);
|
---|
9178 | }
|
---|
9179 |
|
---|
9180 | xpath_node xpath_node_set::first() const
|
---|
9181 | {
|
---|
9182 | return xpath_first(_begin, _end, _type);
|
---|
9183 | }
|
---|
9184 |
|
---|
9185 | xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
|
---|
9186 | {
|
---|
9187 | }
|
---|
9188 |
|
---|
9189 | xpath_parse_result::operator bool() const
|
---|
9190 | {
|
---|
9191 | return error == 0;
|
---|
9192 | }
|
---|
9193 | const char* xpath_parse_result::description() const
|
---|
9194 | {
|
---|
9195 | return error ? error : "No error";
|
---|
9196 | }
|
---|
9197 |
|
---|
9198 | xpath_variable::xpath_variable()
|
---|
9199 | {
|
---|
9200 | }
|
---|
9201 |
|
---|
9202 | const char_t* xpath_variable::name() const
|
---|
9203 | {
|
---|
9204 | switch (_type)
|
---|
9205 | {
|
---|
9206 | case xpath_type_node_set:
|
---|
9207 | return static_cast<const xpath_variable_node_set*>(this)->name;
|
---|
9208 |
|
---|
9209 | case xpath_type_number:
|
---|
9210 | return static_cast<const xpath_variable_number*>(this)->name;
|
---|
9211 |
|
---|
9212 | case xpath_type_string:
|
---|
9213 | return static_cast<const xpath_variable_string*>(this)->name;
|
---|
9214 |
|
---|
9215 | case xpath_type_boolean:
|
---|
9216 | return static_cast<const xpath_variable_boolean*>(this)->name;
|
---|
9217 |
|
---|
9218 | default:
|
---|
9219 | assert(!"Invalid variable type");
|
---|
9220 | return 0;
|
---|
9221 | }
|
---|
9222 | }
|
---|
9223 |
|
---|
9224 | xpath_value_type xpath_variable::type() const
|
---|
9225 | {
|
---|
9226 | return _type;
|
---|
9227 | }
|
---|
9228 |
|
---|
9229 | bool xpath_variable::get_boolean() const
|
---|
9230 | {
|
---|
9231 | return (_type == xpath_type_boolean) ? static_cast<const xpath_variable_boolean*>(this)->value : false;
|
---|
9232 | }
|
---|
9233 |
|
---|
9234 | double xpath_variable::get_number() const
|
---|
9235 | {
|
---|
9236 | return (_type == xpath_type_number) ? static_cast<const xpath_variable_number*>(this)->value : gen_nan();
|
---|
9237 | }
|
---|
9238 |
|
---|
9239 | const char_t* xpath_variable::get_string() const
|
---|
9240 | {
|
---|
9241 | const char_t* value = (_type == xpath_type_string) ? static_cast<const xpath_variable_string*>(this)->value : 0;
|
---|
9242 | return value ? value : PUGIXML_TEXT("");
|
---|
9243 | }
|
---|
9244 |
|
---|
9245 | const xpath_node_set& xpath_variable::get_node_set() const
|
---|
9246 | {
|
---|
9247 | return (_type == xpath_type_node_set) ? static_cast<const xpath_variable_node_set*>(this)->value : dummy_node_set;
|
---|
9248 | }
|
---|
9249 |
|
---|
9250 | bool xpath_variable::set(bool value)
|
---|
9251 | {
|
---|
9252 | if (_type != xpath_type_boolean) return false;
|
---|
9253 |
|
---|
9254 | static_cast<xpath_variable_boolean*>(this)->value = value;
|
---|
9255 | return true;
|
---|
9256 | }
|
---|
9257 |
|
---|
9258 | bool xpath_variable::set(double value)
|
---|
9259 | {
|
---|
9260 | if (_type != xpath_type_number) return false;
|
---|
9261 |
|
---|
9262 | static_cast<xpath_variable_number*>(this)->value = value;
|
---|
9263 | return true;
|
---|
9264 | }
|
---|
9265 |
|
---|
9266 | bool xpath_variable::set(const char_t* value)
|
---|
9267 | {
|
---|
9268 | if (_type != xpath_type_string) return false;
|
---|
9269 |
|
---|
9270 | xpath_variable_string* var = static_cast<xpath_variable_string*>(this);
|
---|
9271 |
|
---|
9272 | // duplicate string
|
---|
9273 | size_t size = (strlength(value) + 1) * sizeof(char_t);
|
---|
9274 |
|
---|
9275 | char_t* copy = static_cast<char_t*>(global_allocate(size));
|
---|
9276 | if (!copy) return false;
|
---|
9277 |
|
---|
9278 | memcpy(copy, value, size);
|
---|
9279 |
|
---|
9280 | // replace old string
|
---|
9281 | if (var->value) global_deallocate(var->value);
|
---|
9282 | var->value = copy;
|
---|
9283 |
|
---|
9284 | return true;
|
---|
9285 | }
|
---|
9286 |
|
---|
9287 | bool xpath_variable::set(const xpath_node_set& value)
|
---|
9288 | {
|
---|
9289 | if (_type != xpath_type_node_set) return false;
|
---|
9290 |
|
---|
9291 | static_cast<xpath_variable_node_set*>(this)->value = value;
|
---|
9292 | return true;
|
---|
9293 | }
|
---|
9294 |
|
---|
9295 | xpath_variable_set::xpath_variable_set()
|
---|
9296 | {
|
---|
9297 | for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) _data[i] = 0;
|
---|
9298 | }
|
---|
9299 |
|
---|
9300 | xpath_variable_set::~xpath_variable_set()
|
---|
9301 | {
|
---|
9302 | for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
|
---|
9303 | {
|
---|
9304 | xpath_variable* var = _data[i];
|
---|
9305 |
|
---|
9306 | while (var)
|
---|
9307 | {
|
---|
9308 | xpath_variable* next = var->_next;
|
---|
9309 |
|
---|
9310 | delete_xpath_variable(var->_type, var);
|
---|
9311 |
|
---|
9312 | var = next;
|
---|
9313 | }
|
---|
9314 | }
|
---|
9315 | }
|
---|
9316 |
|
---|
9317 | xpath_variable* xpath_variable_set::find(const char_t* name) const
|
---|
9318 | {
|
---|
9319 | const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
|
---|
9320 | size_t hash = hash_string(name) % hash_size;
|
---|
9321 |
|
---|
9322 | // look for existing variable
|
---|
9323 | for (xpath_variable* var = _data[hash]; var; var = var->_next)
|
---|
9324 | if (strequal(var->name(), name))
|
---|
9325 | return var;
|
---|
9326 |
|
---|
9327 | return 0;
|
---|
9328 | }
|
---|
9329 |
|
---|
9330 | xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
|
---|
9331 | {
|
---|
9332 | const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
|
---|
9333 | size_t hash = hash_string(name) % hash_size;
|
---|
9334 |
|
---|
9335 | // look for existing variable
|
---|
9336 | for (xpath_variable* var = _data[hash]; var; var = var->_next)
|
---|
9337 | if (strequal(var->name(), name))
|
---|
9338 | return var->type() == type ? var : 0;
|
---|
9339 |
|
---|
9340 | // add new variable
|
---|
9341 | xpath_variable* result = new_xpath_variable(type, name);
|
---|
9342 |
|
---|
9343 | if (result)
|
---|
9344 | {
|
---|
9345 | result->_type = type;
|
---|
9346 | result->_next = _data[hash];
|
---|
9347 |
|
---|
9348 | _data[hash] = result;
|
---|
9349 | }
|
---|
9350 |
|
---|
9351 | return result;
|
---|
9352 | }
|
---|
9353 |
|
---|
9354 | bool xpath_variable_set::set(const char_t* name, bool value)
|
---|
9355 | {
|
---|
9356 | xpath_variable* var = add(name, xpath_type_boolean);
|
---|
9357 | return var ? var->set(value) : false;
|
---|
9358 | }
|
---|
9359 |
|
---|
9360 | bool xpath_variable_set::set(const char_t* name, double value)
|
---|
9361 | {
|
---|
9362 | xpath_variable* var = add(name, xpath_type_number);
|
---|
9363 | return var ? var->set(value) : false;
|
---|
9364 | }
|
---|
9365 |
|
---|
9366 | bool xpath_variable_set::set(const char_t* name, const char_t* value)
|
---|
9367 | {
|
---|
9368 | xpath_variable* var = add(name, xpath_type_string);
|
---|
9369 | return var ? var->set(value) : false;
|
---|
9370 | }
|
---|
9371 |
|
---|
9372 | bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
|
---|
9373 | {
|
---|
9374 | xpath_variable* var = add(name, xpath_type_node_set);
|
---|
9375 | return var ? var->set(value) : false;
|
---|
9376 | }
|
---|
9377 |
|
---|
9378 | xpath_variable* xpath_variable_set::get(const char_t* name)
|
---|
9379 | {
|
---|
9380 | return find(name);
|
---|
9381 | }
|
---|
9382 |
|
---|
9383 | const xpath_variable* xpath_variable_set::get(const char_t* name) const
|
---|
9384 | {
|
---|
9385 | return find(name);
|
---|
9386 | }
|
---|
9387 |
|
---|
9388 | xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
|
---|
9389 | {
|
---|
9390 | xpath_query_impl* impl = xpath_query_impl::create();
|
---|
9391 |
|
---|
9392 | if (!impl)
|
---|
9393 | {
|
---|
9394 | #ifdef PUGIXML_NO_EXCEPTIONS
|
---|
9395 | _result.error = "Out of memory";
|
---|
9396 | #else
|
---|
9397 | throw std::bad_alloc();
|
---|
9398 | #endif
|
---|
9399 | }
|
---|
9400 | else
|
---|
9401 | {
|
---|
9402 | buffer_holder impl_holder(impl, xpath_query_impl::destroy);
|
---|
9403 |
|
---|
9404 | impl->root = xpath_parser::parse(query, variables, &impl->alloc, &_result);
|
---|
9405 |
|
---|
9406 | if (impl->root)
|
---|
9407 | {
|
---|
9408 | _impl = static_cast<xpath_query_impl*>(impl_holder.release());
|
---|
9409 | _result.error = 0;
|
---|
9410 | }
|
---|
9411 | }
|
---|
9412 | }
|
---|
9413 |
|
---|
9414 | xpath_query::~xpath_query()
|
---|
9415 | {
|
---|
9416 | xpath_query_impl::destroy(_impl);
|
---|
9417 | }
|
---|
9418 |
|
---|
9419 | xpath_value_type xpath_query::return_type() const
|
---|
9420 | {
|
---|
9421 | if (!_impl) return xpath_type_none;
|
---|
9422 |
|
---|
9423 | return static_cast<xpath_query_impl*>(_impl)->root->rettype();
|
---|
9424 | }
|
---|
9425 |
|
---|
9426 | bool xpath_query::evaluate_boolean(const xpath_node& n) const
|
---|
9427 | {
|
---|
9428 | if (!_impl) return false;
|
---|
9429 |
|
---|
9430 | xpath_context c(n, 1, 1);
|
---|
9431 | xpath_stack_data sd;
|
---|
9432 |
|
---|
9433 | #ifdef PUGIXML_NO_EXCEPTIONS
|
---|
9434 | if (setjmp(sd.error_handler)) return false;
|
---|
9435 | #endif
|
---|
9436 |
|
---|
9437 | return static_cast<xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
|
---|
9438 | }
|
---|
9439 |
|
---|
9440 | double xpath_query::evaluate_number(const xpath_node& n) const
|
---|
9441 | {
|
---|
9442 | if (!_impl) return gen_nan();
|
---|
9443 |
|
---|
9444 | xpath_context c(n, 1, 1);
|
---|
9445 | xpath_stack_data sd;
|
---|
9446 |
|
---|
9447 | #ifdef PUGIXML_NO_EXCEPTIONS
|
---|
9448 | if (setjmp(sd.error_handler)) return gen_nan();
|
---|
9449 | #endif
|
---|
9450 |
|
---|
9451 | return static_cast<xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
|
---|
9452 | }
|
---|
9453 |
|
---|
9454 | #ifndef PUGIXML_NO_STL
|
---|
9455 | string_t xpath_query::evaluate_string(const xpath_node& n) const
|
---|
9456 | {
|
---|
9457 | xpath_stack_data sd;
|
---|
9458 |
|
---|
9459 | return evaluate_string_impl(static_cast<xpath_query_impl*>(_impl), n, sd).c_str();
|
---|
9460 | }
|
---|
9461 | #endif
|
---|
9462 |
|
---|
9463 | size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
|
---|
9464 | {
|
---|
9465 | xpath_stack_data sd;
|
---|
9466 |
|
---|
9467 | xpath_string r = evaluate_string_impl(static_cast<xpath_query_impl*>(_impl), n, sd);
|
---|
9468 |
|
---|
9469 | size_t full_size = r.length() + 1;
|
---|
9470 |
|
---|
9471 | if (capacity > 0)
|
---|
9472 | {
|
---|
9473 | size_t size = (full_size < capacity) ? full_size : capacity;
|
---|
9474 | assert(size > 0);
|
---|
9475 |
|
---|
9476 | memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
|
---|
9477 | buffer[size - 1] = 0;
|
---|
9478 | }
|
---|
9479 |
|
---|
9480 | return full_size;
|
---|
9481 | }
|
---|
9482 |
|
---|
9483 | xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
|
---|
9484 | {
|
---|
9485 | if (!_impl) return xpath_node_set();
|
---|
9486 |
|
---|
9487 | xpath_ast_node* root = static_cast<xpath_query_impl*>(_impl)->root;
|
---|
9488 |
|
---|
9489 | if (root->rettype() != xpath_type_node_set)
|
---|
9490 | {
|
---|
9491 | #ifdef PUGIXML_NO_EXCEPTIONS
|
---|
9492 | return xpath_node_set();
|
---|
9493 | #else
|
---|
9494 | xpath_parse_result result;
|
---|
9495 | result.error = "Expression does not evaluate to node set";
|
---|
9496 |
|
---|
9497 | throw xpath_exception(result);
|
---|
9498 | #endif
|
---|
9499 | }
|
---|
9500 |
|
---|
9501 | xpath_context c(n, 1, 1);
|
---|
9502 | xpath_stack_data sd;
|
---|
9503 |
|
---|
9504 | #ifdef PUGIXML_NO_EXCEPTIONS
|
---|
9505 | if (setjmp(sd.error_handler)) return xpath_node_set();
|
---|
9506 | #endif
|
---|
9507 |
|
---|
9508 | xpath_node_set_raw r = root->eval_node_set(c, sd.stack);
|
---|
9509 |
|
---|
9510 | return xpath_node_set(r.begin(), r.end(), r.type());
|
---|
9511 | }
|
---|
9512 |
|
---|
9513 | const xpath_parse_result& xpath_query::result() const
|
---|
9514 | {
|
---|
9515 | return _result;
|
---|
9516 | }
|
---|
9517 |
|
---|
9518 | xpath_query::operator xpath_query::unspecified_bool_type() const
|
---|
9519 | {
|
---|
9520 | return _impl ? &xpath_query::_impl : 0;
|
---|
9521 | }
|
---|
9522 |
|
---|
9523 | bool xpath_query::operator!() const
|
---|
9524 | {
|
---|
9525 | return !_impl;
|
---|
9526 | }
|
---|
9527 |
|
---|
9528 | xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
|
---|
9529 | {
|
---|
9530 | xpath_query q(query, variables);
|
---|
9531 | return select_single_node(q);
|
---|
9532 | }
|
---|
9533 |
|
---|
9534 | xpath_node xml_node::select_single_node(const xpath_query& query) const
|
---|
9535 | {
|
---|
9536 | xpath_node_set s = query.evaluate_node_set(*this);
|
---|
9537 | return s.empty() ? xpath_node() : s.first();
|
---|
9538 | }
|
---|
9539 |
|
---|
9540 | xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
|
---|
9541 | {
|
---|
9542 | xpath_query q(query, variables);
|
---|
9543 | return select_nodes(q);
|
---|
9544 | }
|
---|
9545 |
|
---|
9546 | xpath_node_set xml_node::select_nodes(const xpath_query& query) const
|
---|
9547 | {
|
---|
9548 | return query.evaluate_node_set(*this);
|
---|
9549 | }
|
---|
9550 | }
|
---|
9551 |
|
---|
9552 | #endif
|
---|
9553 |
|
---|
9554 | /**
|
---|
9555 | * Copyright (c) 2006-2010 Arseny Kapoulkine
|
---|
9556 | *
|
---|
9557 | * Permission is hereby granted, free of charge, to any person
|
---|
9558 | * obtaining a copy of this software and associated documentation
|
---|
9559 | * files (the "Software"), to deal in the Software without
|
---|
9560 | * restriction, including without limitation the rights to use,
|
---|
9561 | * copy, modify, merge, publish, distribute, sublicense, and/or sell
|
---|
9562 | * copies of the Software, and to permit persons to whom the
|
---|
9563 | * Software is furnished to do so, subject to the following
|
---|
9564 | * conditions:
|
---|
9565 | *
|
---|
9566 | * The above copyright notice and this permission notice shall be
|
---|
9567 | * included in all copies or substantial portions of the Software.
|
---|
9568 | *
|
---|
9569 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
---|
9570 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
---|
9571 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
---|
9572 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
---|
9573 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
---|
9574 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
---|
9575 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
---|
9576 | * OTHER DEALINGS IN THE SOFTWARE.
|
---|
9577 | */
|
---|