source: src/thirdparty/pugixml/pugixml.cpp@ dfed1c

Last change on this file since dfed1c was dfed1c, checked in by Julian Iseringhausen <isering@…>, 14 years ago

Major vmg update.

git-svn-id: https://svn.version.fz-juelich.de/scafacos/trunk@1136 5161e1c8-67bf-11de-9fd5-51895aff932f

  • Property mode set to 100644
File size: 242.4 KB
Line 
1/**
2 * pugixml parser - version 1.0
3 * --------------------------------------------------------
4 * Copyright (C) 2006-2010, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
5 * Report bugs and download new versions at http://pugixml.org/
6 *
7 * This library is distributed under the MIT License. See notice at the end
8 * of this file.
9 *
10 * This work is based on the pugxml parser, which is:
11 * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
12 */
13
14#include "pugixml.hpp"
15
16#include <stdlib.h>
17#include <stdio.h>
18#include <string.h>
19#include <assert.h>
20#include <setjmp.h>
21#include <wchar.h>
22
23#ifndef PUGIXML_NO_XPATH
24# include <math.h>
25# include <float.h>
26#endif
27
28#ifndef PUGIXML_NO_STL
29# include <istream>
30# include <ostream>
31# include <string>
32#endif
33
34// For placement new
35#include <new>
36
37#ifdef _MSC_VER
38# pragma warning(disable: 4127) // conditional expression is constant
39# pragma warning(disable: 4324) // structure was padded due to __declspec(align())
40# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
41# pragma warning(disable: 4702) // unreachable code
42# pragma warning(disable: 4996) // this function or variable may be unsafe
43#endif
44
45#ifdef __INTEL_COMPILER
46# pragma warning(disable: 177) // function was declared but never referenced
47# pragma warning(disable: 1478 1786) // function was declared "deprecated"
48#endif
49
50#ifdef __BORLANDC__
51# pragma warn -8008 // condition is always false
52# pragma warn -8066 // unreachable code
53#endif
54
55#ifdef __SNC__
56# pragma diag_suppress=178 // function was declared but never referenced
57# pragma diag_suppress=237 // controlling expression is constant
58#endif
59
60// uintptr_t
61#if !defined(_MSC_VER) || _MSC_VER >= 1600
62# include <stdint.h>
63#else
64# if _MSC_VER < 1300
65// No native uintptr_t in MSVC6
66typedef size_t uintptr_t;
67# endif
68typedef unsigned __int8 uint8_t;
69typedef unsigned __int16 uint16_t;
70typedef unsigned __int32 uint32_t;
71typedef __int32 int32_t;
72#endif
73
74// Inlining controls
// PUGIXML_NO_INLINE is placed in front of a function definition to ask the
// compiler not to inline it. It expands to the MSVC form when _MSC_VER >= 1300,
// to the GCC attribute form when __GNUC__ is defined, and to nothing otherwise.
75#if defined(_MSC_VER) && _MSC_VER >= 1300
76# define PUGIXML_NO_INLINE __declspec(noinline)
77#elif defined(__GNUC__)
78# define PUGIXML_NO_INLINE __attribute__((noinline))
79#else
80# define PUGIXML_NO_INLINE
81#endif
82
83// Simple static assertion
// Expands to a braced block declaring a char array whose size is 1 when cond
// is true and -1 when it is false, so a false condition fails to compile.
// The array element is read and cast to void so the declaration counts as used.
// Because the expansion is a block, the macro is usable only where a statement is allowed.
84#define STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
85
86// Digital Mars C++ bug workaround for passing char loaded from memory via stack
// DMC_VOLATILE expands to 'volatile' only when __DMC__ is defined and to nothing
// on every other compiler; it is meant to qualify locals that hold bytes read
// from a buffer before those bytes are passed on as function arguments.
87#ifdef __DMC__
88# define DMC_VOLATILE volatile
89#else
90# define DMC_VOLATILE
91#endif
92
93using namespace pugi;
94
95// Memory allocation
96namespace
97{
// Default allocation hook: obtains size bytes from the C runtime heap.
// Returns whatever malloc returns, i.e. null when the request cannot be met.
void* default_allocate(size_t size)
{
	void* block = malloc(size);

	return block;
}
102
// Default deallocation hook: hands ptr back to the C runtime heap via free.
void default_deallocate(void* ptr)
{
	::free(ptr);
}
107
// Currently installed allocation/deallocation hooks. Both start out pointing
// at the malloc/free based defaults defined above; the rest of this file
// requests and releases raw memory through these two pointers.
108 allocation_function global_allocate = default_allocate;
109 deallocation_function global_deallocate = default_deallocate;
110}
111
112// String utilities
113namespace
114{
115 // Get string length
116 size_t strlength(const char_t* s)
117 {
118 assert(s);
119
120 #ifdef PUGIXML_WCHAR_MODE
121 return wcslen(s);
122 #else
123 return strlen(s);
124 #endif
125 }
126
127 // Compare two strings
128 bool strequal(const char_t* src, const char_t* dst)
129 {
130 assert(src && dst);
131
132 #ifdef PUGIXML_WCHAR_MODE
133 return wcscmp(src, dst) == 0;
134 #else
135 return strcmp(src, dst) == 0;
136 #endif
137 }
138
139 // Compare lhs with [rhs_begin, rhs_end)
140 bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
141 {
142 for (size_t i = 0; i < count; ++i)
143 if (lhs[i] != rhs[i])
144 return false;
145
146 return lhs[count] == 0;
147 }
148
149#ifdef PUGIXML_WCHAR_MODE
// Copies the null-terminated narrow string source into dest, one wchar_t per
// char, and terminates dest. Each char is converted by plain assignment, so the
// result is only meaningful for ASCII input. dest must have room for the
// characters of source plus the terminator.
void widen_ascii(wchar_t* dest, const char* source)
{
	while (*source)
	{
		*dest = *source;

		++dest;
		++source;
	}

	*dest = 0;
}
156#endif
157}
158
159#if !defined(PUGIXML_NO_STL) || !defined(PUGIXML_NO_XPATH)
160// auto_ptr-like buffer holder for exception recovery
161namespace
162{
// Pairs a raw buffer with the function that frees it. The buffer is freed when
// the holder goes out of scope unless release() has been called before that.
struct buffer_holder
{
	void* data;
	void (*deleter)(void*);

	buffer_holder(void* data, void (*deleter)(void*))
		: data(data)
		, deleter(deleter)
	{
	}

	~buffer_holder()
	{
		// nothing to free once the buffer was released (or was null to begin with)
		if (!data) return;

		deleter(data);
	}

	// Gives the buffer to the caller; afterwards the destructor leaves it alone
	void* release()
	{
		void* owned = data;

		data = 0;

		return owned;
	}
};
184}
185#endif
186
187namespace
188{
// Capacity of the data area of a regular memory page, in bytes
static const size_t xml_memory_page_size = 32 * 1024;

// Pages are aligned to this many bytes, which leaves the low bits of a page
// address free; node and attribute headers keep flags in those bits
static const uintptr_t xml_memory_page_alignment = 32;

// Selects the page address part of a header
static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);

// Header flags tested before a name/value string is handed to deallocate_string
static const uintptr_t xml_memory_page_name_allocated_mask = 1 << 4;
static const uintptr_t xml_memory_page_value_allocated_mask = 1 << 3;

// Low three header bits (presumably the node type stored by the node constructor - confirm against users of this mask)
static const uintptr_t xml_memory_page_type_mask = (1 << 3) - 1;
196
struct xml_allocator;

// Header located at the start of every memory page; the storage handed out
// from the page begins at the data member.
struct xml_memory_page
{
	// Treats memory as a page header and zeroes every bookkeeping field.
	// Returns the page, or null when memory is null.
	static xml_memory_page* construct(void* memory)
	{
		xml_memory_page* page = static_cast<xml_memory_page*>(memory);

		// the original notes this null check as redundant but kept for performance
		if (page)
		{
			page->allocator = 0;
			page->memory = 0;

			page->prev = 0;
			page->next = 0;

			page->busy_size = 0;
			page->freed_size = 0;
		}

		return page;
	}

	xml_allocator* allocator; // allocator the page belongs to

	void* memory; // pointer passed to the deallocation function when the page is released

	xml_memory_page* prev; // neighbouring pages in the allocator's list
	xml_memory_page* next;

	size_t busy_size; // bytes handed out from this page so far
	size_t freed_size; // bytes given back so far

	char data[1]; // first byte of the page's storage
};
229
// Bookkeeping record stored directly in front of every string allocated from a page
struct xml_memory_string_header
{
	// byte distance from the start of the owning page's data area to this header
	uint16_t page_offset;

	// size of the whole allocation in bytes; 0 marks a string that occupies its whole page
	uint16_t full_size;
};
235
236 struct xml_allocator
237 {
238 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
239 {
240 }
241
242 xml_memory_page* allocate_page(size_t data_size)
243 {
244 size_t size = offsetof(xml_memory_page, data) + data_size;
245
246 // allocate block with some alignment, leaving memory for worst-case padding
247 void* memory = global_allocate(size + xml_memory_page_alignment);
248 if (!memory) return 0;
249
250 // align upwards to page boundary
251 void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1));
252
253 // prepare page structure
254 xml_memory_page* page = xml_memory_page::construct(page_memory);
255
256 page->memory = memory;
257 page->allocator = _root->allocator;
258
259 return page;
260 }
261
262 static void deallocate_page(xml_memory_page* page)
263 {
264 global_deallocate(page->memory);
265 }
266
267 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
268
269 void* allocate_memory(size_t size, xml_memory_page*& out_page)
270 {
271 if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page);
272
273 void* buf = _root->data + _busy_size;
274
275 _busy_size += size;
276
277 out_page = _root;
278
279 return buf;
280 }
281
282 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
283 {
284 if (page == _root) page->busy_size = _busy_size;
285
286 assert(ptr >= page->data && ptr < page->data + page->busy_size);
287 (void)!ptr;
288
289 page->freed_size += size;
290 assert(page->freed_size <= page->busy_size);
291
292 if (page->freed_size == page->busy_size)
293 {
294 if (page->next == 0)
295 {
296 assert(_root == page);
297
298 // top page freed, just reset sizes
299 page->busy_size = page->freed_size = 0;
300 _busy_size = 0;
301 }
302 else
303 {
304 assert(_root != page);
305 assert(page->prev);
306
307 // remove from the list
308 page->prev->next = page->next;
309 page->next->prev = page->prev;
310
311 // deallocate
312 deallocate_page(page);
313 }
314 }
315 }
316
317 char_t* allocate_string(size_t length)
318 {
319 // allocate memory for string and header block
320 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
321
322 // round size up to pointer alignment boundary
323 size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1);
324
325 xml_memory_page* page;
326 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
327
328 if (!header) return 0;
329
330 // setup header
331 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - page->data;
332
333 assert(page_offset >= 0 && page_offset < (1 << 16));
334 header->page_offset = static_cast<uint16_t>(page_offset);
335
336 // full_size == 0 for large strings that occupy the whole page
337 assert(full_size < (1 << 16) || (page->busy_size == full_size && page_offset == 0));
338 header->full_size = static_cast<uint16_t>(full_size < (1 << 16) ? full_size : 0);
339
340 return reinterpret_cast<char_t*>(header + 1);
341 }
342
343 void deallocate_string(char_t* string)
344 {
345 // get header
346 xml_memory_string_header* header = reinterpret_cast<xml_memory_string_header*>(string) - 1;
347
348 // deallocate
349 size_t page_offset = offsetof(xml_memory_page, data) + header->page_offset;
350 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(reinterpret_cast<char*>(header) - page_offset);
351
352 // if full_size == 0 then this string occupies the whole page
353 size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size;
354
355 deallocate_memory(header, full_size, page);
356 }
357
358 xml_memory_page* _root;
359 size_t _busy_size;
360 };
361
362 PUGIXML_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
363 {
364 const size_t large_allocation_threshold = xml_memory_page_size / 4;
365
366 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
367 if (!page) return 0;
368
369 if (size <= large_allocation_threshold)
370 {
371 _root->busy_size = _busy_size;
372
373 // insert page at the end of linked list
374 page->prev = _root;
375 _root->next = page;
376 _root = page;
377
378 _busy_size = size;
379 }
380 else
381 {
382 // insert page before the end of linked list, so that it is deleted as soon as possible
383 // the last page is not deleted even if it's empty (see deallocate_memory)
384 assert(_root->prev);
385
386 page->prev = _root->prev;
387 page->next = _root;
388
389 _root->prev->next = page;
390 _root->prev = page;
391 }
392
393 // allocate inside page
394 page->busy_size = size;
395
396 out_page = page;
397 return page->data;
398 }
399}
400
401namespace pugi
402{
403 /// A 'name=value' XML attribute structure.
404 struct xml_attribute_struct
405 {
406 /// Default ctor
407 xml_attribute_struct(xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0)
408 {
409 }
410
411 uintptr_t header;
412
413 char_t* name; ///< Pointer to attribute name.
414 char_t* value; ///< Pointer to attribute value.
415
416 xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list)
417 xml_attribute_struct* next_attribute; ///< Next attribute
418 };
419
420 /// An XML document tree node.
421 struct xml_node_struct
422 {
423 /// Default ctor
424 /// \param type - node type
425 xml_node_struct(xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
426 {
427 }
428
429 uintptr_t header;
430
431 xml_node_struct* parent; ///< Pointer to parent
432
433 char_t* name; ///< Pointer to element name.
434 char_t* value; ///< Pointer to any associated string data.
435
436 xml_node_struct* first_child; ///< First child
437
438 xml_node_struct* prev_sibling_c; ///< Left brother (cyclic list)
439 xml_node_struct* next_sibling; ///< Right brother
440
441 xml_attribute_struct* first_attribute; ///< First attribute
442 };
443}
444
445namespace
446{
447 struct xml_document_struct: public xml_node_struct, public xml_allocator
448 {
449 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0)
450 {
451 }
452
453 const char_t* buffer;
454 };
455
456 static inline xml_allocator& get_allocator(const xml_node_struct* node)
457 {
458 assert(node);
459
460 return *reinterpret_cast<xml_memory_page*>(node->header & xml_memory_page_pointer_mask)->allocator;
461 }
462}
463
464// Low-level DOM operations
465namespace
466{
467 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
468 {
469 xml_memory_page* page;
470 void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page);
471
472 return new (memory) xml_attribute_struct(page);
473 }
474
475 inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
476 {
477 xml_memory_page* page;
478 void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page);
479
480 return new (memory) xml_node_struct(page, type);
481 }
482
483 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
484 {
485 uintptr_t header = a->header;
486
487 if (header & xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name);
488 if (header & xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value);
489
490 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
491 }
492
493 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
494 {
495 uintptr_t header = n->header;
496
497 if (header & xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name);
498 if (header & xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value);
499
500 for (xml_attribute_struct* attr = n->first_attribute; attr; )
501 {
502 xml_attribute_struct* next = attr->next_attribute;
503
504 destroy_attribute(attr, alloc);
505
506 attr = next;
507 }
508
509 for (xml_node_struct* child = n->first_child; child; )
510 {
511 xml_node_struct* next = child->next_sibling;
512
513 destroy_node(child, alloc);
514
515 child = next;
516 }
517
518 alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
519 }
520
521 PUGIXML_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
522 {
523 xml_node_struct* child = allocate_node(alloc, type);
524 if (!child) return 0;
525
526 child->parent = node;
527
528 xml_node_struct* first_child = node->first_child;
529
530 if (first_child)
531 {
532 xml_node_struct* last_child = first_child->prev_sibling_c;
533
534 last_child->next_sibling = child;
535 child->prev_sibling_c = last_child;
536 first_child->prev_sibling_c = child;
537 }
538 else
539 {
540 node->first_child = child;
541 child->prev_sibling_c = child;
542 }
543
544 return child;
545 }
546
547 PUGIXML_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc)
548 {
549 xml_attribute_struct* a = allocate_attribute(alloc);
550 if (!a) return 0;
551
552 xml_attribute_struct* first_attribute = node->first_attribute;
553
554 if (first_attribute)
555 {
556 xml_attribute_struct* last_attribute = first_attribute->prev_attribute_c;
557
558 last_attribute->next_attribute = a;
559 a->prev_attribute_c = last_attribute;
560 first_attribute->prev_attribute_c = a;
561 }
562 else
563 {
564 node->first_attribute = a;
565 a->prev_attribute_c = a;
566 }
567
568 return a;
569 }
570}
571
572// Helper classes for code generation
573namespace
574{
// Compile-time "off" switch for template parameters; value is 0
struct opt_false
{
	enum
	{
		value = 0
	};
};
579
// Compile-time "on" switch for template parameters; value is 1
struct opt_true
{
	enum
	{
		value = 1
	};
};
584}
585
586// Unicode utilities
587namespace
588{
// Returns value with its two bytes exchanged
inline uint16_t endian_swap(uint16_t value)
{
	const unsigned int low_byte = value & 0xff;
	const unsigned int high_byte = value >> 8;

	return static_cast<uint16_t>((low_byte << 8) | high_byte);
}
593
// Returns value with the order of its four bytes reversed
inline uint32_t endian_swap(uint32_t value)
{
	const uint32_t byte0 = value & 0xff;
	const uint32_t byte1 = value & 0xff00;
	const uint32_t byte2 = value & 0xff0000;
	const uint32_t byte3 = value >> 24;

	return (byte0 << 24) | (byte1 << 8) | (byte2 >> 8) | byte3;
}
598
// Sink that adds up the number of UTF-8 bytes needed for the code points it is given
struct utf8_counter
{
	typedef size_t value_type;

	// Code points below U+10000: 1, 2 or 3 bytes depending on magnitude
	static value_type low(value_type result, uint32_t ch)
	{
		if (ch >= 0x800) return result + 3; // U+0800..U+FFFF
		if (ch >= 0x80) return result + 2; // U+0080..U+07FF

		return result + 1; // U+0000..U+007F
	}

	// Code points U+10000..U+10FFFF always take 4 bytes
	static value_type high(value_type result, uint32_t)
	{
		return result + 4;
	}
};
619
// Sink that encodes code points as UTF-8 into a byte buffer; every function
// returns the position just past the bytes it wrote
struct utf8_writer
{
	typedef uint8_t* value_type;

	// Encodes a code point below U+10000 using 1, 2 or 3 bytes
	static value_type low(value_type result, uint32_t ch)
	{
		if (ch < 0x80)
		{
			// U+0000..U+007F
			*result++ = static_cast<uint8_t>(ch);
		}
		else if (ch < 0x800)
		{
			// U+0080..U+07FF
			*result++ = static_cast<uint8_t>(0xC0 | (ch >> 6));
			*result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
		}
		else
		{
			// U+0800..U+FFFF
			*result++ = static_cast<uint8_t>(0xE0 | (ch >> 12));
			*result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
			*result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
		}

		return result;
	}

	// Encodes a code point in U+10000..U+10FFFF using 4 bytes
	static value_type high(value_type result, uint32_t ch)
	{
		*result++ = static_cast<uint8_t>(0xF0 | (ch >> 18));
		*result++ = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
		*result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
		*result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));

		return result;
	}

	// Encodes any code point, dispatching on whether it is below U+10000
	static value_type any(value_type result, uint32_t ch)
	{
		if (ch < 0x10000) return low(result, ch);

		return high(result, ch);
	}
};
664
// Sink that adds up the number of UTF-16 code units needed for the code points it is given
struct utf16_counter
{
	typedef size_t value_type;

	// A code point below U+10000 takes a single unit
	static value_type low(value_type result, uint32_t)
	{
		return result + 1;
	}

	// A code point from U+10000 upwards takes two units
	static value_type high(value_type result, uint32_t)
	{
		return result + 2;
	}
};
679
// Sink that encodes code points as UTF-16 into a buffer of 16-bit units; every
// function returns the position just past the units it wrote
struct utf16_writer
{
	typedef uint16_t* value_type;

	// Stores a code point below U+10000 as a single unit
	static value_type low(value_type result, uint32_t ch)
	{
		result[0] = static_cast<uint16_t>(ch);

		return result + 1;
	}

	// Stores a code point from U+10000 upwards as a surrogate pair
	static value_type high(value_type result, uint32_t ch)
	{
		const uint32_t offset = ch - 0x10000;

		result[0] = static_cast<uint16_t>(0xD800 + (offset >> 10));
		result[1] = static_cast<uint16_t>(0xDC00 + (offset & 0x3ff));

		return result + 2;
	}

	// Stores any code point, dispatching on whether it is below U+10000
	static value_type any(value_type result, uint32_t ch)
	{
		if (ch < 0x10000) return low(result, ch);

		return high(result, ch);
	}
};
707
// Sink that counts code points; in UTF-32 every code point is exactly one unit
struct utf32_counter
{
	typedef size_t value_type;

	// Code points below U+10000
	static value_type low(value_type result, uint32_t)
	{
		return 1 + result;
	}

	// Code points from U+10000 upwards
	static value_type high(value_type result, uint32_t)
	{
		return 1 + result;
	}
};
722
// Sink that stores code points unchanged into a buffer of 32-bit units; every
// function writes one unit and returns the position just past it
struct utf32_writer
{
	typedef uint32_t* value_type;

	// Code points below U+10000
	static value_type low(value_type result, uint32_t ch)
	{
		result[0] = ch;

		return result + 1;
	}

	// Code points from U+10000 upwards
	static value_type high(value_type result, uint32_t ch)
	{
		result[0] = ch;

		return result + 1;
	}

	// Any code point
	static value_type any(value_type result, uint32_t ch)
	{
		result[0] = ch;

		return result + 1;
	}
};
748
749 template <size_t size> struct wchar_selector;
750
751 template <> struct wchar_selector<2>
752 {
753 typedef uint16_t type;
754 typedef utf16_counter counter;
755 typedef utf16_writer writer;
756 };
757
758 template <> struct wchar_selector<4>
759 {
760 typedef uint32_t type;
761 typedef utf32_counter counter;
762 typedef utf32_writer writer;
763 };
764
765 typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
766 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
767
// Decoder for UTF-8, UTF-16 and UTF-32 input that hands every decoded code
// point to a Traits sink.
// Traits must provide value_type and the static functions low(result, ch) and
// high(result, ch); each takes the running result and returns the updated one,
// and the final result is what the decode functions return.
// opt_swap::value selects whether 16- and 32-bit input units are byte-swapped
// (through endian_swap) before they are interpreted.
768 template <typename Traits, typename opt_swap = opt_false> struct utf_decoder
769 {
// Decodes size bytes of UTF-8 starting at data.
// One-, two- and three-byte sequences go to Traits::low, four-byte sequences
// go to Traits::high. A byte that does not start a complete sequence with
// valid continuation bytes is skipped on its own and produces no output.
// NOTE(review): decoded values are not range-checked, so overlong encodings
// are forwarded to Traits as decoded.
770 static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result)
771 {
772 const uint8_t utf8_byte_mask = 0x3f;
773
774 while (size)
775 {
776 uint8_t lead = *data;
777
778 // 0xxxxxxx -> U+0000..U+007F
779 if (lead < 0x80)
780 {
781 result = Traits::low(result, lead);
782 data += 1;
783 size -= 1;
784
785 // process aligned single-byte (ascii) blocks
// fast path: once the read position is 4-byte aligned, four bytes are tested with
// a single 32-bit load and consumed together while none of them has its top bit set
786 if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
787 {
788 while (size >= 4 && (*reinterpret_cast<const uint32_t*>(data) & 0x80808080) == 0)
789 {
790 result = Traits::low(result, data[0]);
791 result = Traits::low(result, data[1]);
792 result = Traits::low(result, data[2]);
793 result = Traits::low(result, data[3]);
794 data += 4;
795 size -= 4;
796 }
797 }
798 }
799 // 110xxxxx -> U+0080..U+07FF
800 else if ((unsigned)(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
801 {
802 result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
803 data += 2;
804 size -= 2;
805 }
806 // 1110xxxx -> U+0800-U+FFFF
807 else if ((unsigned)(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
808 {
809 result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
810 data += 3;
811 size -= 3;
812 }
813 // 11110xxx -> U+10000..U+10FFFF
814 else if ((unsigned)(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
815 {
816 result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
817 data += 4;
818 size -= 4;
819 }
820 // 10xxxxxx or 11111xxx -> invalid
// also reached for a valid lead byte whose sequence is truncated or has a bad continuation byte
821 else
822 {
823 data += 1;
824 size -= 1;
825 }
826 }
827
828 return result;
829 }
830
// Decodes size 16-bit units of UTF-16 starting at data.
// Units outside the surrogate range go to Traits::low; a lead surrogate that
// is followed by a trail surrogate is combined and goes to Traits::high.
// Unpaired surrogates are skipped and produce no output.
831 static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result)
832 {
833 const uint16_t* end = data + size;
834
835 while (data < end)
836 {
837 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
838
839 // U+0000..U+D7FF
840 if (lead < 0xD800)
841 {
842 result = Traits::low(result, lead);
843 data += 1;
844 }
845 // U+E000..U+FFFF
846 else if ((unsigned)(lead - 0xE000) < 0x2000)
847 {
848 result = Traits::low(result, lead);
849 data += 1;
850 }
851 // surrogate pair lead
852 else if ((unsigned)(lead - 0xD800) < 0x400 && data + 1 < end)
853 {
854 uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
855
856 if ((unsigned)(next - 0xDC00) < 0x400)
857 {
858 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
859 data += 2;
860 }
861 else
862 {
// lead surrogate without a trail: drop the lead only, the next unit is examined on its own
863 data += 1;
864 }
865 }
866 else
867 {
// trail surrogate without a lead, or a lead surrogate in the very last unit
868 data += 1;
869 }
870 }
871
872 return result;
873 }
874
// Decodes size 32-bit units of UTF-32 starting at data.
// Values below 0x10000 go to Traits::low and all other values go to
// Traits::high; the values are not validated in any other way.
875 static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result)
876 {
877 const uint32_t* end = data + size;
878
879 while (data < end)
880 {
881 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
882
883 // U+0000..U+FFFF
884 if (lead < 0x10000)
885 {
886 result = Traits::low(result, lead);
887 data += 1;
888 }
889 // U+10000..U+10FFFF
890 else
891 {
892 result = Traits::high(result, lead);
893 data += 1;
894 }
895 }
896
897 return result;
898 }
899 };
900
// Writes the byte-swapped form of each of the length elements of data to the
// same position in result
template <typename T> inline void convert_utf_endian_swap(T* result, const T* data, size_t length)
{
	size_t index = 0;

	while (index < length)
	{
		result[index] = endian_swap(data[index]);
		++index;
	}
}
905
906 inline void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
907 {
908 for (size_t i = 0; i < length; ++i) result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
909 }
910}
911
912namespace
913{
// Character class flags stored in chartype_table; one bit per class.
// Each comment lists the characters that belong to the class.
enum chartype_t
{
	ct_parse_pcdata = 1 << 0,  // \0, &, \r, <
	ct_parse_attr = 1 << 1,    // \0, &, \r, ', "
	ct_parse_attr_ws = 1 << 2, // \0, &, \r, ', ", \n, tab
	ct_space = 1 << 3,         // \r, \n, space, tab
	ct_parse_cdata = 1 << 4,   // \0, ], >, \r
	ct_parse_comment = 1 << 5, // \0, -, >, \r
	ct_symbol = 1 << 6,        // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
	ct_start_symbol = 1 << 7   // Any symbol > 127, a-z, A-Z, _, :
};
925
// Character class lookup table, indexed by character code. Every entry is the
// sum of the chartype_t flags of the classes the character belongs to, e.g.
//   entry 0 (\0)   = 55 = ct_parse_pcdata + ct_parse_attr + ct_parse_attr_ws + ct_parse_cdata + ct_parse_comment
//   entry 13 (\r)  = 63 = the above + ct_space
//   entry 45 ('-') = 96 = ct_parse_comment + ct_symbol
//   letters, '_', ':' and all codes from 128 up = 192 = ct_symbol + ct_start_symbol
// The trailing comment on each row gives the range of codes the row covers.
926 const unsigned char chartype_table[256] =
927 {
928 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
929 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
930 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
931 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
932 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
933 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
934 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
935 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
936
937 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
938 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
939 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
940 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
941 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
942 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
943 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
944 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
945 };
946
// Character class flags stored in chartypex_table; one bit per class.
// Each comment lists the characters that belong to the class.
enum chartypex_t
{
	ctx_special_pcdata = 1 << 0, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
	ctx_special_attr = 1 << 1,   // Any symbol >= 0 and < 32 (except \t), &, <, >, "
	ctx_start_symbol = 1 << 2,   // Any symbol > 127, a-z, A-Z, _
	ctx_digit = 1 << 3,          // 0-9
	ctx_symbol = 1 << 4          // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
};
955
// Second character class lookup table, indexed by character code. Every entry
// is the sum of the chartypex_t flags of the classes the character belongs to, e.g.
//   most control codes = 3  = ctx_special_pcdata + ctx_special_attr
//   \n and \r          = 2  = ctx_special_attr only; tab = 0
//   digits             = 24 = ctx_digit + ctx_symbol
//   letters, '_' and all codes from 128 up = 20 = ctx_start_symbol + ctx_symbol
// The trailing comment on each row gives the range of codes the row covers.
956 const unsigned char chartypex_table[256] =
957 {
958 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
959 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
960 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
961 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
962
963 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
964 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
965 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
966 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
967
968 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
969 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
970 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
971 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
972 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
973 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
974 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
975 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
976 };
977
// IS_CHARTYPE_IMPL(c, ct, table) yields a non-zero value when character c has
// any of the class bits ct set in table.
// In wide character mode every code from 128 upwards is looked up through
// entry 128, so the index never leaves the table; in narrow mode the character
// is converted to unsigned char and used as the index directly.
978#ifdef PUGIXML_WCHAR_MODE
979 #define IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
980#else
981 #define IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
982#endif
983
// Shorthands bound to the two tables: IS_CHARTYPE takes chartype_t flags,
// IS_CHARTYPEX takes chartypex_t flags
984 #define IS_CHARTYPE(c, ct) IS_CHARTYPE_IMPL(c, ct, chartype_table)
985 #define IS_CHARTYPEX(c, ct) IS_CHARTYPE_IMPL(c, ct, chartypex_table)
986
// Returns true when the lowest-addressed byte of an unsigned int holding 1 is
// itself 1, i.e. when the least significant byte is stored first
bool is_little_endian()
{
	unsigned int probe = 1;
	const unsigned char* first_byte = reinterpret_cast<unsigned char*>(&probe);

	return first_byte[0] == 1;
}
993
994 xml_encoding get_wchar_encoding()
995 {
996 STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
997
998 if (sizeof(wchar_t) == 2)
999 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1000 else
1001 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1002 }
1003
1004 xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)
1005 {
1006 // look for BOM in first few bytes
1007 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1008 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1009 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
1010 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
1011 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
1012
1013 // look for <, <? or <?xm in various encodings
1014 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
1015 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
1016 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
1017 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
1018 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8;
1019
1020 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
1021 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
1022 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
1023
1024 // no known BOM detected, assume utf8
1025 return encoding_utf8;
1026 }
1027
1028 xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
1029 {
1030 // replace wchar encoding with utf implementation
1031 if (encoding == encoding_wchar) return get_wchar_encoding();
1032
1033 // replace utf16 encoding with utf16 with specific endianness
1034 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1035
1036 // replace utf32 encoding with utf32 with specific endianness
1037 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1038
1039 // only do autodetection if no explicit encoding is requested
1040 if (encoding != encoding_auto) return encoding;
1041
1042 // skip encoding autodetection if input buffer is too small
1043 if (size < 4) return encoding_utf8;
1044
1045 // try to guess encoding (based on XML specification, Appendix F.1)
1046 const uint8_t* data = static_cast<const uint8_t*>(contents);
1047
1048 DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1049
1050 return guess_buffer_encoding(d0, d1, d2, d3);
1051 }
1052
1053 bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
1054 {
1055 if (is_mutable)
1056 {
1057 out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
1058 }
1059 else
1060 {
1061 void* buffer = global_allocate(size > 0 ? size : 1);
1062 if (!buffer) return false;
1063
1064 memcpy(buffer, contents, size);
1065
1066 out_buffer = static_cast<char_t*>(buffer);
1067 }
1068
1069 out_length = size / sizeof(char_t);
1070
1071 return true;
1072 }
1073
1074#ifdef PUGIXML_WCHAR_MODE
1075 inline bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
1076 {
1077 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
1078 (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
1079 }
1080
1081 bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
1082 {
1083 const char_t* data = static_cast<const char_t*>(contents);
1084
1085 if (is_mutable)
1086 {
1087 out_buffer = const_cast<char_t*>(data);
1088 }
1089 else
1090 {
1091 out_buffer = static_cast<char_t*>(global_allocate(size > 0 ? size : 1));
1092 if (!out_buffer) return false;
1093 }
1094
1095 out_length = size / sizeof(char_t);
1096
1097 convert_wchar_endian_swap(out_buffer, data, out_length);
1098
1099 return true;
1100 }
1101
1102 bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
1103 {
1104 const uint8_t* data = static_cast<const uint8_t*>(contents);
1105
1106 // first pass: get length in wchar_t units
1107 out_length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
1108
1109 // allocate buffer of suitable length
1110 out_buffer = static_cast<char_t*>(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1111 if (!out_buffer) return false;
1112
1113 // second pass: convert utf8 input to wchar_t
1114 wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
1115 wchar_writer::value_type out_end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, out_begin);
1116
1117 assert(out_end == out_begin + out_length);
1118 (void)!out_end;
1119
1120 return true;
1121 }
1122
1123 template <typename opt_swap> bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1124 {
1125 const uint16_t* data = static_cast<const uint16_t*>(contents);
1126 size_t length = size / sizeof(uint16_t);
1127
1128 // first pass: get length in wchar_t units
1129 out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf16_block(data, length, 0);
1130
1131 // allocate buffer of suitable length
1132 out_buffer = static_cast<char_t*>(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1133 if (!out_buffer) return false;
1134
1135 // second pass: convert utf16 input to wchar_t
1136 wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
1137 wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf16_block(data, length, out_begin);
1138
1139 assert(out_end == out_begin + out_length);
1140 (void)!out_end;
1141
1142 return true;
1143 }
1144
1145 template <typename opt_swap> bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1146 {
1147 const uint32_t* data = static_cast<const uint32_t*>(contents);
1148 size_t length = size / sizeof(uint32_t);
1149
1150 // first pass: get length in wchar_t units
1151 out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf32_block(data, length, 0);
1152
1153 // allocate buffer of suitable length
1154 out_buffer = static_cast<char_t*>(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1155 if (!out_buffer) return false;
1156
1157 // second pass: convert utf32 input to wchar_t
1158 wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
1159 wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf32_block(data, length, out_begin);
1160
1161 assert(out_end == out_begin + out_length);
1162 (void)!out_end;
1163
1164 return true;
1165 }
1166
1167 bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
1168 {
1169 // get native encoding
1170 xml_encoding wchar_encoding = get_wchar_encoding();
1171
1172 // fast path: no conversion required
1173 if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
1174
1175 // only endian-swapping is required
1176 if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
1177
1178 // source encoding is utf8
1179 if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size);
1180
1181 // source encoding is utf16
1182 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
1183 {
1184 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1185
1186 return (native_encoding == encoding) ?
1187 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
1188 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
1189 }
1190
1191 // source encoding is utf32
1192 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
1193 {
1194 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1195
1196 return (native_encoding == encoding) ?
1197 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
1198 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
1199 }
1200
1201 assert(!"Invalid encoding");
1202 return false;
1203 }
1204#else
1205 template <typename opt_swap> bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1206 {
1207 const uint16_t* data = static_cast<const uint16_t*>(contents);
1208 size_t length = size / sizeof(uint16_t);
1209
1210 // first pass: get length in utf8 units
1211 out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(data, length, 0);
1212
1213 // allocate buffer of suitable length
1214 out_buffer = static_cast<char_t*>(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1215 if (!out_buffer) return false;
1216
1217 // second pass: convert utf16 input to utf8
1218 uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
1219 uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf16_block(data, length, out_begin);
1220
1221 assert(out_end == out_begin + out_length);
1222 (void)!out_end;
1223
1224 return true;
1225 }
1226
1227 template <typename opt_swap> bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1228 {
1229 const uint32_t* data = static_cast<const uint32_t*>(contents);
1230 size_t length = size / sizeof(uint32_t);
1231
1232 // first pass: get length in utf8 units
1233 out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(data, length, 0);
1234
1235 // allocate buffer of suitable length
1236 out_buffer = static_cast<char_t*>(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1237 if (!out_buffer) return false;
1238
1239 // second pass: convert utf32 input to utf8
1240 uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
1241 uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf32_block(data, length, out_begin);
1242
1243 assert(out_end == out_begin + out_length);
1244 (void)!out_end;
1245
1246 return true;
1247 }
1248
1249 bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
1250 {
1251 // fast path: no conversion required
1252 if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
1253
1254 // source encoding is utf16
1255 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
1256 {
1257 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1258
1259 return (native_encoding == encoding) ?
1260 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
1261 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
1262 }
1263
1264 // source encoding is utf32
1265 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
1266 {
1267 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1268
1269 return (native_encoding == encoding) ?
1270 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
1271 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
1272 }
1273
1274 assert(!"Invalid encoding");
1275 return false;
1276 }
1277#endif
1278
1279 size_t as_utf8_begin(const wchar_t* str, size_t length)
1280 {
1281 STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1282
1283 // get length in utf8 characters
1284 return sizeof(wchar_t) == 2 ?
1285 utf_decoder<utf8_counter>::decode_utf16_block(reinterpret_cast<const uint16_t*>(str), length, 0) :
1286 utf_decoder<utf8_counter>::decode_utf32_block(reinterpret_cast<const uint32_t*>(str), length, 0);
1287 }
1288
1289 void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
1290 {
1291 STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1292
1293 // convert to utf8
1294 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
1295 uint8_t* end = sizeof(wchar_t) == 2 ?
1296 utf_decoder<utf8_writer>::decode_utf16_block(reinterpret_cast<const uint16_t*>(str), length, begin) :
1297 utf_decoder<utf8_writer>::decode_utf32_block(reinterpret_cast<const uint32_t*>(str), length, begin);
1298
1299 assert(begin + size == end);
1300 (void)!end;
1301
1302 // zero-terminate
1303 buffer[size] = 0;
1304 }
1305
1306#ifndef PUGIXML_NO_STL
1307 std::string as_utf8_impl(const wchar_t* str, size_t length)
1308 {
1309 // first pass: get length in utf8 characters
1310 size_t size = as_utf8_begin(str, length);
1311
1312 // allocate resulting string
1313 std::string result;
1314 result.resize(size);
1315
1316 // second pass: convert to utf8
1317 if (size > 0) as_utf8_end(&result[0], size, str, length);
1318
1319 return result;
1320 }
1321
1322 std::wstring as_wide_impl(const char* str, size_t size)
1323 {
1324 const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
1325
1326 // first pass: get length in wchar_t units
1327 size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
1328
1329 // allocate resulting string
1330 std::wstring result;
1331 result.resize(length);
1332
1333 // second pass: convert to wchar_t
1334 if (length > 0)
1335 {
1336 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
1337 wchar_writer::value_type end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, begin);
1338
1339 assert(begin + length == end);
1340 (void)!end;
1341 }
1342
1343 return result;
1344 }
1345#endif
1346
1347 inline bool strcpy_insitu_allow(size_t length, uintptr_t allocated, char_t* target)
1348 {
1349 assert(target);
1350 size_t target_length = strlength(target);
1351
1352 // always reuse document buffer memory if possible
1353 if (!allocated) return target_length >= length;
1354
1355 // reuse heap memory if waste is not too great
1356 const size_t reuse_threshold = 32;
1357
1358 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
1359 }
1360
1361 bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source)
1362 {
1363 size_t source_length = strlength(source);
1364
1365 if (source_length == 0)
1366 {
1367 // empty string and null pointer are equivalent, so just deallocate old memory
1368 xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
1369
1370 if (header & header_mask) alloc->deallocate_string(dest);
1371
1372 // mark the string as not allocated
1373 dest = 0;
1374 header &= ~header_mask;
1375
1376 return true;
1377 }
1378 else if (dest && strcpy_insitu_allow(source_length, header & header_mask, dest))
1379 {
1380 // we can reuse old buffer, so just copy the new data (including zero terminator)
1381 memcpy(dest, source, (source_length + 1) * sizeof(char_t));
1382
1383 return true;
1384 }
1385 else
1386 {
1387 xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
1388
1389 // allocate new buffer
1390 char_t* buf = alloc->allocate_string(source_length + 1);
1391 if (!buf) return false;
1392
1393 // copy the string (including zero terminator)
1394 memcpy(buf, source, (source_length + 1) * sizeof(char_t));
1395
1396 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
1397 if (header & header_mask) alloc->deallocate_string(dest);
1398
1399 // the string is now allocated, so set the flag
1400 dest = buf;
1401 header |= header_mask;
1402
1403 return true;
1404 }
1405 }
1406
1407 struct gap
1408 {
1409 char_t* end;
1410 size_t size;
1411
1412 gap(): end(0), size(0)
1413 {
1414 }
1415
1416 // Push new gap, move s count bytes further (skipping the gap).
1417 // Collapse previous gap.
1418 void push(char_t*& s, size_t count)
1419 {
1420 if (end) // there was a gap already; collapse it
1421 {
1422 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
1423 assert(s >= end);
1424 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
1425 }
1426
1427 s += count; // end of current gap
1428
1429 // "merge" two gaps
1430 end = s;
1431 size += count;
1432 }
1433
1434 // Collapse all gaps, return past-the-end pointer
1435 char_t* flush(char_t* s)
1436 {
1437 if (end)
1438 {
1439 // Move [old_gap_end, current_pos) to [old_gap_start, ...)
1440 assert(s >= end);
1441 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
1442
1443 return s - size;
1444 }
1445 else return s;
1446 }
1447 };
1448
1449 char_t* strconv_escape(char_t* s, gap& g)
1450 {
1451 char_t* stre = s + 1;
1452
1453 switch (*stre)
1454 {
1455 case '#': // &#...
1456 {
1457 unsigned int ucsc = 0;
1458
1459 if (stre[1] == 'x') // &#x... (hex code)
1460 {
1461 stre += 2;
1462
1463 char_t ch = *stre;
1464
1465 if (ch == ';') return stre;
1466
1467 for (;;)
1468 {
1469 if (static_cast<unsigned int>(ch - '0') <= 9)
1470 ucsc = 16 * ucsc + (ch - '0');
1471 else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
1472 ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
1473 else if (ch == ';')
1474 break;
1475 else // cancel
1476 return stre;
1477
1478 ch = *++stre;
1479 }
1480
1481 ++stre;
1482 }
1483 else // &#... (dec code)
1484 {
1485 char_t ch = *++stre;
1486
1487 if (ch == ';') return stre;
1488
1489 for (;;)
1490 {
1491 if (static_cast<unsigned int>(ch - '0') <= 9)
1492 ucsc = 10 * ucsc + (ch - '0');
1493 else if (ch == ';')
1494 break;
1495 else // cancel
1496 return stre;
1497
1498 ch = *++stre;
1499 }
1500
1501 ++stre;
1502 }
1503
1504 #ifdef PUGIXML_WCHAR_MODE
1505 s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
1506 #else
1507 s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
1508 #endif
1509
1510 g.push(s, stre - s);
1511 return stre;
1512 }
1513 case 'a': // &a
1514 {
1515 ++stre;
1516
1517 if (*stre == 'm') // &am
1518 {
1519 if (*++stre == 'p' && *++stre == ';') // &amp;
1520 {
1521 *s++ = '&';
1522 ++stre;
1523
1524 g.push(s, stre - s);
1525 return stre;
1526 }
1527 }
1528 else if (*stre == 'p') // &ap
1529 {
1530 if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
1531 {
1532 *s++ = '\'';
1533 ++stre;
1534
1535 g.push(s, stre - s);
1536 return stre;
1537 }
1538 }
1539 break;
1540 }
1541 case 'g': // &g
1542 {
1543 if (*++stre == 't' && *++stre == ';') // &gt;
1544 {
1545 *s++ = '>';
1546 ++stre;
1547
1548 g.push(s, stre - s);
1549 return stre;
1550 }
1551 break;
1552 }
1553 case 'l': // &l
1554 {
1555 if (*++stre == 't' && *++stre == ';') // &lt;
1556 {
1557 *s++ = '<';
1558 ++stre;
1559
1560 g.push(s, stre - s);
1561 return stre;
1562 }
1563 break;
1564 }
1565 case 'q': // &q
1566 {
1567 if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
1568 {
1569 *s++ = '"';
1570 ++stre;
1571
1572 g.push(s, stre - s);
1573 return stre;
1574 }
1575 break;
1576 }
1577 }
1578
1579 return stre;
1580 }
1581
1582 // Utility macro for last character handling
1583 #define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
1584
1585 char_t* strconv_comment(char_t* s, char_t endch)
1586 {
1587 gap g;
1588
1589 while (true)
1590 {
1591 while (!IS_CHARTYPE(*s, ct_parse_comment)) ++s;
1592
1593 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1594 {
1595 *s++ = '\n'; // replace first one with 0x0a
1596
1597 if (*s == '\n') g.push(s, 1);
1598 }
1599 else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) // comment ends here
1600 {
1601 *g.flush(s) = 0;
1602
1603 return s + (s[2] == '>' ? 3 : 2);
1604 }
1605 else if (*s == 0)
1606 {
1607 return 0;
1608 }
1609 else ++s;
1610 }
1611 }
1612
1613 char_t* strconv_cdata(char_t* s, char_t endch)
1614 {
1615 gap g;
1616
1617 while (true)
1618 {
1619 while (!IS_CHARTYPE(*s, ct_parse_cdata)) ++s;
1620
1621 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1622 {
1623 *s++ = '\n'; // replace first one with 0x0a
1624
1625 if (*s == '\n') g.push(s, 1);
1626 }
1627 else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) // CDATA ends here
1628 {
1629 *g.flush(s) = 0;
1630
1631 return s + 1;
1632 }
1633 else if (*s == 0)
1634 {
1635 return 0;
1636 }
1637 else ++s;
1638 }
1639 }
1640
// Pointer to a PCDATA conversion routine: takes the start of the text and returns
// the position at which parsing should continue.
typedef char_t* (*strconv_pcdata_t)(char_t*);
1642
1643 template <typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
1644 {
1645 static char_t* parse(char_t* s)
1646 {
1647 gap g;
1648
1649 while (true)
1650 {
1651 while (!IS_CHARTYPE(*s, ct_parse_pcdata)) ++s;
1652
1653 if (*s == '<') // PCDATA ends here
1654 {
1655 *g.flush(s) = 0;
1656
1657 return s + 1;
1658 }
1659 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1660 {
1661 *s++ = '\n'; // replace first one with 0x0a
1662
1663 if (*s == '\n') g.push(s, 1);
1664 }
1665 else if (opt_escape::value && *s == '&')
1666 {
1667 s = strconv_escape(s, g);
1668 }
1669 else if (*s == 0)
1670 {
1671 return s;
1672 }
1673 else ++s;
1674 }
1675 }
1676 };
1677
1678 strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
1679 {
1680 STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20);
1681
1682 switch ((optmask >> 4) & 3) // get bitmask for flags (eol escapes)
1683 {
1684 case 0: return strconv_pcdata_impl<opt_false, opt_false>::parse;
1685 case 1: return strconv_pcdata_impl<opt_false, opt_true>::parse;
1686 case 2: return strconv_pcdata_impl<opt_true, opt_false>::parse;
1687 case 3: return strconv_pcdata_impl<opt_true, opt_true>::parse;
1688 default: return 0; // should not get here
1689 }
1690 }
1691
// Pointer to an attribute value conversion routine: takes the start of the value and
// the closing quote character; returns the position after the closing quote, or 0
// if the value is not terminated.
typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
1693
1694 template <typename opt_escape> struct strconv_attribute_impl
1695 {
1696 static char_t* parse_wnorm(char_t* s, char_t end_quote)
1697 {
1698 gap g;
1699
1700 // trim leading whitespaces
1701 if (IS_CHARTYPE(*s, ct_space))
1702 {
1703 char_t* str = s;
1704
1705 do ++str;
1706 while (IS_CHARTYPE(*str, ct_space));
1707
1708 g.push(s, str - s);
1709 }
1710
1711 while (true)
1712 {
1713 while (!IS_CHARTYPE(*s, ct_parse_attr_ws | ct_space)) ++s;
1714
1715 if (*s == end_quote)
1716 {
1717 char_t* str = g.flush(s);
1718
1719 do *str-- = 0;
1720 while (IS_CHARTYPE(*str, ct_space));
1721
1722 return s + 1;
1723 }
1724 else if (IS_CHARTYPE(*s, ct_space))
1725 {
1726 *s++ = ' ';
1727
1728 if (IS_CHARTYPE(*s, ct_space))
1729 {
1730 char_t* str = s + 1;
1731 while (IS_CHARTYPE(*str, ct_space)) ++str;
1732
1733 g.push(s, str - s);
1734 }
1735 }
1736 else if (opt_escape::value && *s == '&')
1737 {
1738 s = strconv_escape(s, g);
1739 }
1740 else if (!*s)
1741 {
1742 return 0;
1743 }
1744 else ++s;
1745 }
1746 }
1747
1748 static char_t* parse_wconv(char_t* s, char_t end_quote)
1749 {
1750 gap g;
1751
1752 while (true)
1753 {
1754 while (!IS_CHARTYPE(*s, ct_parse_attr_ws)) ++s;
1755
1756 if (*s == end_quote)
1757 {
1758 *g.flush(s) = 0;
1759
1760 return s + 1;
1761 }
1762 else if (IS_CHARTYPE(*s, ct_space))
1763 {
1764 if (*s == '\r')
1765 {
1766 *s++ = ' ';
1767
1768 if (*s == '\n') g.push(s, 1);
1769 }
1770 else *s++ = ' ';
1771 }
1772 else if (opt_escape::value && *s == '&')
1773 {
1774 s = strconv_escape(s, g);
1775 }
1776 else if (!*s)
1777 {
1778 return 0;
1779 }
1780 else ++s;
1781 }
1782 }
1783
1784 static char_t* parse_eol(char_t* s, char_t end_quote)
1785 {
1786 gap g;
1787
1788 while (true)
1789 {
1790 while (!IS_CHARTYPE(*s, ct_parse_attr)) ++s;
1791
1792 if (*s == end_quote)
1793 {
1794 *g.flush(s) = 0;
1795
1796 return s + 1;
1797 }
1798 else if (*s == '\r')
1799 {
1800 *s++ = '\n';
1801
1802 if (*s == '\n') g.push(s, 1);
1803 }
1804 else if (opt_escape::value && *s == '&')
1805 {
1806 s = strconv_escape(s, g);
1807 }
1808 else if (!*s)
1809 {
1810 return 0;
1811 }
1812 else ++s;
1813 }
1814 }
1815
1816 static char_t* parse_simple(char_t* s, char_t end_quote)
1817 {
1818 gap g;
1819
1820 while (true)
1821 {
1822 while (!IS_CHARTYPE(*s, ct_parse_attr)) ++s;
1823
1824 if (*s == end_quote)
1825 {
1826 *g.flush(s) = 0;
1827
1828 return s + 1;
1829 }
1830 else if (opt_escape::value && *s == '&')
1831 {
1832 s = strconv_escape(s, g);
1833 }
1834 else if (!*s)
1835 {
1836 return 0;
1837 }
1838 else ++s;
1839 }
1840 }
1841 };
1842
1843 strconv_attribute_t get_strconv_attribute(unsigned int optmask)
1844 {
1845 STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
1846
1847 switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
1848 {
1849 case 0: return strconv_attribute_impl<opt_false>::parse_simple;
1850 case 1: return strconv_attribute_impl<opt_true>::parse_simple;
1851 case 2: return strconv_attribute_impl<opt_false>::parse_eol;
1852 case 3: return strconv_attribute_impl<opt_true>::parse_eol;
1853 case 4: return strconv_attribute_impl<opt_false>::parse_wconv;
1854 case 5: return strconv_attribute_impl<opt_true>::parse_wconv;
1855 case 6: return strconv_attribute_impl<opt_false>::parse_wconv;
1856 case 7: return strconv_attribute_impl<opt_true>::parse_wconv;
1857 case 8: return strconv_attribute_impl<opt_false>::parse_wnorm;
1858 case 9: return strconv_attribute_impl<opt_true>::parse_wnorm;
1859 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
1860 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
1861 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
1862 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
1863 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
1864 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
1865 default: return 0; // should not get here
1866 }
1867 }
1868
1869 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
1870 {
1871 xml_parse_result result;
1872 result.status = status;
1873 result.offset = offset;
1874
1875 return result;
1876 }
1877
1878 struct xml_parser
1879 {
1880 xml_allocator alloc;
1881 char_t* error_offset;
1882 jmp_buf error_handler;
1883
1884 // Parser utilities.
1885 #define SKIPWS() { while (IS_CHARTYPE(*s, ct_space)) ++s; }
1886 #define OPTSET(OPT) ( optmsk & OPT )
1887 #define PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); if (!cursor) THROW_ERROR(status_out_of_memory, s); }
1888 #define POPNODE() { cursor = cursor->parent; }
1889 #define SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
1890 #define SCANWHILE(X) { while ((X)) ++s; }
1891 #define ENDSEG() { ch = *s; *s = 0; ++s; }
1892 #define THROW_ERROR(err, m) error_offset = m, longjmp(error_handler, err)
1893 #define CHECK_ERROR(err, m) { if (*s == 0) THROW_ERROR(err, m); }
1894
1895 xml_parser(const xml_allocator& alloc): alloc(alloc), error_offset(0)
1896 {
1897 }
1898
1899 // DOCTYPE consists of nested sections of the following possible types:
1900 // <!-- ... -->, <? ... ?>, "...", '...'
1901 // <![...]]>
1902 // <!...>
1903 // First group can not contain nested groups
1904 // Second group can contain nested groups of the same type
1905 // Third group can contain all other groups
1906 char_t* parse_doctype_primitive(char_t* s)
1907 {
1908 if (*s == '"' || *s == '\'')
1909 {
1910 // quoted string
1911 char_t ch = *s++;
1912 SCANFOR(*s == ch);
1913 if (!*s) THROW_ERROR(status_bad_doctype, s);
1914
1915 s++;
1916 }
1917 else if (s[0] == '<' && s[1] == '?')
1918 {
1919 // <? ... ?>
1920 s += 2;
1921 SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
1922 if (!*s) THROW_ERROR(status_bad_doctype, s);
1923
1924 s += 2;
1925 }
1926 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
1927 {
1928 s += 4;
1929 SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
1930 if (!*s) THROW_ERROR(status_bad_doctype, s);
1931
1932 s += 4;
1933 }
1934 else THROW_ERROR(status_bad_doctype, s);
1935
1936 return s;
1937 }
1938
1939 char_t* parse_doctype_ignore(char_t* s)
1940 {
1941 assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
1942 s++;
1943
1944 while (*s)
1945 {
1946 if (s[0] == '<' && s[1] == '!' && s[2] == '[')
1947 {
1948 // nested ignore section
1949 s = parse_doctype_ignore(s);
1950 }
1951 else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
1952 {
1953 // ignore section end
1954 s += 3;
1955
1956 return s;
1957 }
1958 else s++;
1959 }
1960
1961 THROW_ERROR(status_bad_doctype, s);
1962
1963 return s;
1964 }
1965
1966 char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel)
1967 {
1968 assert(s[0] == '<' && s[1] == '!');
1969 s++;
1970
1971 while (*s)
1972 {
1973 if (s[0] == '<' && s[1] == '!' && s[2] != '-')
1974 {
1975 if (s[2] == '[')
1976 {
1977 // ignore
1978 s = parse_doctype_ignore(s);
1979 }
1980 else
1981 {
1982 // some control group
1983 s = parse_doctype_group(s, endch, false);
1984 }
1985 }
1986 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
1987 {
1988 // unknown tag (forbidden), or some primitive group
1989 s = parse_doctype_primitive(s);
1990 }
1991 else if (*s == '>')
1992 {
1993 s++;
1994
1995 return s;
1996 }
1997 else s++;
1998 }
1999
2000 if (!toplevel || endch != '>') THROW_ERROR(status_bad_doctype, s);
2001
2002 return s;
2003 }
2004
2005 char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
2006 {
2007 // parse node contents, starting with exclamation mark
2008 ++s;
2009
2010 if (*s == '-') // '<!-...'
2011 {
2012 ++s;
2013
2014 if (*s == '-') // '<!--...'
2015 {
2016 ++s;
2017
2018 if (OPTSET(parse_comments))
2019 {
2020 PUSHNODE(node_comment); // Append a new node on the tree.
2021 cursor->value = s; // Save the offset.
2022 }
2023
2024 if (OPTSET(parse_eol) && OPTSET(parse_comments))
2025 {
2026 s = strconv_comment(s, endch);
2027
2028 if (!s) THROW_ERROR(status_bad_comment, cursor->value);
2029 }
2030 else
2031 {
2032 // Scan for terminating '-->'.
2033 SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>'));
2034 CHECK_ERROR(status_bad_comment, s);
2035
2036 if (OPTSET(parse_comments))
2037 *s = 0; // Zero-terminate this segment at the first terminating '-'.
2038
2039 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
2040 }
2041 }
2042 else THROW_ERROR(status_bad_comment, s);
2043 }
2044 else if (*s == '[')
2045 {
2046 // '<![CDATA[...'
2047 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
2048 {
2049 ++s;
2050
2051 if (OPTSET(parse_cdata))
2052 {
2053 PUSHNODE(node_cdata); // Append a new node on the tree.
2054 cursor->value = s; // Save the offset.
2055
2056 if (OPTSET(parse_eol))
2057 {
2058 s = strconv_cdata(s, endch);
2059
2060 if (!s) THROW_ERROR(status_bad_cdata, cursor->value);
2061 }
2062 else
2063 {
2064 // Scan for terminating ']]>'.
2065 SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
2066 CHECK_ERROR(status_bad_cdata, s);
2067
2068 *s++ = 0; // Zero-terminate this segment.
2069 }
2070 }
2071 else // Flagged for discard, but we still have to scan for the terminator.
2072 {
2073 // Scan for terminating ']]>'.
2074 SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
2075 CHECK_ERROR(status_bad_cdata, s);
2076
2077 ++s;
2078 }
2079
2080 s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
2081 }
2082 else THROW_ERROR(status_bad_cdata, s);
2083 }
2084 else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E'))
2085 {
2086 s -= 2;
2087
2088 if (cursor->parent) THROW_ERROR(status_bad_doctype, s);
2089
2090 char_t* mark = s + 9;
2091
2092 s = parse_doctype_group(s, endch, true);
2093
2094 if (OPTSET(parse_doctype))
2095 {
2096 while (IS_CHARTYPE(*mark, ct_space)) ++mark;
2097
2098 PUSHNODE(node_doctype);
2099
2100 cursor->value = mark;
2101
2102 assert((s[0] == 0 && endch == '>') || s[-1] == '>');
2103 s[*s == 0 ? 0 : -1] = 0;
2104
2105 POPNODE();
2106 }
2107 }
2108 else if (*s == 0 && endch == '-') THROW_ERROR(status_bad_comment, s);
2109 else if (*s == 0 && endch == '[') THROW_ERROR(status_bad_cdata, s);
2110 else THROW_ERROR(status_unrecognized_tag, s);
2111
2112 return s;
2113 }
2114
2115 char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
2116 {
2117 // load into registers
2118 xml_node_struct* cursor = ref_cursor;
2119 char_t ch = 0;
2120
2121 // parse node contents, starting with question mark
2122 ++s;
2123
2124 // read PI target
2125 char_t* target = s;
2126
2127 if (!IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_pi, s);
2128
2129 SCANWHILE(IS_CHARTYPE(*s, ct_symbol));
2130 CHECK_ERROR(status_bad_pi, s);
2131
2132 // determine node type; stricmp / strcasecmp is not portable
2133 bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
2134
2135 if (declaration ? OPTSET(parse_declaration) : OPTSET(parse_pi))
2136 {
2137 if (declaration)
2138 {
2139 // disallow non top-level declarations
2140 if (cursor->parent) THROW_ERROR(status_bad_pi, s);
2141
2142 PUSHNODE(node_declaration);
2143 }
2144 else
2145 {
2146 PUSHNODE(node_pi);
2147 }
2148
2149 cursor->name = target;
2150
2151 ENDSEG();
2152
2153 // parse value/attributes
2154 if (ch == '?')
2155 {
2156 // empty node
2157 if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_pi, s);
2158 s += (*s == '>');
2159
2160 POPNODE();
2161 }
2162 else if (IS_CHARTYPE(ch, ct_space))
2163 {
2164 SKIPWS();
2165
2166 // scan for tag end
2167 char_t* value = s;
2168
2169 SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
2170 CHECK_ERROR(status_bad_pi, s);
2171
2172 if (declaration)
2173 {
2174 // replace ending ? with / so that 'element' terminates properly
2175 *s = '/';
2176
2177 // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
2178 s = value;
2179 }
2180 else
2181 {
2182 // store value and step over >
2183 cursor->value = value;
2184 POPNODE();
2185
2186 ENDSEG();
2187
2188 s += (*s == '>');
2189 }
2190 }
2191 else THROW_ERROR(status_bad_pi, s);
2192 }
2193 else
2194 {
2195 // scan for tag end
2196 SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
2197 CHECK_ERROR(status_bad_pi, s);
2198
2199 s += (s[1] == '>' ? 2 : 1);
2200 }
2201
2202 // store from registers
2203 ref_cursor = cursor;
2204
2205 return s;
2206 }
2207
// Main parsing loop: walks the zero-terminated buffer `s` and builds the node tree under `xmldoc`.
//  s      - mutable buffer; names and values are stored as pointers into it (see cursor->name = s, a->value = s)
//  xmldoc - node that receives top-level content; the cursor must be back at it when the buffer ends
//  optmsk - parse option bit mask; selects the attribute/pcdata converters and is tested via OPTSET
//  endch  - the caller passes the character that sat in the last buffer slot before it was overwritten with 0,
//           so "*s == 0" checks below consult endch to decide what the buffer really ended with
// Errors are raised through THROW_ERROR / CHECK_ERROR (macros defined outside this section).
// NOTE(review): PUSHNODE / POPNODE / ENDSEG / SKIPWS / SCANWHILE / SCANFOR are also macros defined elsewhere;
// comments below describe them only as far as their use here shows.
2208 void parse(char_t* s, xml_node_struct* xmldoc, unsigned int optmsk, char_t endch)
2209 {
2210 strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
2211 strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
2212
2213 char_t ch = 0;
2214 xml_node_struct* cursor = xmldoc;
2215 char_t* mark = s;
2216
2217 while (*s != 0)
2218 {
2219 if (*s == '<')
2220 {
2221 ++s;
2222
// Entry point for tag handling; also reached by goto from the PCDATA branch once it has stepped over '<'.
2223 LOC_TAG:
2224 if (IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
2225 {
2226 PUSHNODE(node_element); // Append a new node to the tree.
2227
2228 cursor->name = s;
2229
2230 SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
2231 ENDSEG(); // Save char in 'ch', terminate & step over.
2232
2233 if (ch == '>')
2234 {
2235 // end of tag
2236 }
2237 else if (IS_CHARTYPE(ch, ct_space))
2238 {
// Attribute loop; also entered by goto after parse_question leaves the cursor on a declaration node.
2239 LOC_ATTRIBUTES:
2240 while (true)
2241 {
2242 SKIPWS(); // Eat any whitespace.
2243
2244 if (IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
2245 {
2246 xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute.
2247 if (!a) THROW_ERROR(status_out_of_memory, s);
2248
2249 a->name = s; // Save the offset.
2250
2251 SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
2252 CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
2253
2254 ENDSEG(); // Save char in 'ch', terminate & step over.
2255 CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
2256
2257 if (IS_CHARTYPE(ch, ct_space))
2258 {
2259 SKIPWS(); // Eat any whitespace.
2260 CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
2261
2262 ch = *s;
2263 ++s;
2264 }
2265
2266 if (ch == '=') // '<... #=...'
2267 {
2268 SKIPWS(); // Eat any whitespace.
2269
2270 if (*s == '"' || *s == '\'') // '<... #="...'
2271 {
2272 ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
2273 ++s; // Step over the quote.
2274 a->value = s; // Save the offset.
2275
2276 s = strconv_attribute(s, ch);
2277
// A null result from the converter is reported as a bad attribute at the value start.
2278 if (!s) THROW_ERROR(status_bad_attribute, a->value);
2279
2280 // After this line the loop continues from the start;
2281 // Whitespaces, / and > are ok, symbols and EOF are wrong,
2282 // everything else will be detected
2283 if (IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_attribute, s);
2284 }
2285 else THROW_ERROR(status_bad_attribute, s);
2286 }
2287 else THROW_ERROR(status_bad_attribute, s);
2288 }
2289 else if (*s == '/')
2290 {
2291 ++s;
2292
2293 if (*s == '>')
2294 {
2295 POPNODE();
2296 s++;
2297 break;
2298 }
// Buffer ended right after '/': accepted only if the character replaced by the terminator was '>'.
2299 else if (*s == 0 && endch == '>')
2300 {
2301 POPNODE();
2302 break;
2303 }
2304 else THROW_ERROR(status_bad_start_element, s);
2305 }
2306 else if (*s == '>')
2307 {
2308 ++s;
2309
2310 break;
2311 }
// Same end-of-buffer rule for a plain '>' that was overwritten by the terminator.
2312 else if (*s == 0 && endch == '>')
2313 {
2314 break;
2315 }
2316 else THROW_ERROR(status_bad_start_element, s);
2317 }
2318
2319 // !!!
2320 }
2321 else if (ch == '/') // '<#.../'
2322 {
2323 if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_start_element, s);
2324
2325 POPNODE(); // Pop.
2326
// Step over '>' only if it is physically present (it may have been replaced by the terminator).
2327 s += (*s == '>');
2328 }
2329 else if (ch == 0)
2330 {
2331 // we stepped over null terminator, backtrack & handle closing tag
2332 --s;
2333
2334 if (endch != '>') THROW_ERROR(status_bad_start_element, s);
2335 }
2336 else THROW_ERROR(status_bad_start_element, s);
2337 }
2338 else if (*s == '/')
2339 {
2340 ++s;
2341
// Closing tag: its name must match the name of the node the cursor is on, character by character.
2342 char_t* name = cursor->name;
2343 if (!name) THROW_ERROR(status_end_element_mismatch, s);
2344
2345 while (IS_CHARTYPE(*s, ct_symbol))
2346 {
2347 if (*s++ != *name++) THROW_ERROR(status_end_element_mismatch, s);
2348 }
2349
2350 if (*name)
2351 {
// Only the final name character is missing and it equals endch: the buffer ended inside the closing tag
// (no '>' follows), so this is reported as a bad end element rather than a name mismatch.
2352 if (*s == 0 && name[0] == endch && name[1] == 0) THROW_ERROR(status_bad_end_element, s);
2353 else THROW_ERROR(status_end_element_mismatch, s);
2354 }
2355
2356 POPNODE(); // Pop.
2357
2358 SKIPWS();
2359
2360 if (*s == 0)
2361 {
2362 if (endch != '>') THROW_ERROR(status_bad_end_element, s);
2363 }
2364 else
2365 {
2366 if (*s != '>') THROW_ERROR(status_bad_end_element, s);
2367 ++s;
2368 }
2369 }
2370 else if (*s == '?') // '<?...'
2371 {
2372 s = parse_question(s, cursor, optmsk, endch);
2373
2374 assert(cursor);
// NOTE(review): the comparison suggests the header's type bits hold (node type - 1) - confirm against the header layout.
2375 if ((cursor->header & xml_memory_page_type_mask) + 1 == node_declaration) goto LOC_ATTRIBUTES;
2376 }
2377 else if (*s == '!') // '<!...'
2378 {
2379 s = parse_exclamation(s, cursor, optmsk, endch);
2380 }
2381 else if (*s == 0 && endch == '?') THROW_ERROR(status_bad_pi, s);
2382 else THROW_ERROR(status_unrecognized_tag, s);
2383 }
2384 else
2385 {
2386 mark = s; // Save this offset while searching for a terminator.
2387
2388 SKIPWS(); // Eat whitespace if no genuine PCDATA here.
2389
// Nothing but whitespace before the next '<' (or the end): drop it, unless parse_ws_pcdata is set
// and some whitespace was actually skipped.
2390 if ((!OPTSET(parse_ws_pcdata) || mark == s) && (*s == '<' || !*s))
2391 {
2392 continue;
2393 }
2394
2395 s = mark;
2396
// Text is stored as a pcdata node only when the cursor has a parent; otherwise it is skipped up to the next '<'.
2397 if (cursor->parent)
2398 {
2399 PUSHNODE(node_pcdata); // Append a new node on the tree.
2400 cursor->value = s; // Save the offset.
2401
2402 s = strconv_pcdata(s);
2403
2404 POPNODE(); // Pop since this is a standalone.
2405
2406 if (!*s) break;
2407 }
2408 else
2409 {
2410 SCANFOR(*s == '<'); // '...<'
2411 if (!*s) break;
2412
2413 ++s;
2414 }
2415
2416 // We're after '<'
2417 goto LOC_TAG;
2418 }
2419 }
2420
2421 // check that last tag is closed
2422 if (cursor != xmldoc) THROW_ERROR(status_end_element_mismatch, s);
2423 }
2424
2425 static xml_parse_result parse(char_t* buffer, size_t length, xml_node_struct* root, unsigned int optmsk)
2426 {
2427 xml_document_struct* xmldoc = static_cast<xml_document_struct*>(root);
2428
2429 // store buffer for offset_debug
2430 xmldoc->buffer = buffer;
2431
2432 // early-out for empty documents
2433 if (length == 0) return make_parse_result(status_ok);
2434
2435 // create parser on stack
2436 xml_parser parser(*xmldoc);
2437
2438 // save last character and make buffer zero-terminated (speeds up parsing)
2439 char_t endch = buffer[length - 1];
2440 buffer[length - 1] = 0;
2441
2442 // perform actual parsing
2443 int error = setjmp(parser.error_handler);
2444
2445 if (error == 0)
2446 {
2447 parser.parse(buffer, xmldoc, optmsk, endch);
2448 }
2449
2450 xml_parse_result result = make_parse_result(static_cast<xml_parse_status>(error), parser.error_offset ? parser.error_offset - buffer : 0);
2451 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
2452
2453 // update allocator state
2454 *static_cast<xml_allocator*>(xmldoc) = parser.alloc;
2455
2456 // since we removed last character, we have to handle the only possible false positive
2457 if (result && endch == '<')
2458 {
2459 // there's no possible well-formed document with < at the end
2460 return make_parse_result(status_unrecognized_tag, length);
2461 }
2462
2463 return result;
2464 }
2465 };
2466
2467 // Output facilities
2468 xml_encoding get_write_native_encoding()
2469 {
2470 #ifdef PUGIXML_WCHAR_MODE
2471 return get_wchar_encoding();
2472 #else
2473 return encoding_utf8;
2474 #endif
2475 }
2476
2477 xml_encoding get_write_encoding(xml_encoding encoding)
2478 {
2479 // replace wchar encoding with utf implementation
2480 if (encoding == encoding_wchar) return get_wchar_encoding();
2481
2482 // replace utf16 encoding with utf16 with specific endianness
2483 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2484
2485 // replace utf32 encoding with utf32 with specific endianness
2486 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2487
2488 // only do autodetection if no explicit encoding is requested
2489 if (encoding != encoding_auto) return encoding;
2490
2491 // assume utf8 encoding
2492 return encoding_utf8;
2493 }
2494
2495#ifdef PUGIXML_WCHAR_MODE
2496 size_t get_valid_length(const char_t* data, size_t length)
2497 {
2498 assert(length > 0);
2499
2500 // discard last character if it's the lead of a surrogate pair
2501 return (sizeof(wchar_t) == 2 && (unsigned)(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
2502 }
2503
2504 size_t convert_buffer(char* result, const char_t* data, size_t length, xml_encoding encoding)
2505 {
2506 // only endian-swapping is required
2507 if (need_endian_swap_utf(encoding, get_wchar_encoding()))
2508 {
2509 convert_wchar_endian_swap(reinterpret_cast<char_t*>(result), data, length);
2510
2511 return length * sizeof(char_t);
2512 }
2513
2514 // convert to utf8
2515 if (encoding == encoding_utf8)
2516 {
2517 uint8_t* dest = reinterpret_cast<uint8_t*>(result);
2518
2519 uint8_t* end = sizeof(wchar_t) == 2 ?
2520 utf_decoder<utf8_writer>::decode_utf16_block(reinterpret_cast<const uint16_t*>(data), length, dest) :
2521 utf_decoder<utf8_writer>::decode_utf32_block(reinterpret_cast<const uint32_t*>(data), length, dest);
2522
2523 return static_cast<size_t>(end - dest);
2524 }
2525
2526 // convert to utf16
2527 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2528 {
2529 uint16_t* dest = reinterpret_cast<uint16_t*>(result);
2530
2531 // convert to native utf16
2532 uint16_t* end = utf_decoder<utf16_writer>::decode_utf32_block(reinterpret_cast<const uint32_t*>(data), length, dest);
2533
2534 // swap if necessary
2535 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2536
2537 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2538
2539 return static_cast<size_t>(end - dest) * sizeof(uint16_t);
2540 }
2541
2542 // convert to utf32
2543 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2544 {
2545 uint32_t* dest = reinterpret_cast<uint32_t*>(result);
2546
2547 // convert to native utf32
2548 uint32_t* end = utf_decoder<utf32_writer>::decode_utf16_block(reinterpret_cast<const uint16_t*>(data), length, dest);
2549
2550 // swap if necessary
2551 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2552
2553 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2554
2555 return static_cast<size_t>(end - dest) * sizeof(uint32_t);
2556 }
2557
2558 assert(!"Invalid encoding");
2559 return 0;
2560 }
2561#else
2562 size_t get_valid_length(const char_t* data, size_t length)
2563 {
2564 assert(length > 4);
2565
2566 for (size_t i = 1; i <= 4; ++i)
2567 {
2568 uint8_t ch = static_cast<uint8_t>(data[length - i]);
2569
2570 // either a standalone character or a leading one
2571 if ((ch & 0xc0) != 0x80) return length - i;
2572 }
2573
2574 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
2575 return length;
2576 }
2577
2578 size_t convert_buffer(char* result, const char_t* data, size_t length, xml_encoding encoding)
2579 {
2580 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2581 {
2582 uint16_t* dest = reinterpret_cast<uint16_t*>(result);
2583
2584 // convert to native utf16
2585 uint16_t* end = utf_decoder<utf16_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
2586
2587 // swap if necessary
2588 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2589
2590 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2591
2592 return static_cast<size_t>(end - dest) * sizeof(uint16_t);
2593 }
2594
2595 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2596 {
2597 uint32_t* dest = reinterpret_cast<uint32_t*>(result);
2598
2599 // convert to native utf32
2600 uint32_t* end = utf_decoder<utf32_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
2601
2602 // swap if necessary
2603 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2604
2605 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2606
2607 return static_cast<size_t>(end - dest) * sizeof(uint32_t);
2608 }
2609
2610 assert(!"Invalid encoding");
2611 return 0;
2612 }
2613#endif
2614
2615 class xml_buffered_writer
2616 {
2617 xml_buffered_writer(const xml_buffered_writer&);
2618 xml_buffered_writer& operator=(const xml_buffered_writer&);
2619
2620 public:
2621 xml_buffered_writer(xml_writer& writer, xml_encoding user_encoding): writer(writer), bufsize(0), encoding(get_write_encoding(user_encoding))
2622 {
2623 }
2624
2625 ~xml_buffered_writer()
2626 {
2627 flush();
2628 }
2629
2630 void flush()
2631 {
2632 flush(buffer, bufsize);
2633 bufsize = 0;
2634 }
2635
2636 void flush(const char_t* data, size_t size)
2637 {
2638 if (size == 0) return;
2639
2640 // fast path, just write data
2641 if (encoding == get_write_native_encoding())
2642 writer.write(data, size * sizeof(char_t));
2643 else
2644 {
2645 // convert chunk
2646 size_t result = convert_buffer(scratch, data, size, encoding);
2647 assert(result <= sizeof(scratch));
2648
2649 // write data
2650 writer.write(scratch, result);
2651 }
2652 }
2653
2654 void write(const char_t* data, size_t length)
2655 {
2656 if (bufsize + length > bufcapacity)
2657 {
2658 // flush the remaining buffer contents
2659 flush();
2660
2661 // handle large chunks
2662 if (length > bufcapacity)
2663 {
2664 if (encoding == get_write_native_encoding())
2665 {
2666 // fast path, can just write data chunk
2667 writer.write(data, length * sizeof(char_t));
2668 return;
2669 }
2670
2671 // need to convert in suitable chunks
2672 while (length > bufcapacity)
2673 {
2674 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
2675 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
2676 size_t chunk_size = get_valid_length(data, bufcapacity);
2677
2678 // convert chunk and write
2679 flush(data, chunk_size);
2680
2681 // iterate
2682 data += chunk_size;
2683 length -= chunk_size;
2684 }
2685
2686 // small tail is copied below
2687 bufsize = 0;
2688 }
2689 }
2690
2691 memcpy(buffer + bufsize, data, length * sizeof(char_t));
2692 bufsize += length;
2693 }
2694
2695 void write(const char_t* data)
2696 {
2697 write(data, strlength(data));
2698 }
2699
2700 void write(char_t d0)
2701 {
2702 if (bufsize + 1 > bufcapacity) flush();
2703
2704 buffer[bufsize + 0] = d0;
2705 bufsize += 1;
2706 }
2707
2708 void write(char_t d0, char_t d1)
2709 {
2710 if (bufsize + 2 > bufcapacity) flush();
2711
2712 buffer[bufsize + 0] = d0;
2713 buffer[bufsize + 1] = d1;
2714 bufsize += 2;
2715 }
2716
2717 void write(char_t d0, char_t d1, char_t d2)
2718 {
2719 if (bufsize + 3 > bufcapacity) flush();
2720
2721 buffer[bufsize + 0] = d0;
2722 buffer[bufsize + 1] = d1;
2723 buffer[bufsize + 2] = d2;
2724 bufsize += 3;
2725 }
2726
2727 void write(char_t d0, char_t d1, char_t d2, char_t d3)
2728 {
2729 if (bufsize + 4 > bufcapacity) flush();
2730
2731 buffer[bufsize + 0] = d0;
2732 buffer[bufsize + 1] = d1;
2733 buffer[bufsize + 2] = d2;
2734 buffer[bufsize + 3] = d3;
2735 bufsize += 4;
2736 }
2737
2738 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
2739 {
2740 if (bufsize + 5 > bufcapacity) flush();
2741
2742 buffer[bufsize + 0] = d0;
2743 buffer[bufsize + 1] = d1;
2744 buffer[bufsize + 2] = d2;
2745 buffer[bufsize + 3] = d3;
2746 buffer[bufsize + 4] = d4;
2747 bufsize += 5;
2748 }
2749
2750 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
2751 {
2752 if (bufsize + 6 > bufcapacity) flush();
2753
2754 buffer[bufsize + 0] = d0;
2755 buffer[bufsize + 1] = d1;
2756 buffer[bufsize + 2] = d2;
2757 buffer[bufsize + 3] = d3;
2758 buffer[bufsize + 4] = d4;
2759 buffer[bufsize + 5] = d5;
2760 bufsize += 6;
2761 }
2762
2763 // utf8 maximum expansion: x4 (-> utf32)
2764 // utf16 maximum expansion: x2 (-> utf32)
2765 // utf32 maximum expansion: x1
2766 enum { bufcapacity = 2048 };
2767
2768 char_t buffer[bufcapacity];
2769 char scratch[4 * bufcapacity];
2770
2771 xml_writer& writer;
2772 size_t bufsize;
2773 xml_encoding encoding;
2774 };
2775
2776 void write_bom(xml_writer& writer, xml_encoding encoding)
2777 {
2778 switch (encoding)
2779 {
2780 case encoding_utf8:
2781 writer.write("\xef\xbb\xbf", 3);
2782 break;
2783
2784 case encoding_utf16_be:
2785 writer.write("\xfe\xff", 2);
2786 break;
2787
2788 case encoding_utf16_le:
2789 writer.write("\xff\xfe", 2);
2790 break;
2791
2792 case encoding_utf32_be:
2793 writer.write("\x00\x00\xfe\xff", 4);
2794 break;
2795
2796 case encoding_utf32_le:
2797 writer.write("\xff\xfe\x00\x00", 4);
2798 break;
2799
2800 default:
2801 assert(!"Invalid encoding");
2802 }
2803 }
2804
2805 void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
2806 {
2807 while (*s)
2808 {
2809 const char_t* prev = s;
2810
2811 // While *s is a usual symbol
2812 while (!IS_CHARTYPEX(*s, type)) ++s;
2813
2814 writer.write(prev, static_cast<size_t>(s - prev));
2815
2816 switch (*s)
2817 {
2818 case 0: break;
2819 case '&':
2820 writer.write('&', 'a', 'm', 'p', ';');
2821 ++s;
2822 break;
2823 case '<':
2824 writer.write('&', 'l', 't', ';');
2825 ++s;
2826 break;
2827 case '>':
2828 writer.write('&', 'g', 't', ';');
2829 ++s;
2830 break;
2831 case '"':
2832 writer.write('&', 'q', 'u', 'o', 't', ';');
2833 ++s;
2834 break;
2835 default: // s is not a usual symbol
2836 {
2837 unsigned int ch = static_cast<unsigned int>(*s++);
2838 assert(ch < 32);
2839
2840 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
2841 }
2842 }
2843 }
2844 }
2845
2846 void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
2847 {
2848 do
2849 {
2850 writer.write('<', '!', '[', 'C', 'D');
2851 writer.write('A', 'T', 'A', '[');
2852
2853 const char_t* prev = s;
2854
2855 // look for ]]> sequence - we can't output it as is since it terminates CDATA
2856 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
2857
2858 // skip ]] if we stopped at ]]>, > will go to the next CDATA section
2859 if (*s) s += 2;
2860
2861 writer.write(prev, static_cast<size_t>(s - prev));
2862
2863 writer.write(']', ']', '>');
2864 }
2865 while (*s);
2866 }
2867
2868 void node_output_attributes(xml_buffered_writer& writer, const xml_node& node)
2869 {
2870 const char_t* default_name = PUGIXML_TEXT(":anonymous");
2871
2872 for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute())
2873 {
2874 writer.write(' ');
2875 writer.write(a.name()[0] ? a.name() : default_name);
2876 writer.write('=', '"');
2877
2878 text_output_escaped(writer, a.value(), ctx_special_attr);
2879
2880 writer.write('"');
2881 }
2882 }
2883
2884 void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth)
2885 {
2886 const char_t* default_name = PUGIXML_TEXT(":anonymous");
2887
2888 if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
2889 for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
2890
2891 switch (node.type())
2892 {
2893 case node_document:
2894 {
2895 for (xml_node n = node.first_child(); n; n = n.next_sibling())
2896 node_output(writer, n, indent, flags, depth);
2897 break;
2898 }
2899
2900 case node_element:
2901 {
2902 const char_t* name = node.name()[0] ? node.name() : default_name;
2903
2904 writer.write('<');
2905 writer.write(name);
2906
2907 node_output_attributes(writer, node);
2908
2909 if (flags & format_raw)
2910 {
2911 if (!node.first_child())
2912 writer.write(' ', '/', '>');
2913 else
2914 {
2915 writer.write('>');
2916
2917 for (xml_node n = node.first_child(); n; n = n.next_sibling())
2918 node_output(writer, n, indent, flags, depth + 1);
2919
2920 writer.write('<', '/');
2921 writer.write(name);
2922 writer.write('>');
2923 }
2924 }
2925 else if (!node.first_child())
2926 writer.write(' ', '/', '>', '\n');
2927 else if (node.first_child() == node.last_child() && (node.first_child().type() == node_pcdata || node.first_child().type() == node_cdata))
2928 {
2929 writer.write('>');
2930
2931 if (node.first_child().type() == node_pcdata)
2932 text_output_escaped(writer, node.first_child().value(), ctx_special_pcdata);
2933 else
2934 text_output_cdata(writer, node.first_child().value());
2935
2936 writer.write('<', '/');
2937 writer.write(name);
2938 writer.write('>', '\n');
2939 }
2940 else
2941 {
2942 writer.write('>', '\n');
2943
2944 for (xml_node n = node.first_child(); n; n = n.next_sibling())
2945 node_output(writer, n, indent, flags, depth + 1);
2946
2947 if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
2948 for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
2949
2950 writer.write('<', '/');
2951 writer.write(name);
2952 writer.write('>', '\n');
2953 }
2954
2955 break;
2956 }
2957
2958 case node_pcdata:
2959 text_output_escaped(writer, node.value(), ctx_special_pcdata);
2960 if ((flags & format_raw) == 0) writer.write('\n');
2961 break;
2962
2963 case node_cdata:
2964 text_output_cdata(writer, node.value());
2965 if ((flags & format_raw) == 0) writer.write('\n');
2966 break;
2967
2968 case node_comment:
2969 writer.write('<', '!', '-', '-');
2970 writer.write(node.value());
2971 writer.write('-', '-', '>');
2972 if ((flags & format_raw) == 0) writer.write('\n');
2973 break;
2974
2975 case node_pi:
2976 case node_declaration:
2977 writer.write('<', '?');
2978 writer.write(node.name()[0] ? node.name() : default_name);
2979
2980 if (node.type() == node_declaration)
2981 {
2982 node_output_attributes(writer, node);
2983 }
2984 else if (node.value()[0])
2985 {
2986 writer.write(' ');
2987 writer.write(node.value());
2988 }
2989
2990 writer.write('?', '>');
2991 if ((flags & format_raw) == 0) writer.write('\n');
2992 break;
2993
2994 case node_doctype:
2995 writer.write('<', '!', 'D', 'O', 'C');
2996 writer.write('T', 'Y', 'P', 'E');
2997
2998 if (node.value()[0])
2999 {
3000 writer.write(' ');
3001 writer.write(node.value());
3002 }
3003
3004 writer.write('>');
3005 if ((flags & format_raw) == 0) writer.write('\n');
3006 break;
3007
3008 default:
3009 assert(!"Invalid node type");
3010 }
3011 }
3012
3013 inline bool has_declaration(const xml_node& node)
3014 {
3015 for (xml_node child = node.first_child(); child; child = child.next_sibling())
3016 {
3017 xml_node_type type = child.type();
3018
3019 if (type == node_declaration) return true;
3020 if (type == node_element) return false;
3021 }
3022
3023 return false;
3024 }
3025
3026 inline bool allow_insert_child(xml_node_type parent, xml_node_type child)
3027 {
3028 if (parent != node_document && parent != node_element) return false;
3029 if (child == node_document || child == node_null) return false;
3030 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
3031
3032 return true;
3033 }
3034
3035 void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip)
3036 {
3037 assert(dest.type() == source.type());
3038
3039 switch (source.type())
3040 {
3041 case node_element:
3042 {
3043 dest.set_name(source.name());
3044
3045 for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
3046 dest.append_attribute(a.name()).set_value(a.value());
3047
3048 for (xml_node c = source.first_child(); c; c = c.next_sibling())
3049 {
3050 if (c == skip) continue;
3051
3052 xml_node cc = dest.append_child(c.type());
3053 assert(cc);
3054
3055 recursive_copy_skip(cc, c, skip);
3056 }
3057
3058 break;
3059 }
3060
3061 case node_pcdata:
3062 case node_cdata:
3063 case node_comment:
3064 case node_doctype:
3065 dest.set_value(source.value());
3066 break;
3067
3068 case node_pi:
3069 dest.set_name(source.name());
3070 dest.set_value(source.value());
3071 break;
3072
3073 case node_declaration:
3074 {
3075 dest.set_name(source.name());
3076
3077 for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
3078 dest.append_attribute(a.name()).set_value(a.value());
3079
3080 break;
3081 }
3082
3083 default:
3084 assert(!"Invalid node type");
3085 }
3086 }
3087
3088 // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
3089 xml_parse_status get_file_size(FILE* file, size_t& out_result)
3090 {
3091 #if defined(_MSC_VER) && _MSC_VER >= 1400
3092 // there are 64-bit versions of fseek/ftell, let's use them
3093 typedef __int64 length_type;
3094
3095 _fseeki64(file, 0, SEEK_END);
3096 length_type length = _ftelli64(file);
3097 _fseeki64(file, 0, SEEK_SET);
3098 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && !defined(__STRICT_ANSI__)
3099 // there are 64-bit versions of fseek/ftell, let's use them
3100 typedef off64_t length_type;
3101
3102 fseeko64(file, 0, SEEK_END);
3103 length_type length = ftello64(file);
3104 fseeko64(file, 0, SEEK_SET);
3105 #else
3106 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
3107 typedef long length_type;
3108
3109 fseek(file, 0, SEEK_END);
3110 length_type length = ftell(file);
3111 fseek(file, 0, SEEK_SET);
3112 #endif
3113
3114 // check for I/O errors
3115 if (length < 0) return status_io_error;
3116
3117 // check for overflow
3118 size_t result = static_cast<size_t>(length);
3119
3120 if (static_cast<length_type>(result) != length) return status_out_of_memory;
3121
3122 // finalize
3123 out_result = result;
3124
3125 return status_ok;
3126 }
3127
3128 xml_parse_result load_file_impl(xml_document& doc, FILE* file, unsigned int options, xml_encoding encoding)
3129 {
3130 if (!file) return make_parse_result(status_file_not_found);
3131
3132 // get file size (can result in I/O errors)
3133 size_t size = 0;
3134 xml_parse_status size_status = get_file_size(file, size);
3135
3136 if (size_status != status_ok)
3137 {
3138 fclose(file);
3139 return make_parse_result(size_status);
3140 }
3141
3142 // allocate buffer for the whole file
3143 char* contents = static_cast<char*>(global_allocate(size > 0 ? size : 1));
3144
3145 if (!contents)
3146 {
3147 fclose(file);
3148 return make_parse_result(status_out_of_memory);
3149 }
3150
3151 // read file in memory
3152 size_t read_size = fread(contents, 1, size, file);
3153 fclose(file);
3154
3155 if (read_size != size)
3156 {
3157 global_deallocate(contents);
3158 return make_parse_result(status_io_error);
3159 }
3160
3161 return doc.load_buffer_inplace_own(contents, size, options, encoding);
3162 }
3163
3164#ifndef PUGIXML_NO_STL
3165 template <typename T> xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding)
3166 {
3167 // get length of remaining data in stream
3168 typename std::basic_istream<T>::pos_type pos = stream.tellg();
3169 stream.seekg(0, std::ios::end);
3170 std::streamoff length = stream.tellg() - pos;
3171 stream.seekg(pos);
3172
3173 if (stream.fail() || pos < 0) return make_parse_result(status_io_error);
3174
3175 // guard against huge files
3176 size_t read_length = static_cast<size_t>(length);
3177
3178 if (static_cast<std::streamsize>(read_length) != length || length < 0) return make_parse_result(status_out_of_memory);
3179
3180 // read stream data into memory (guard against stream exceptions with buffer holder)
3181 buffer_holder buffer(global_allocate((read_length > 0 ? read_length : 1) * sizeof(T)), global_deallocate);
3182 if (!buffer.data) return make_parse_result(status_out_of_memory);
3183
3184 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
3185
3186 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
3187 if (stream.bad()) return make_parse_result(status_io_error);
3188
3189 // load data from buffer
3190 size_t actual_length = static_cast<size_t>(stream.gcount());
3191 assert(actual_length <= read_length);
3192
3193 return doc.load_buffer_inplace_own(buffer.release(), actual_length * sizeof(T), options, encoding);
3194 }
3195#endif
3196
3197#if defined(_MSC_VER) || defined(__BORLANDC__) || defined(__MINGW32__)
3198 FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
3199 {
3200 return _wfopen(path, mode);
3201 }
3202#else
3203 char* convert_path_heap(const wchar_t* str)
3204 {
3205 assert(str);
3206
3207 // first pass: get length in utf8 characters
3208 size_t length = wcslen(str);
3209 size_t size = as_utf8_begin(str, length);
3210
3211 // allocate resulting string
3212 char* result = static_cast<char*>(global_allocate(size + 1));
3213 if (!result) return 0;
3214
3215 // second pass: convert to utf8
3216 as_utf8_end(result, size, str, length);
3217
3218 return result;
3219 }
3220
3221 FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
3222 {
3223 // there is no standard function to open wide paths, so our best bet is to try utf8 path
3224 char* path_utf8 = convert_path_heap(path);
3225 if (!path_utf8) return 0;
3226
3227 // convert mode to ASCII (we mirror _wfopen interface)
3228 char mode_ascii[4] = {0};
3229 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
3230
3231 // try to open the utf8 path
3232 FILE* result = fopen(path_utf8, mode_ascii);
3233
3234 // free dummy buffer
3235 global_deallocate(path_utf8);
3236
3237 return result;
3238 }
3239#endif
3240}
3241
3242namespace pugi
3243{
3244 xml_writer_file::xml_writer_file(void* file): file(file)
3245 {
3246 }
3247
3248 void xml_writer_file::write(const void* data, size_t size)
3249 {
3250 fwrite(data, size, 1, static_cast<FILE*>(file));
3251 }
3252
3253#ifndef PUGIXML_NO_STL
3254 xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
3255 {
3256 }
3257
3258 xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
3259 {
3260 }
3261
3262 void xml_writer_stream::write(const void* data, size_t size)
3263 {
3264 if (narrow_stream)
3265 {
3266 assert(!wide_stream);
3267 narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
3268 }
3269 else
3270 {
3271 assert(wide_stream);
3272 assert(size % sizeof(wchar_t) == 0);
3273
3274 wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
3275 }
3276 }
3277#endif
3278
3279 xml_tree_walker::xml_tree_walker(): _depth(0)
3280 {
3281 }
3282
3283 xml_tree_walker::~xml_tree_walker()
3284 {
3285 }
3286
3287 int xml_tree_walker::depth() const
3288 {
3289 return _depth;
3290 }
3291
3292 bool xml_tree_walker::begin(xml_node&)
3293 {
3294 return true;
3295 }
3296
3297 bool xml_tree_walker::end(xml_node&)
3298 {
3299 return true;
3300 }
3301
3302 xml_attribute::xml_attribute(): _attr(0)
3303 {
3304 }
3305
3306 xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
3307 {
3308 }
3309
3310 xml_attribute::operator xml_attribute::unspecified_bool_type() const
3311 {
3312 return _attr ? &xml_attribute::_attr : 0;
3313 }
3314
3315 bool xml_attribute::operator!() const
3316 {
3317 return !_attr;
3318 }
3319
3320 bool xml_attribute::operator==(const xml_attribute& r) const
3321 {
3322 return (_attr == r._attr);
3323 }
3324
3325 bool xml_attribute::operator!=(const xml_attribute& r) const
3326 {
3327 return (_attr != r._attr);
3328 }
3329
3330 bool xml_attribute::operator<(const xml_attribute& r) const
3331 {
3332 return (_attr < r._attr);
3333 }
3334
3335 bool xml_attribute::operator>(const xml_attribute& r) const
3336 {
3337 return (_attr > r._attr);
3338 }
3339
3340 bool xml_attribute::operator<=(const xml_attribute& r) const
3341 {
3342 return (_attr <= r._attr);
3343 }
3344
3345 bool xml_attribute::operator>=(const xml_attribute& r) const
3346 {
3347 return (_attr >= r._attr);
3348 }
3349
3350 xml_attribute xml_attribute::next_attribute() const
3351 {
3352 return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
3353 }
3354
3355 xml_attribute xml_attribute::previous_attribute() const
3356 {
3357 return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
3358 }
3359
3360 int xml_attribute::as_int() const
3361 {
3362 if (!_attr || !_attr->value) return 0;
3363
3364 #ifdef PUGIXML_WCHAR_MODE
3365 return (int)wcstol(_attr->value, 0, 10);
3366 #else
3367 return (int)strtol(_attr->value, 0, 10);
3368 #endif
3369 }
3370
3371 unsigned int xml_attribute::as_uint() const
3372 {
3373 if (!_attr || !_attr->value) return 0;
3374
3375 #ifdef PUGIXML_WCHAR_MODE
3376 return (unsigned int)wcstoul(_attr->value, 0, 10);
3377 #else
3378 return (unsigned int)strtoul(_attr->value, 0, 10);
3379 #endif
3380 }
3381
3382 double xml_attribute::as_double() const
3383 {
3384 if (!_attr || !_attr->value) return 0;
3385
3386 #ifdef PUGIXML_WCHAR_MODE
3387 return wcstod(_attr->value, 0);
3388 #else
3389 return strtod(_attr->value, 0);
3390 #endif
3391 }
3392
3393 float xml_attribute::as_float() const
3394 {
3395 if (!_attr || !_attr->value) return 0;
3396
3397 #ifdef PUGIXML_WCHAR_MODE
3398 return (float)wcstod(_attr->value, 0);
3399 #else
3400 return (float)strtod(_attr->value, 0);
3401 #endif
3402 }
3403
3404 bool xml_attribute::as_bool() const
3405 {
3406 if (!_attr || !_attr->value) return false;
3407
3408 // only look at first char
3409 char_t first = *_attr->value;
3410
3411 // 1*, t* (true), T* (True), y* (yes), Y* (YES)
3412 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
3413 }
3414
3415 bool xml_attribute::empty() const
3416 {
3417 return !_attr;
3418 }
3419
3420 const char_t* xml_attribute::name() const
3421 {
3422 return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT("");
3423 }
3424
3425 const char_t* xml_attribute::value() const
3426 {
3427 return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT("");
3428 }
3429
3430 size_t xml_attribute::hash_value() const
3431 {
3432 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
3433 }
3434
3435 xml_attribute_struct* xml_attribute::internal_object() const
3436 {
3437 return _attr;
3438 }
3439
3440 xml_attribute& xml_attribute::operator=(const char_t* rhs)
3441 {
3442 set_value(rhs);
3443 return *this;
3444 }
3445
3446 xml_attribute& xml_attribute::operator=(int rhs)
3447 {
3448 set_value(rhs);
3449 return *this;
3450 }
3451
3452 xml_attribute& xml_attribute::operator=(unsigned int rhs)
3453 {
3454 set_value(rhs);
3455 return *this;
3456 }
3457
3458 xml_attribute& xml_attribute::operator=(double rhs)
3459 {
3460 set_value(rhs);
3461 return *this;
3462 }
3463
3464 xml_attribute& xml_attribute::operator=(bool rhs)
3465 {
3466 set_value(rhs);
3467 return *this;
3468 }
3469
3470 bool xml_attribute::set_name(const char_t* rhs)
3471 {
3472 if (!_attr) return false;
3473
3474 return strcpy_insitu(_attr->name, _attr->header, xml_memory_page_name_allocated_mask, rhs);
3475 }
3476
3477 bool xml_attribute::set_value(const char_t* rhs)
3478 {
3479 if (!_attr) return false;
3480
3481 return strcpy_insitu(_attr->value, _attr->header, xml_memory_page_value_allocated_mask, rhs);
3482 }
3483
3484 bool xml_attribute::set_value(int rhs)
3485 {
3486 char buf[128];
3487 sprintf(buf, "%d", rhs);
3488
3489 #ifdef PUGIXML_WCHAR_MODE
3490 char_t wbuf[128];
3491 widen_ascii(wbuf, buf);
3492
3493 return set_value(wbuf);
3494 #else
3495 return set_value(buf);
3496 #endif
3497 }
3498
3499 bool xml_attribute::set_value(unsigned int rhs)
3500 {
3501 char buf[128];
3502 sprintf(buf, "%u", rhs);
3503
3504 #ifdef PUGIXML_WCHAR_MODE
3505 char_t wbuf[128];
3506 widen_ascii(wbuf, buf);
3507
3508 return set_value(wbuf);
3509 #else
3510 return set_value(buf);
3511 #endif
3512 }
3513
3514 bool xml_attribute::set_value(double rhs)
3515 {
3516 char buf[128];
3517 sprintf(buf, "%g", rhs);
3518
3519 #ifdef PUGIXML_WCHAR_MODE
3520 char_t wbuf[128];
3521 widen_ascii(wbuf, buf);
3522
3523 return set_value(wbuf);
3524 #else
3525 return set_value(buf);
3526 #endif
3527 }
3528
3529 bool xml_attribute::set_value(bool rhs)
3530 {
3531 return set_value(rhs ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
3532 }
3533
3534#ifdef __BORLANDC__
3535 bool operator&&(const xml_attribute& lhs, bool rhs)
3536 {
3537 return (bool)lhs && rhs;
3538 }
3539
3540 bool operator||(const xml_attribute& lhs, bool rhs)
3541 {
3542 return (bool)lhs || rhs;
3543 }
3544#endif
3545
3546 xml_node::xml_node(): _root(0)
3547 {
3548 }
3549
3550 xml_node::xml_node(xml_node_struct* p): _root(p)
3551 {
3552 }
3553
3554 xml_node::operator xml_node::unspecified_bool_type() const
3555 {
3556 return _root ? &xml_node::_root : 0;
3557 }
3558
3559 bool xml_node::operator!() const
3560 {
3561 return !_root;
3562 }
3563
3564 xml_node::iterator xml_node::begin() const
3565 {
3566 return iterator(_root ? _root->first_child : 0, _root);
3567 }
3568
3569 xml_node::iterator xml_node::end() const
3570 {
3571 return iterator(0, _root);
3572 }
3573
3574 xml_node::attribute_iterator xml_node::attributes_begin() const
3575 {
3576 return attribute_iterator(_root ? _root->first_attribute : 0, _root);
3577 }
3578
3579 xml_node::attribute_iterator xml_node::attributes_end() const
3580 {
3581 return attribute_iterator(0, _root);
3582 }
3583
3584 bool xml_node::operator==(const xml_node& r) const
3585 {
3586 return (_root == r._root);
3587 }
3588
3589 bool xml_node::operator!=(const xml_node& r) const
3590 {
3591 return (_root != r._root);
3592 }
3593
3594 bool xml_node::operator<(const xml_node& r) const
3595 {
3596 return (_root < r._root);
3597 }
3598
3599 bool xml_node::operator>(const xml_node& r) const
3600 {
3601 return (_root > r._root);
3602 }
3603
3604 bool xml_node::operator<=(const xml_node& r) const
3605 {
3606 return (_root <= r._root);
3607 }
3608
3609 bool xml_node::operator>=(const xml_node& r) const
3610 {
3611 return (_root >= r._root);
3612 }
3613
3614 bool xml_node::empty() const
3615 {
3616 return !_root;
3617 }
3618
3619 const char_t* xml_node::name() const
3620 {
3621 return (_root && _root->name) ? _root->name : PUGIXML_TEXT("");
3622 }
3623
3624 xml_node_type xml_node::type() const
3625 {
3626 return _root ? static_cast<xml_node_type>((_root->header & xml_memory_page_type_mask) + 1) : node_null;
3627 }
3628
3629 const char_t* xml_node::value() const
3630 {
3631 return (_root && _root->value) ? _root->value : PUGIXML_TEXT("");
3632 }
3633
3634 xml_node xml_node::child(const char_t* name) const
3635 {
3636 if (!_root) return xml_node();
3637
3638 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
3639 if (i->name && strequal(name, i->name)) return xml_node(i);
3640
3641 return xml_node();
3642 }
3643
3644 xml_attribute xml_node::attribute(const char_t* name) const
3645 {
3646 if (!_root) return xml_attribute();
3647
3648 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
3649 if (i->name && strequal(name, i->name))
3650 return xml_attribute(i);
3651
3652 return xml_attribute();
3653 }
3654
3655 xml_node xml_node::next_sibling(const char_t* name) const
3656 {
3657 if (!_root) return xml_node();
3658
3659 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
3660 if (i->name && strequal(name, i->name)) return xml_node(i);
3661
3662 return xml_node();
3663 }
3664
3665 xml_node xml_node::next_sibling() const
3666 {
3667 if (!_root) return xml_node();
3668
3669 if (_root->next_sibling) return xml_node(_root->next_sibling);
3670 else return xml_node();
3671 }
3672
3673 xml_node xml_node::previous_sibling(const char_t* name) const
3674 {
3675 if (!_root) return xml_node();
3676
3677 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
3678 if (i->name && strequal(name, i->name)) return xml_node(i);
3679
3680 return xml_node();
3681 }
3682
3683 xml_node xml_node::previous_sibling() const
3684 {
3685 if (!_root) return xml_node();
3686
3687 if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
3688 else return xml_node();
3689 }
3690
3691 xml_node xml_node::parent() const
3692 {
3693 return _root ? xml_node(_root->parent) : xml_node();
3694 }
3695
3696 xml_node xml_node::root() const
3697 {
3698 if (!_root) return xml_node();
3699
3700 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(_root->header & xml_memory_page_pointer_mask);
3701
3702 return xml_node(static_cast<xml_document_struct*>(page->allocator));
3703 }
3704
3705 const char_t* xml_node::child_value() const
3706 {
3707 if (!_root) return PUGIXML_TEXT("");
3708
3709 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
3710 {
3711 xml_node_type type = static_cast<xml_node_type>((i->header & xml_memory_page_type_mask) + 1);
3712
3713 if (i->value && (type == node_pcdata || type == node_cdata))
3714 return i->value;
3715 }
3716
3717 return PUGIXML_TEXT("");
3718 }
3719
3720 const char_t* xml_node::child_value(const char_t* name) const
3721 {
3722 return child(name).child_value();
3723 }
3724
3725 xml_attribute xml_node::first_attribute() const
3726 {
3727 return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
3728 }
3729
3730 xml_attribute xml_node::last_attribute() const
3731 {
3732 return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
3733 }
3734
3735 xml_node xml_node::first_child() const
3736 {
3737 return _root ? xml_node(_root->first_child) : xml_node();
3738 }
3739
3740 xml_node xml_node::last_child() const
3741 {
3742 return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
3743 }
3744
3745 bool xml_node::set_name(const char_t* rhs)
3746 {
3747 switch (type())
3748 {
3749 case node_pi:
3750 case node_declaration:
3751 case node_element:
3752 return strcpy_insitu(_root->name, _root->header, xml_memory_page_name_allocated_mask, rhs);
3753
3754 default:
3755 return false;
3756 }
3757 }
3758
3759 bool xml_node::set_value(const char_t* rhs)
3760 {
3761 switch (type())
3762 {
3763 case node_pi:
3764 case node_cdata:
3765 case node_pcdata:
3766 case node_comment:
3767 case node_doctype:
3768 return strcpy_insitu(_root->value, _root->header, xml_memory_page_value_allocated_mask, rhs);
3769
3770 default:
3771 return false;
3772 }
3773 }
3774
3775 xml_attribute xml_node::append_attribute(const char_t* name)
3776 {
3777 if (type() != node_element && type() != node_declaration) return xml_attribute();
3778
3779 xml_attribute a(append_attribute_ll(_root, get_allocator(_root)));
3780 a.set_name(name);
3781
3782 return a;
3783 }
3784
3785 xml_attribute xml_node::prepend_attribute(const char_t* name)
3786 {
3787 if (type() != node_element && type() != node_declaration) return xml_attribute();
3788
3789 xml_attribute a(allocate_attribute(get_allocator(_root)));
3790 if (!a) return xml_attribute();
3791
3792 a.set_name(name);
3793
3794 xml_attribute_struct* head = _root->first_attribute;
3795
3796 if (head)
3797 {
3798 a._attr->prev_attribute_c = head->prev_attribute_c;
3799 head->prev_attribute_c = a._attr;
3800 }
3801 else
3802 a._attr->prev_attribute_c = a._attr;
3803
3804 a._attr->next_attribute = head;
3805 _root->first_attribute = a._attr;
3806
3807 return a;
3808 }
3809
3810 xml_attribute xml_node::insert_attribute_before(const char_t* name, const xml_attribute& attr)
3811 {
3812 if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
3813
3814 // check that attribute belongs to *this
3815 xml_attribute_struct* cur = attr._attr;
3816
3817 while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
3818
3819 if (cur != _root->first_attribute) return xml_attribute();
3820
3821 xml_attribute a(allocate_attribute(get_allocator(_root)));
3822 if (!a) return xml_attribute();
3823
3824 a.set_name(name);
3825
3826 if (attr._attr->prev_attribute_c->next_attribute)
3827 attr._attr->prev_attribute_c->next_attribute = a._attr;
3828 else
3829 _root->first_attribute = a._attr;
3830
3831 a._attr->prev_attribute_c = attr._attr->prev_attribute_c;
3832 a._attr->next_attribute = attr._attr;
3833 attr._attr->prev_attribute_c = a._attr;
3834
3835 return a;
3836 }
3837
3838 xml_attribute xml_node::insert_attribute_after(const char_t* name, const xml_attribute& attr)
3839 {
3840 if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
3841
3842 // check that attribute belongs to *this
3843 xml_attribute_struct* cur = attr._attr;
3844
3845 while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
3846
3847 if (cur != _root->first_attribute) return xml_attribute();
3848
3849 xml_attribute a(allocate_attribute(get_allocator(_root)));
3850 if (!a) return xml_attribute();
3851
3852 a.set_name(name);
3853
3854 if (attr._attr->next_attribute)
3855 attr._attr->next_attribute->prev_attribute_c = a._attr;
3856 else
3857 _root->first_attribute->prev_attribute_c = a._attr;
3858
3859 a._attr->next_attribute = attr._attr->next_attribute;
3860 a._attr->prev_attribute_c = attr._attr;
3861 attr._attr->next_attribute = a._attr;
3862
3863 return a;
3864 }
3865
3866 xml_attribute xml_node::append_copy(const xml_attribute& proto)
3867 {
3868 if (!proto) return xml_attribute();
3869
3870 xml_attribute result = append_attribute(proto.name());
3871 result.set_value(proto.value());
3872
3873 return result;
3874 }
3875
3876 xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
3877 {
3878 if (!proto) return xml_attribute();
3879
3880 xml_attribute result = prepend_attribute(proto.name());
3881 result.set_value(proto.value());
3882
3883 return result;
3884 }
3885
3886 xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
3887 {
3888 if (!proto) return xml_attribute();
3889
3890 xml_attribute result = insert_attribute_after(proto.name(), attr);
3891 result.set_value(proto.value());
3892
3893 return result;
3894 }
3895
3896 xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
3897 {
3898 if (!proto) return xml_attribute();
3899
3900 xml_attribute result = insert_attribute_before(proto.name(), attr);
3901 result.set_value(proto.value());
3902
3903 return result;
3904 }
3905
3906 xml_node xml_node::append_child(xml_node_type type)
3907 {
3908 if (!allow_insert_child(this->type(), type)) return xml_node();
3909
3910 xml_node n(append_node(_root, get_allocator(_root), type));
3911
3912 if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
3913
3914 return n;
3915 }
3916
3917 xml_node xml_node::prepend_child(xml_node_type type)
3918 {
3919 if (!allow_insert_child(this->type(), type)) return xml_node();
3920
3921 xml_node n(allocate_node(get_allocator(_root), type));
3922 if (!n) return xml_node();
3923
3924 n._root->parent = _root;
3925
3926 xml_node_struct* head = _root->first_child;
3927
3928 if (head)
3929 {
3930 n._root->prev_sibling_c = head->prev_sibling_c;
3931 head->prev_sibling_c = n._root;
3932 }
3933 else
3934 n._root->prev_sibling_c = n._root;
3935
3936 n._root->next_sibling = head;
3937 _root->first_child = n._root;
3938
3939 if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
3940
3941 return n;
3942 }
3943
3944 xml_node xml_node::insert_child_before(xml_node_type type, const xml_node& node)
3945 {
3946 if (!allow_insert_child(this->type(), type)) return xml_node();
3947 if (!node._root || node._root->parent != _root) return xml_node();
3948
3949 xml_node n(allocate_node(get_allocator(_root), type));
3950 if (!n) return xml_node();
3951
3952 n._root->parent = _root;
3953
3954 if (node._root->prev_sibling_c->next_sibling)
3955 node._root->prev_sibling_c->next_sibling = n._root;
3956 else
3957 _root->first_child = n._root;
3958
3959 n._root->prev_sibling_c = node._root->prev_sibling_c;
3960 n._root->next_sibling = node._root;
3961 node._root->prev_sibling_c = n._root;
3962
3963 if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
3964
3965 return n;
3966 }
3967
3968 xml_node xml_node::insert_child_after(xml_node_type type, const xml_node& node)
3969 {
3970 if (!allow_insert_child(this->type(), type)) return xml_node();
3971 if (!node._root || node._root->parent != _root) return xml_node();
3972
3973 xml_node n(allocate_node(get_allocator(_root), type));
3974 if (!n) return xml_node();
3975
3976 n._root->parent = _root;
3977
3978 if (node._root->next_sibling)
3979 node._root->next_sibling->prev_sibling_c = n._root;
3980 else
3981 _root->first_child->prev_sibling_c = n._root;
3982
3983 n._root->next_sibling = node._root->next_sibling;
3984 n._root->prev_sibling_c = node._root;
3985 node._root->next_sibling = n._root;
3986
3987 if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
3988
3989 return n;
3990 }
3991
3992 xml_node xml_node::append_child(const char_t* name)
3993 {
3994 xml_node result = append_child(node_element);
3995
3996 result.set_name(name);
3997
3998 return result;
3999 }
4000
4001 xml_node xml_node::prepend_child(const char_t* name)
4002 {
4003 xml_node result = prepend_child(node_element);
4004
4005 result.set_name(name);
4006
4007 return result;
4008 }
4009
4010 xml_node xml_node::insert_child_after(const char_t* name, const xml_node& node)
4011 {
4012 xml_node result = insert_child_after(node_element, node);
4013
4014 result.set_name(name);
4015
4016 return result;
4017 }
4018
4019 xml_node xml_node::insert_child_before(const char_t* name, const xml_node& node)
4020 {
4021 xml_node result = insert_child_before(node_element, node);
4022
4023 result.set_name(name);
4024
4025 return result;
4026 }
4027
4028 xml_node xml_node::append_copy(const xml_node& proto)
4029 {
4030 xml_node result = append_child(proto.type());
4031
4032 if (result) recursive_copy_skip(result, proto, result);
4033
4034 return result;
4035 }
4036
4037 xml_node xml_node::prepend_copy(const xml_node& proto)
4038 {
4039 xml_node result = prepend_child(proto.type());
4040
4041 if (result) recursive_copy_skip(result, proto, result);
4042
4043 return result;
4044 }
4045
4046 xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
4047 {
4048 xml_node result = insert_child_after(proto.type(), node);
4049
4050 if (result) recursive_copy_skip(result, proto, result);
4051
4052 return result;
4053 }
4054
4055 xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
4056 {
4057 xml_node result = insert_child_before(proto.type(), node);
4058
4059 if (result) recursive_copy_skip(result, proto, result);
4060
4061 return result;
4062 }
4063
4064 bool xml_node::remove_attribute(const char_t* name)
4065 {
4066 return remove_attribute(attribute(name));
4067 }
4068
4069 bool xml_node::remove_attribute(const xml_attribute& a)
4070 {
4071 if (!_root || !a._attr) return false;
4072
4073 // check that attribute belongs to *this
4074 xml_attribute_struct* attr = a._attr;
4075
4076 while (attr->prev_attribute_c->next_attribute) attr = attr->prev_attribute_c;
4077
4078 if (attr != _root->first_attribute) return false;
4079
4080 if (a._attr->next_attribute) a._attr->next_attribute->prev_attribute_c = a._attr->prev_attribute_c;
4081 else if (_root->first_attribute) _root->first_attribute->prev_attribute_c = a._attr->prev_attribute_c;
4082
4083 if (a._attr->prev_attribute_c->next_attribute) a._attr->prev_attribute_c->next_attribute = a._attr->next_attribute;
4084 else _root->first_attribute = a._attr->next_attribute;
4085
4086 destroy_attribute(a._attr, get_allocator(_root));
4087
4088 return true;
4089 }
4090
4091 bool xml_node::remove_child(const char_t* name)
4092 {
4093 return remove_child(child(name));
4094 }
4095
4096 bool xml_node::remove_child(const xml_node& n)
4097 {
4098 if (!_root || !n._root || n._root->parent != _root) return false;
4099
4100 if (n._root->next_sibling) n._root->next_sibling->prev_sibling_c = n._root->prev_sibling_c;
4101 else if (_root->first_child) _root->first_child->prev_sibling_c = n._root->prev_sibling_c;
4102
4103 if (n._root->prev_sibling_c->next_sibling) n._root->prev_sibling_c->next_sibling = n._root->next_sibling;
4104 else _root->first_child = n._root->next_sibling;
4105
4106 destroy_node(n._root, get_allocator(_root));
4107
4108 return true;
4109 }
4110
4111 xml_node xml_node::find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const
4112 {
4113 if (!_root) return xml_node();
4114
4115 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
4116 if (i->name && strequal(name, i->name))
4117 {
4118 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
4119 if (strequal(attr_name, a->name) && strequal(attr_value, a->value))
4120 return xml_node(i);
4121 }
4122
4123 return xml_node();
4124 }
4125
4126 xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
4127 {
4128 if (!_root) return xml_node();
4129
4130 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
4131 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
4132 if (strequal(attr_name, a->name) && strequal(attr_value, a->value))
4133 return xml_node(i);
4134
4135 return xml_node();
4136 }
4137
4138#ifndef PUGIXML_NO_STL
4139 string_t xml_node::path(char_t delimiter) const
4140 {
4141 string_t path;
4142
4143 xml_node cursor = *this; // Make a copy.
4144
4145 path = cursor.name();
4146
4147 while (cursor.parent())
4148 {
4149 cursor = cursor.parent();
4150
4151 string_t temp = cursor.name();
4152 temp += delimiter;
4153 temp += path;
4154 path.swap(temp);
4155 }
4156
4157 return path;
4158 }
4159#endif
4160
4161 xml_node xml_node::first_element_by_path(const char_t* path, char_t delimiter) const
4162 {
4163 xml_node found = *this; // Current search context.
4164
4165 if (!_root || !path || !path[0]) return found;
4166
4167 if (path[0] == delimiter)
4168 {
4169 // Absolute path; e.g. '/foo/bar'
4170 found = found.root();
4171 ++path;
4172 }
4173
4174 const char_t* path_segment = path;
4175
4176 while (*path_segment == delimiter) ++path_segment;
4177
4178 const char_t* path_segment_end = path_segment;
4179
4180 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
4181
4182 if (path_segment == path_segment_end) return found;
4183
4184 const char_t* next_segment = path_segment_end;
4185
4186 while (*next_segment == delimiter) ++next_segment;
4187
4188 if (*path_segment == '.' && path_segment + 1 == path_segment_end)
4189 return found.first_element_by_path(next_segment, delimiter);
4190 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
4191 return found.parent().first_element_by_path(next_segment, delimiter);
4192 else
4193 {
4194 for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
4195 {
4196 if (j->name && strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
4197 {
4198 xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
4199
4200 if (subsearch) return subsearch;
4201 }
4202 }
4203
4204 return xml_node();
4205 }
4206 }
4207
4208 bool xml_node::traverse(xml_tree_walker& walker)
4209 {
4210 walker._depth = -1;
4211
4212 xml_node arg_begin = *this;
4213 if (!walker.begin(arg_begin)) return false;
4214
4215 xml_node cur = first_child();
4216
4217 if (cur)
4218 {
4219 ++walker._depth;
4220
4221 do
4222 {
4223 xml_node arg_for_each = cur;
4224 if (!walker.for_each(arg_for_each))
4225 return false;
4226
4227 if (cur.first_child())
4228 {
4229 ++walker._depth;
4230 cur = cur.first_child();
4231 }
4232 else if (cur.next_sibling())
4233 cur = cur.next_sibling();
4234 else
4235 {
4236 // Borland C++ workaround
4237 while (!cur.next_sibling() && cur != *this && (bool)cur.parent())
4238 {
4239 --walker._depth;
4240 cur = cur.parent();
4241 }
4242
4243 if (cur != *this)
4244 cur = cur.next_sibling();
4245 }
4246 }
4247 while (cur && cur != *this);
4248 }
4249
4250 assert(walker._depth == -1);
4251
4252 xml_node arg_end = *this;
4253 return walker.end(arg_end);
4254 }
4255
4256 size_t xml_node::hash_value() const
4257 {
4258 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
4259 }
4260
4261 xml_node_struct* xml_node::internal_object() const
4262 {
4263 return _root;
4264 }
4265
4266 void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
4267 {
4268 if (!_root) return;
4269
4270 xml_buffered_writer buffered_writer(writer, encoding);
4271
4272 node_output(buffered_writer, *this, indent, flags, depth);
4273 }
4274
4275#ifndef PUGIXML_NO_STL
4276 void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
4277 {
4278 xml_writer_stream writer(stream);
4279
4280 print(writer, indent, flags, encoding, depth);
4281 }
4282
4283 void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
4284 {
4285 xml_writer_stream writer(stream);
4286
4287 print(writer, indent, flags, encoding_wchar, depth);
4288 }
4289#endif
4290
4291 ptrdiff_t xml_node::offset_debug() const
4292 {
4293 xml_node_struct* r = root()._root;
4294
4295 if (!r) return -1;
4296
4297 const char_t* buffer = static_cast<xml_document_struct*>(r)->buffer;
4298
4299 if (!buffer) return -1;
4300
4301 switch (type())
4302 {
4303 case node_document:
4304 return 0;
4305
4306 case node_element:
4307 case node_declaration:
4308 case node_pi:
4309 return (_root->header & xml_memory_page_name_allocated_mask) ? -1 : _root->name - buffer;
4310
4311 case node_pcdata:
4312 case node_cdata:
4313 case node_comment:
4314 case node_doctype:
4315 return (_root->header & xml_memory_page_value_allocated_mask) ? -1 : _root->value - buffer;
4316
4317 default:
4318 return -1;
4319 }
4320 }
4321
4322#ifdef __BORLANDC__
4323 bool operator&&(const xml_node& lhs, bool rhs)
4324 {
4325 return (bool)lhs && rhs;
4326 }
4327
4328 bool operator||(const xml_node& lhs, bool rhs)
4329 {
4330 return (bool)lhs || rhs;
4331 }
4332#endif
4333
4334 xml_node_iterator::xml_node_iterator()
4335 {
4336 }
4337
4338 xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
4339 {
4340 }
4341
4342 xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
4343 {
4344 }
4345
4346 bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
4347 {
4348 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
4349 }
4350
4351 bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
4352 {
4353 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
4354 }
4355
4356 xml_node& xml_node_iterator::operator*()
4357 {
4358 assert(_wrap._root);
4359 return _wrap;
4360 }
4361
4362 xml_node* xml_node_iterator::operator->()
4363 {
4364 assert(_wrap._root);
4365 return &_wrap;
4366 }
4367
4368 const xml_node_iterator& xml_node_iterator::operator++()
4369 {
4370 assert(_wrap._root);
4371 _wrap._root = _wrap._root->next_sibling;
4372 return *this;
4373 }
4374
4375 xml_node_iterator xml_node_iterator::operator++(int)
4376 {
4377 xml_node_iterator temp = *this;
4378 ++*this;
4379 return temp;
4380 }
4381
4382 const xml_node_iterator& xml_node_iterator::operator--()
4383 {
4384 _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
4385 return *this;
4386 }
4387
4388 xml_node_iterator xml_node_iterator::operator--(int)
4389 {
4390 xml_node_iterator temp = *this;
4391 --*this;
4392 return temp;
4393 }
4394
4395 xml_attribute_iterator::xml_attribute_iterator()
4396 {
4397 }
4398
4399 xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
4400 {
4401 }
4402
4403 xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
4404 {
4405 }
4406
4407 bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
4408 {
4409 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
4410 }
4411
4412 bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
4413 {
4414 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
4415 }
4416
4417 xml_attribute& xml_attribute_iterator::operator*()
4418 {
4419 assert(_wrap._attr);
4420 return _wrap;
4421 }
4422
4423 xml_attribute* xml_attribute_iterator::operator->()
4424 {
4425 assert(_wrap._attr);
4426 return &_wrap;
4427 }
4428
4429 const xml_attribute_iterator& xml_attribute_iterator::operator++()
4430 {
4431 assert(_wrap._attr);
4432 _wrap._attr = _wrap._attr->next_attribute;
4433 return *this;
4434 }
4435
4436 xml_attribute_iterator xml_attribute_iterator::operator++(int)
4437 {
4438 xml_attribute_iterator temp = *this;
4439 ++*this;
4440 return temp;
4441 }
4442
4443 const xml_attribute_iterator& xml_attribute_iterator::operator--()
4444 {
4445 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
4446 return *this;
4447 }
4448
4449 xml_attribute_iterator xml_attribute_iterator::operator--(int)
4450 {
4451 xml_attribute_iterator temp = *this;
4452 --*this;
4453 return temp;
4454 }
4455
4456 xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
4457 {
4458 }
4459
4460 xml_parse_result::operator bool() const
4461 {
4462 return status == status_ok;
4463 }
4464
4465 const char* xml_parse_result::description() const
4466 {
4467 switch (status)
4468 {
4469 case status_ok: return "No error";
4470
4471 case status_file_not_found: return "File was not found";
4472 case status_io_error: return "Error reading from file/stream";
4473 case status_out_of_memory: return "Could not allocate memory";
4474 case status_internal_error: return "Internal error occurred";
4475
4476 case status_unrecognized_tag: return "Could not determine tag type";
4477
4478 case status_bad_pi: return "Error parsing document declaration/processing instruction";
4479 case status_bad_comment: return "Error parsing comment";
4480 case status_bad_cdata: return "Error parsing CDATA section";
4481 case status_bad_doctype: return "Error parsing document type declaration";
4482 case status_bad_pcdata: return "Error parsing PCDATA section";
4483 case status_bad_start_element: return "Error parsing start element tag";
4484 case status_bad_attribute: return "Error parsing element attribute";
4485 case status_bad_end_element: return "Error parsing end element tag";
4486 case status_end_element_mismatch: return "Start-end tags mismatch";
4487
4488 default: return "Unknown error";
4489 }
4490 }
4491
4492 xml_document::xml_document(): _buffer(0)
4493 {
4494 create();
4495 }
4496
4497 xml_document::~xml_document()
4498 {
4499 destroy();
4500 }
4501
4502 void xml_document::reset()
4503 {
4504 destroy();
4505 create();
4506 }
4507
4508 void xml_document::reset(const xml_document& proto)
4509 {
4510 reset();
4511
4512 for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
4513 append_copy(cur);
4514 }
4515
4516 void xml_document::create()
4517 {
4518 // initialize sentinel page
4519 STATIC_ASSERT(offsetof(xml_memory_page, data) + sizeof(xml_document_struct) + xml_memory_page_alignment <= sizeof(_memory));
4520
4521 // align upwards to page boundary
4522 void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1));
4523
4524 // prepare page structure
4525 xml_memory_page* page = xml_memory_page::construct(page_memory);
4526
4527 page->busy_size = xml_memory_page_size;
4528
4529 // allocate new root
4530 _root = new (page->data) xml_document_struct(page);
4531 _root->prev_sibling_c = _root;
4532
4533 // setup sentinel page
4534 page->allocator = static_cast<xml_document_struct*>(_root);
4535 }
4536
4537 void xml_document::destroy()
4538 {
4539 // destroy static storage
4540 if (_buffer)
4541 {
4542 global_deallocate(_buffer);
4543 _buffer = 0;
4544 }
4545
4546 // destroy dynamic storage, leave sentinel page (it's in static memory)
4547 if (_root)
4548 {
4549 xml_memory_page* root_page = reinterpret_cast<xml_memory_page*>(_root->header & xml_memory_page_pointer_mask);
4550 assert(root_page && !root_page->prev && !root_page->memory);
4551
4552 // destroy all pages
4553 for (xml_memory_page* page = root_page->next; page; )
4554 {
4555 xml_memory_page* next = page->next;
4556
4557 xml_allocator::deallocate_page(page);
4558
4559 page = next;
4560 }
4561
4562 // cleanup root page
4563 root_page->allocator = 0;
4564 root_page->next = 0;
4565 root_page->busy_size = root_page->freed_size = 0;
4566
4567 _root = 0;
4568 }
4569 }
4570
4571#ifndef PUGIXML_NO_STL
4572 xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
4573 {
4574 reset();
4575
4576 return load_stream_impl(*this, stream, options, encoding);
4577 }
4578
4579 xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
4580 {
4581 reset();
4582
4583 return load_stream_impl(*this, stream, options, encoding_wchar);
4584 }
4585#endif
4586
4587 xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
4588 {
4589 // Force native encoding (skip autodetection)
4590 #ifdef PUGIXML_WCHAR_MODE
4591 xml_encoding encoding = encoding_wchar;
4592 #else
4593 xml_encoding encoding = encoding_utf8;
4594 #endif
4595
4596 return load_buffer(contents, strlength(contents) * sizeof(char_t), options, encoding);
4597 }
4598
4599 xml_parse_result xml_document::load_file(const char* path, unsigned int options, xml_encoding encoding)
4600 {
4601 reset();
4602
4603 FILE* file = fopen(path, "rb");
4604
4605 return load_file_impl(*this, file, options, encoding);
4606 }
4607
4608 xml_parse_result xml_document::load_file(const wchar_t* path, unsigned int options, xml_encoding encoding)
4609 {
4610 reset();
4611
4612 FILE* file = open_file_wide(path, L"rb");
4613
4614 return load_file_impl(*this, file, options, encoding);
4615 }
4616
4617 xml_parse_result xml_document::load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own)
4618 {
4619 reset();
4620
4621 // check input buffer
4622 assert(contents || size == 0);
4623
4624 // get actual encoding
4625 xml_encoding buffer_encoding = get_buffer_encoding(encoding, contents, size);
4626
4627 // get private buffer
4628 char_t* buffer = 0;
4629 size_t length = 0;
4630
4631 if (!convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return make_parse_result(status_out_of_memory);
4632
4633 // delete original buffer if we performed a conversion
4634 if (own && buffer != contents && contents) global_deallocate(contents);
4635
4636 // parse
4637 xml_parse_result res = xml_parser::parse(buffer, length, _root, options);
4638
4639 // remember encoding
4640 res.encoding = buffer_encoding;
4641
4642 // grab onto buffer if it's our buffer, user is responsible for deallocating contens himself
4643 if (own || buffer != contents) _buffer = buffer;
4644
4645 return res;
4646 }
4647
4648 xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
4649 {
4650 return load_buffer_impl(const_cast<void*>(contents), size, options, encoding, false, false);
4651 }
4652
4653 xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
4654 {
4655 return load_buffer_impl(contents, size, options, encoding, true, false);
4656 }
4657
4658 xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
4659 {
4660 return load_buffer_impl(contents, size, options, encoding, true, true);
4661 }
4662
4663 void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
4664 {
4665 if (flags & format_write_bom) write_bom(writer, get_write_encoding(encoding));
4666
4667 xml_buffered_writer buffered_writer(writer, encoding);
4668
4669 if (!(flags & format_no_declaration) && !has_declaration(*this))
4670 {
4671 buffered_writer.write(PUGIXML_TEXT("<?xml version=\"1.0\"?>"));
4672 if (!(flags & format_raw)) buffered_writer.write('\n');
4673 }
4674
4675 node_output(buffered_writer, *this, indent, flags, 0);
4676 }
4677
4678#ifndef PUGIXML_NO_STL
4679 void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
4680 {
4681 xml_writer_stream writer(stream);
4682
4683 save(writer, indent, flags, encoding);
4684 }
4685
4686 void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
4687 {
4688 xml_writer_stream writer(stream);
4689
4690 save(writer, indent, flags, encoding_wchar);
4691 }
4692#endif
4693
4694 bool xml_document::save_file(const char* path, const char_t* indent, unsigned int flags, xml_encoding encoding) const
4695 {
4696 FILE* file = fopen(path, "wb");
4697 if (!file) return false;
4698
4699 xml_writer_file writer(file);
4700 save(writer, indent, flags, encoding);
4701
4702 fclose(file);
4703
4704 return true;
4705 }
4706
4707 bool xml_document::save_file(const wchar_t* path, const char_t* indent, unsigned int flags, xml_encoding encoding) const
4708 {
4709 FILE* file = open_file_wide(path, L"wb");
4710 if (!file) return false;
4711
4712 xml_writer_file writer(file);
4713 save(writer, indent, flags, encoding);
4714
4715 fclose(file);
4716
4717 return true;
4718 }
4719
4720 xml_node xml_document::document_element() const
4721 {
4722 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
4723 if ((i->header & xml_memory_page_type_mask) + 1 == node_element)
4724 return xml_node(i);
4725
4726 return xml_node();
4727 }
4728
4729#ifndef PUGIXML_NO_STL
4730 std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
4731 {
4732 assert(str);
4733
4734 return as_utf8_impl(str, wcslen(str));
4735 }
4736
4737 std::string PUGIXML_FUNCTION as_utf8(const std::wstring& str)
4738 {
4739 return as_utf8_impl(str.c_str(), str.size());
4740 }
4741
4742 std::wstring PUGIXML_FUNCTION as_wide(const char* str)
4743 {
4744 assert(str);
4745
4746 return as_wide_impl(str, strlen(str));
4747 }
4748
4749 std::wstring PUGIXML_FUNCTION as_wide(const std::string& str)
4750 {
4751 return as_wide_impl(str.c_str(), str.size());
4752 }
4753#endif
4754
4755 void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
4756 {
4757 global_allocate = allocate;
4758 global_deallocate = deallocate;
4759 }
4760
4761 allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
4762 {
4763 return global_allocate;
4764 }
4765
4766 deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
4767 {
4768 return global_deallocate;
4769 }
4770}
4771
4772#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
4773namespace std
4774{
4775 // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
4776 std::bidirectional_iterator_tag _Iter_cat(const xml_node_iterator&)
4777 {
4778 return std::bidirectional_iterator_tag();
4779 }
4780
4781 std::bidirectional_iterator_tag _Iter_cat(const xml_attribute_iterator&)
4782 {
4783 return std::bidirectional_iterator_tag();
4784 }
4785}
4786#endif
4787
4788#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
4789namespace std
4790{
4791 // Workarounds for (non-standard) iterator category detection
4792 std::bidirectional_iterator_tag __iterator_category(const xml_node_iterator&)
4793 {
4794 return std::bidirectional_iterator_tag();
4795 }
4796
4797 std::bidirectional_iterator_tag __iterator_category(const xml_attribute_iterator&)
4798 {
4799 return std::bidirectional_iterator_tag();
4800 }
4801}
4802#endif
4803
4804#ifndef PUGIXML_NO_XPATH
4805
4806// STL replacements
4807namespace
4808{
// Function object that compares two values of the same type with operator==.
struct equal_to
{
    template <typename T> bool operator()(const T& lhs, const T& rhs) const
    {
        const bool same = lhs == rhs;

        return same;
    }
};
4816
// Function object that compares two values of the same type with operator!=.
struct not_equal_to
{
    template <typename T> bool operator()(const T& lhs, const T& rhs) const
    {
        const bool different = lhs != rhs;

        return different;
    }
};
4824
// Function object that compares two values of the same type with operator<.
struct less
{
    template <typename T> bool operator()(const T& lhs, const T& rhs) const
    {
        const bool before = lhs < rhs;

        return before;
    }
};
4832
// Function object that compares two values of the same type with operator<=.
struct less_equal
{
    template <typename T> bool operator()(const T& lhs, const T& rhs) const
    {
        const bool not_after = lhs <= rhs;

        return not_after;
    }
};
4840
// Exchanges the values of lhs and rhs through a temporary copy.
template <typename T> void swap(T& lhs, T& rhs)
{
    T saved = lhs;

    lhs = rhs;
    rhs = saved;
}
4847
// Returns an iterator to the first element of [begin, end) for which no other
// element compares smaller according to pred (pred(a, b) means "a goes before b").
// Returns end when the range is empty.
template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
{
    // without this guard the scan below would start past the end of an empty range
    if (begin == end) return end;

    I result = begin;

    for (I it = begin + 1; it != end; ++it)
        if (pred(*it, *result))
            result = it;

    return result;
}
4858
// Reverses the order of the elements in [begin, end) in place.
template <typename I> void reverse(I begin, I end)
{
    // swap the outermost pair and move both ends inwards until they meet
    while (begin + 1 < end)
    {
        --end;
        swap(*begin, *end);
        ++begin;
    }
}
4863
// Collapses every run of adjacent elements that compare equal into its first
// element and returns the new past-the-end iterator of the compacted range.
template <typename I> I unique(I begin, I end)
{
    // skip the leading part that has no adjacent duplicates
    while (begin + 1 < end)
    {
        if (*begin != *(begin + 1)) ++begin;
        else break;
    }

    if (begin == end) return begin;

    // last element that has been kept so far
    I kept = begin;

    // copy forward every element that differs from the last kept one
    for (I read = begin + 1; read != end; ++read)
    {
        if (*read != *kept)
        {
            ++kept;
            *kept = *read;
        }
    }

    // kept points at a live element, so the end is one past it
    return kept + 1;
}
4886
// Copies [begin, end) so that the copy ends at target, walking from the last
// element to the first (safe when the destination overlaps the source on the right).
template <typename I> void copy_backwards(I begin, I end, I target)
{
    while (end != begin)
    {
        --target;
        --end;

        *target = *end;
    }
}
4891
// Sorts the non-empty range [begin, end) with insertion sort; pred(a, b) means
// "a goes before b". The last parameter only carries the element type T.
template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
{
    assert(begin != end);

    for (I cur = begin + 1; cur != end; ++cur)
    {
        T val = *cur;

        if (!pred(val, *begin))
        {
            // the first element is not after val, so the backward scan stops before leaving the range
            I hole = cur;

            for (; pred(val, *(hole - 1)); --hole)
                *hole = *(hole - 1);

            // fill hole with element
            *hole = val;
        }
        else
        {
            // val goes before everything sorted so far: shift the whole prefix right by one
            copy_backwards(begin, cur, cur + 1);
            *begin = val;
        }
    }
}
4922
4923 // std variant for elements with ==
// Rearranges [begin, end) around the value found at middle and reports, through
// out_eqbeg/out_eqend, the bounds [eqbeg, eqend) of the run of elements that
// compare equal (operator==) to that value.
// pred(a, b) is used as the "a goes before b" test; elements on the left of the
// equal run are those kept by the left-hand scan, elements on the right those
// kept by the right-hand scan.
// NOTE(review): presumably requires a non-empty range with begin <= middle < end - confirm against callers.
template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
{
    I eqbeg = middle, eqend = middle + 1;

    // expand equal range
    while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg;
    while (eqend != end && *eqend == *eqbeg) ++eqend;

    // process outer elements
    // ltend: start of the already scanned part on the left; gtbeg: end of the already scanned part on the right
    I ltend = eqbeg, gtbeg = eqend;

    for (;;)
    {
        // find the element from the right side that belongs to the left one
        // (elements equal to the pivot are absorbed into the equal range on the way)
        for (; gtbeg != end; ++gtbeg)
            if (!pred(*eqbeg, *gtbeg))
            {
                if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++);
                else break;
            }

        // find the element from the left side that belongs to the right one
        // (elements equal to the pivot are absorbed into the equal range on the way)
        for (; ltend != begin; --ltend)
            if (!pred(*(ltend - 1), *eqbeg))
            {
                if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg);
                else break;
            }

        // scanned all elements
        if (gtbeg == end && ltend == begin)
        {
            *out_eqbeg = eqbeg;
            *out_eqend = eqend;
            return;
        }

        // make room for elements by moving equal area
        if (gtbeg == end)
        {
            // only the left side has a misplaced element: shift the equal range one slot to the left
            if (--ltend != --eqbeg) swap(*ltend, *eqbeg);
            swap(*eqbeg, *--eqend);
        }
        else if (ltend == begin)
        {
            // only the right side has a misplaced element: shift the equal range one slot to the right
            if (eqend != gtbeg) swap(*eqbeg, *eqend);
            ++eqend;
            swap(*gtbeg++, *eqbeg++);
        }
        // both sides have a misplaced element: exchange them directly
        else swap(*gtbeg++, *--ltend);
    }
}
4976
// Orders the three referenced elements so that, according to pred,
// *first, *middle and *last end up in ascending order.
template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
{
    // put the smaller of (first, middle) in front
    if (pred(*middle, *first)) swap(*first, *middle);

    // move the largest of the three to the back
    if (pred(*last, *middle)) swap(*middle, *last);

    // the previous step may have put a smaller value into the middle
    if (pred(*middle, *first)) swap(*first, *middle);
}
4983
// Moves an estimate of the median of [first, last] (last is inclusive) to
// *middle: median of three for short ranges, median of nine otherwise.
template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
{
    const bool short_range = last - first <= 40;

    if (short_range)
    {
        // median of three for small chunks
        median3(first, middle, last, pred);
        return;
    }

    // median of nine: three local medians, then the median of those
    size_t step = (last - first + 1) / 8;

    median3(first, first + step, first + 2 * step, pred);
    median3(middle - step, middle, middle + step, pred);
    median3(last - 2 * step, last - step, last, pred);
    median3(first + step, middle, last - step, pred);
}
5002
// Sorts [begin, end) according to pred: three-way quicksort for chunks larger
// than 32 elements, insertion sort for the remaining small chunk.
template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
{
    while (end - begin > 32)
    {
        // pick a pivot: the median estimate is moved to the middle position
        I middle = begin + (end - begin) / 2;
        median(begin, middle, end - 1, pred);

        // split into (< pivot) [eqbeg, eqend) (> pivot)
        I eqbeg, eqend;
        partition(begin, middle, end, pred, &eqbeg, &eqend);

        // recurse into the smaller side, keep looping on the larger one
        if (end - eqend < eqbeg - begin)
        {
            sort(eqend, end, pred);
            end = eqbeg;
        }
        else
        {
            sort(begin, eqbeg, pred);
            begin = eqend;
        }
    }

    // nothing left to sort
    if (begin == end) return;

    // insertion sort small chunk
    insertion_sort(begin, end, pred, &*begin);
}
5032}
5033
5034// Allocator used for AST and evaluation stacks
5035namespace
5036{
// One block in the singly linked chain of memory blocks managed by xpath_allocator.
struct xpath_memory_block
{
    // next block in the chain, or 0 for the last one
    xpath_memory_block* next;

    // payload storage; xpath_allocator allocates blocks with a larger payload
    // than declared here when a single request exceeds this capacity
    char data[4096];
};
5043
5044 class xpath_allocator
5045 {
5046 xpath_memory_block* _root;
5047 size_t _root_size;
5048
5049 public:
5050 #ifdef PUGIXML_NO_EXCEPTIONS
5051 jmp_buf* error_handler;
5052 #endif
5053
5054 xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size)
5055 {
5056 #ifdef PUGIXML_NO_EXCEPTIONS
5057 error_handler = 0;
5058 #endif
5059 }
5060
5061 void* allocate_nothrow(size_t size)
5062 {
5063 const size_t block_capacity = sizeof(_root->data);
5064
5065 // align size so that we're able to store pointers in subsequent blocks
5066 size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
5067
5068 if (_root_size + size <= block_capacity)
5069 {
5070 void* buf = _root->data + _root_size;
5071 _root_size += size;
5072 return buf;
5073 }
5074 else
5075 {
5076 size_t block_data_size = (size > block_capacity) ? size : block_capacity;
5077 size_t block_size = block_data_size + offsetof(xpath_memory_block, data);
5078
5079 xpath_memory_block* block = static_cast<xpath_memory_block*>(global_allocate(block_size));
5080 if (!block) return 0;
5081
5082 block->next = _root;
5083
5084 _root = block;
5085 _root_size = size;
5086
5087 return block->data;
5088 }
5089 }
5090
5091 void* allocate(size_t size)
5092 {
5093 void* result = allocate_nothrow(size);
5094
5095 if (!result)
5096 {
5097 #ifdef PUGIXML_NO_EXCEPTIONS
5098 assert(error_handler);
5099 longjmp(*error_handler, 1);
5100 #else
5101 throw std::bad_alloc();
5102 #endif
5103 }
5104
5105 return result;
5106 }
5107
5108 void* reallocate(void* ptr, size_t old_size, size_t new_size)
5109 {
5110 // align size so that we're able to store pointers in subsequent blocks
5111 old_size = (old_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
5112 new_size = (new_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
5113
5114 // we can only reallocate the last object
5115 assert(ptr == 0 || static_cast<char*>(ptr) + old_size == _root->data + _root_size);
5116
5117 // adjust root size so that we have not allocated the object at all
5118 bool only_object = (_root_size == old_size);
5119
5120 if (ptr) _root_size -= old_size;
5121
5122 // allocate a new version (this will obviously reuse the memory if possible)
5123 void* result = allocate(new_size);
5124 assert(result);
5125
5126 // we have a new block
5127 if (result != ptr && ptr)
5128 {
5129 // copy old data
5130 assert(new_size > old_size);
5131 memcpy(result, ptr, old_size);
5132
5133 // free the previous page if it had no other objects
5134 if (only_object)
5135 {
5136 assert(_root->data == result);
5137 assert(_root->next);
5138
5139 xpath_memory_block* next = _root->next->next;
5140
5141 if (next)
5142 {
5143 // deallocate the whole page, unless it was the first one
5144 global_deallocate(_root->next);
5145 _root->next = next;
5146 }
5147 }
5148 }
5149
5150 return result;
5151 }
5152
5153 void revert(const xpath_allocator& state)
5154 {
5155 // free all new pages
5156 xpath_memory_block* cur = _root;
5157
5158 while (cur != state._root)
5159 {
5160 xpath_memory_block* next = cur->next;
5161
5162 global_deallocate(cur);
5163
5164 cur = next;
5165 }
5166
5167 // restore state
5168 _root = state._root;
5169 _root_size = state._root_size;
5170 }
5171
5172 void release()
5173 {
5174 xpath_memory_block* cur = _root;
5175 assert(cur);
5176
5177 while (cur->next)
5178 {
5179 xpath_memory_block* next = cur->next;
5180
5181 global_deallocate(cur);
5182
5183 cur = next;
5184 }
5185 }
5186 };
5187
5188 struct xpath_allocator_capture
5189 {
5190 xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
5191 {
5192 }
5193
5194 ~xpath_allocator_capture()
5195 {
5196 _target->revert(_state);
5197 }
5198
5199 xpath_allocator* _target;
5200 xpath_allocator _state;
5201 };
5202
// Pair of allocators handed around together (see xpath_stack_data, which owns both).
struct xpath_stack
{
    // allocator named "result" by xpath_stack_data
    xpath_allocator* result;
    // allocator named "temp" by xpath_stack_data
    xpath_allocator* temp;
};
5208
5209 struct xpath_stack_data
5210 {
5211 xpath_memory_block blocks[2];
5212 xpath_allocator result;
5213 xpath_allocator temp;
5214 xpath_stack stack;
5215
5216 #ifdef PUGIXML_NO_EXCEPTIONS
5217 jmp_buf error_handler;
5218 #endif
5219
5220 xpath_stack_data(): result(blocks + 0), temp(blocks + 1)
5221 {
5222 blocks[0].next = blocks[1].next = 0;
5223
5224 stack.result = &result;
5225 stack.temp = &temp;
5226
5227 #ifdef PUGIXML_NO_EXCEPTIONS
5228 result.error_handler = temp.error_handler = &error_handler;
5229 #endif
5230 }
5231
5232 ~xpath_stack_data()
5233 {
5234 result.release();
5235 temp.release();
5236 }
5237 };
5238}
5239
5240// String class
5241namespace
5242{
5243 class xpath_string
5244 {
5245 const char_t* _buffer;
5246 bool _uses_heap;
5247
5248 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
5249 {
5250 char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
5251 assert(result);
5252
5253 memcpy(result, string, length * sizeof(char_t));
5254 result[length] = 0;
5255
5256 return result;
5257 }
5258
5259 static char_t* duplicate_string(const char_t* string, xpath_allocator* alloc)
5260 {
5261 return duplicate_string(string, strlength(string), alloc);
5262 }
5263
5264 public:
5265 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false)
5266 {
5267 }
5268
5269 explicit xpath_string(const char_t* str, xpath_allocator* alloc)
5270 {
5271 bool empty = (*str == 0);
5272
5273 _buffer = empty ? PUGIXML_TEXT("") : duplicate_string(str, alloc);
5274 _uses_heap = !empty;
5275 }
5276
5277 explicit xpath_string(const char_t* str, bool use_heap): _buffer(str), _uses_heap(use_heap)
5278 {
5279 }
5280
5281 xpath_string(const char_t* begin, const char_t* end, xpath_allocator* alloc)
5282 {
5283 assert(begin <= end);
5284
5285 bool empty = (begin == end);
5286
5287 _buffer = empty ? PUGIXML_TEXT("") : duplicate_string(begin, static_cast<size_t>(end - begin), alloc);
5288 _uses_heap = !empty;
5289 }
5290
5291 void append(const xpath_string& o, xpath_allocator* alloc)
5292 {
5293 // skip empty sources
5294 if (!*o._buffer) return;
5295
5296 // fast append for constant empty target and constant source
5297 if (!*_buffer && !_uses_heap && !o._uses_heap)
5298 {
5299 _buffer = o._buffer;
5300 }
5301 else
5302 {
5303 // need to make heap copy
5304 size_t target_length = strlength(_buffer);
5305 size_t source_length = strlength(o._buffer);
5306 size_t length = target_length + source_length;
5307
5308 // allocate new buffer
5309 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (length + 1) * sizeof(char_t)));
5310 assert(result);
5311
5312 // append first string to the new buffer in case there was no reallocation
5313 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
5314
5315 // append second string to the new buffer
5316 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
5317 result[length] = 0;
5318
5319 // finalize
5320 _buffer = result;
5321 _uses_heap = true;
5322 }
5323 }
5324
5325 const char_t* c_str() const
5326 {
5327 return _buffer;
5328 }
5329
5330 size_t length() const
5331 {
5332 return strlength(_buffer);
5333 }
5334
5335 char_t* data(xpath_allocator* alloc)
5336 {
5337 // make private heap copy
5338 if (!_uses_heap)
5339 {
5340 _buffer = duplicate_string(_buffer, alloc);
5341 _uses_heap = true;
5342 }
5343
5344 return const_cast<char_t*>(_buffer);
5345 }
5346
5347 bool empty() const
5348 {
5349 return *_buffer == 0;
5350 }
5351
5352 bool operator==(const xpath_string& o) const
5353 {
5354 return strequal(_buffer, o._buffer);
5355 }
5356
5357 bool operator!=(const xpath_string& o) const
5358 {
5359 return !strequal(_buffer, o._buffer);
5360 }
5361
5362 bool uses_heap() const
5363 {
5364 return _uses_heap;
5365 }
5366 };
5367
5368 xpath_string xpath_string_const(const char_t* str)
5369 {
5370 return xpath_string(str, false);
5371 }
5372}
5373
5374namespace
5375{
5376 bool starts_with(const char_t* string, const char_t* pattern)
5377 {
5378 while (*pattern && *string == *pattern)
5379 {
5380 string++;
5381 pattern++;
5382 }
5383
5384 return *pattern == 0;
5385 }
5386
5387 const char_t* find_char(const char_t* s, char_t c)
5388 {
5389 #ifdef PUGIXML_WCHAR_MODE
5390 return wcschr(s, c);
5391 #else
5392 return strchr(s, c);
5393 #endif
5394 }
5395
5396 const char_t* find_substring(const char_t* s, const char_t* p)
5397 {
5398 #ifdef PUGIXML_WCHAR_MODE
5399 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
5400 return (*p == 0) ? s : wcsstr(s, p);
5401 #else
5402 return strstr(s, p);
5403 #endif
5404 }
5405
5406 // Converts symbol to lower case, if it is an ASCII one
5407 char_t tolower_ascii(char_t ch)
5408 {
5409 return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
5410 }
5411
5412 xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
5413 {
5414 if (na.attribute())
5415 return xpath_string_const(na.attribute().value());
5416 else
5417 {
5418 const xml_node& n = na.node();
5419
5420 switch (n.type())
5421 {
5422 case node_pcdata:
5423 case node_cdata:
5424 case node_comment:
5425 case node_pi:
5426 return xpath_string_const(n.value());
5427
5428 case node_document:
5429 case node_element:
5430 {
5431 xpath_string result;
5432
5433 xml_node cur = n.first_child();
5434
5435 while (cur && cur != n)
5436 {
5437 if (cur.type() == node_pcdata || cur.type() == node_cdata)
5438 result.append(xpath_string_const(cur.value()), alloc);
5439
5440 if (cur.first_child())
5441 cur = cur.first_child();
5442 else if (cur.next_sibling())
5443 cur = cur.next_sibling();
5444 else
5445 {
5446 while (!cur.next_sibling() && cur != n)
5447 cur = cur.parent();
5448
5449 if (cur != n) cur = cur.next_sibling();
5450 }
5451 }
5452
5453 return result;
5454 }
5455
5456 default:
5457 return xpath_string();
5458 }
5459 }
5460 }
5461
5462 unsigned int node_height(xml_node n)
5463 {
5464 unsigned int result = 0;
5465
5466 while (n)
5467 {
5468 ++result;
5469 n = n.parent();
5470 }
5471
5472 return result;
5473 }
5474
5475 bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh)
5476 {
5477 // normalize heights
5478 for (unsigned int i = rh; i < lh; i++) ln = ln.parent();
5479 for (unsigned int j = lh; j < rh; j++) rn = rn.parent();
5480
5481 // one node is the ancestor of the other
5482 if (ln == rn) return lh < rh;
5483
5484 // find common ancestor
5485 while (ln.parent() != rn.parent())
5486 {
5487 ln = ln.parent();
5488 rn = rn.parent();
5489 }
5490
5491 // there is no common ancestor (the shared parent is null), nodes are from different documents
5492 if (!ln.parent()) return ln < rn;
5493
5494 // determine sibling order
5495 for (; ln; ln = ln.next_sibling())
5496 if (ln == rn)
5497 return true;
5498
5499 return false;
5500 }
5501
5502 bool node_is_ancestor(xml_node parent, xml_node node)
5503 {
5504 while (node && node != parent) node = node.parent();
5505
5506 return parent && node == parent;
5507 }
5508
5509 const void* document_order(const xpath_node& xnode)
5510 {
5511 xml_node_struct* node = xnode.node().internal_object();
5512
5513 if (node)
5514 {
5515 if (node->name && (node->header & xml_memory_page_name_allocated_mask) == 0) return node->name;
5516 if (node->value && (node->header & xml_memory_page_value_allocated_mask) == 0) return node->value;
5517 return 0;
5518 }
5519
5520 xml_attribute_struct* attr = xnode.attribute().internal_object();
5521
5522 if (attr)
5523 {
5524 if ((attr->header & xml_memory_page_name_allocated_mask) == 0) return attr->name;
5525 if ((attr->header & xml_memory_page_value_allocated_mask) == 0) return attr->value;
5526 return 0;
5527 }
5528
5529 return 0;
5530 }
5531
5532 struct document_order_comparator
5533 {
5534 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
5535 {
5536 // optimized document order based check
5537 const void* lo = document_order(lhs);
5538 const void* ro = document_order(rhs);
5539
5540 if (lo && ro) return lo < ro;
5541
5542 // slow comparison
5543 xml_node ln = lhs.node(), rn = rhs.node();
5544
5545 // compare attributes
5546 if (lhs.attribute() && rhs.attribute())
5547 {
5548 // shared parent
5549 if (lhs.parent() == rhs.parent())
5550 {
5551 // determine sibling order
5552 for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
5553 if (a == rhs.attribute())
5554 return true;
5555
5556 return false;
5557 }
5558
5559 // compare attribute parents
5560 ln = lhs.parent();
5561 rn = rhs.parent();
5562 }
5563 else if (lhs.attribute())
5564 {
5565 // attributes go after the parent element
5566 if (lhs.parent() == rhs.node()) return false;
5567
5568 ln = lhs.parent();
5569 }
5570 else if (rhs.attribute())
5571 {
5572 // attributes go after the parent element
5573 if (rhs.parent() == lhs.node()) return true;
5574
5575 rn = rhs.parent();
5576 }
5577
5578 if (ln == rn) return false;
5579
5580 unsigned int lh = node_height(ln);
5581 unsigned int rh = node_height(rn);
5582
5583 return node_is_before(ln, lh, rn, rh);
5584 }
5585 };
5586
5587 struct duplicate_comparator
5588 {
5589 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
5590 {
5591 if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
5592 else return rhs.attribute() ? false : lhs.node() < rhs.node();
5593 }
5594 };
5595
// Returns a NaN value.
double gen_nan()
{
#if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
    // build the value from the bit pattern 0x7fc00000; the array size turns a
    // float/int32_t size mismatch into a compile error
    union { float f; int32_t i; } bits[sizeof(float) == sizeof(int32_t) ? 1 : -1];

    bits[0].i = 0x7fc00000;

    return bits[0].f;
#else
    // fallback: 0/0, with volatile keeping the division out of constant folding
    const volatile double zero = 0.0;

    return zero / zero;
#endif
}
5608
// Returns true when value is NaN.
bool is_nan(double value)
{
#if defined(_MSC_VER) || defined(__BORLANDC__)
    return _isnan(value) != 0;
#elif defined(fpclassify) && defined(FP_NAN)
    return FP_NAN == fpclassify(value);
#else
    // fallback: NaN is the only value that differs from itself
    const volatile double probe = value;

    return probe != probe;
#endif
}
5621
5622 const char_t* convert_number_to_string_special(double value)
5623 {
5624 #if defined(_MSC_VER) || defined(__BORLANDC__)
5625 if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
5626 if (_isnan(value)) return PUGIXML_TEXT("NaN");
5627 return PUGIXML_TEXT("-Infinity") + (value > 0);
5628 #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
5629 switch (fpclassify(value))
5630 {
5631 case FP_NAN:
5632 return PUGIXML_TEXT("NaN");
5633
5634 case FP_INFINITE:
5635 return PUGIXML_TEXT("-Infinity") + (value > 0);
5636
5637 case FP_ZERO:
5638 return PUGIXML_TEXT("0");
5639
5640 default:
5641 return 0;
5642 }
5643 #else
5644 // fallback
5645 const volatile double v = value;
5646
5647 if (v == 0) return PUGIXML_TEXT("0");
5648 if (v != v) return PUGIXML_TEXT("NaN");
5649 if (v * 2 == v) return PUGIXML_TEXT("-Infinity") + (value > 0);
5650 return 0;
5651 #endif
5652 }
5653
5654 bool convert_number_to_boolean(double value)
5655 {
5656 return (value != 0 && !is_nan(value));
5657 }
5658
// Cuts the trailing '0' characters off the text in [begin, end) by writing a
// zero terminator after the last character that is kept.
void truncate_zeros(char* begin, char* end)
{
    char* cut = end;

    while (cut != begin && cut[-1] == '0') --cut;

    *cut = 0;
}
5665
5666 // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
5667#if defined(_MSC_VER) && _MSC_VER >= 1400
5668 void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
5669 {
5670 // get base values
5671 int sign, exponent;
5672 _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
5673
5674 // truncate redundant zeros
5675 truncate_zeros(buffer, buffer + strlen(buffer));
5676
5677 // fill results
5678 *out_mantissa = buffer;
5679 *out_exponent = exponent;
5680 }
5681#else
5682 void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
5683 {
5684 // get a scientific notation value with IEEE DBL_DIG decimals
5685 sprintf(buffer, "%.*e", DBL_DIG, value);
5686 assert(strlen(buffer) < buffer_size);
5687 (void)!buffer_size;
5688
5689 // get the exponent (possibly negative)
5690 char* exponent_string = strchr(buffer, 'e');
5691 assert(exponent_string);
5692
5693 int exponent = atoi(exponent_string + 1);
5694
5695 // extract mantissa string: skip sign
5696 char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
5697 assert(mantissa[0] != '0' && mantissa[1] == '.');
5698
5699 // divide mantissa by 10 to eliminate integer part
5700 mantissa[1] = mantissa[0];
5701 mantissa++;
5702 exponent++;
5703
5704 // remove extra mantissa digits and zero-terminate mantissa
5705 truncate_zeros(mantissa, exponent_string);
5706
5707 // fill results
5708 *out_mantissa = mantissa;
5709 *out_exponent = exponent;
5710 }
5711#endif
5712
5713 xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
5714 {
5715 // try special number conversion
5716 const char_t* special = convert_number_to_string_special(value);
5717 if (special) return xpath_string_const(special);
5718
5719 // get mantissa + exponent form
5720 char mantissa_buffer[64];
5721
5722 char* mantissa;
5723 int exponent;
5724 convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
5725
5726 // make the number!
5727 char_t result[512];
5728 char_t* s = result;
5729
5730 // sign
5731 if (value < 0) *s++ = '-';
5732
5733 // integer part
5734 if (exponent <= 0)
5735 {
5736 *s++ = '0';
5737 }
5738 else
5739 {
5740 while (exponent > 0)
5741 {
5742 assert(*mantissa == 0 || (unsigned)(*mantissa - '0') <= 9);
5743 *s++ = *mantissa ? *mantissa++ : '0';
5744 exponent--;
5745 }
5746 }
5747
5748 // fractional part
5749 if (*mantissa)
5750 {
5751 // decimal point
5752 *s++ = '.';
5753
5754 // extra zeroes from negative exponent
5755 while (exponent < 0)
5756 {
5757 *s++ = '0';
5758 exponent++;
5759 }
5760
5761 // extra mantissa digits
5762 while (*mantissa)
5763 {
5764 assert((unsigned)(*mantissa - '0') <= 9);
5765 *s++ = *mantissa++;
5766 }
5767 }
5768
5769 // zero-terminate
5770 assert(s < result + sizeof(result) / sizeof(result[0]));
5771 *s = 0;
5772
5773 return xpath_string(result, alloc);
5774 }
5775
5776 bool check_string_to_number_format(const char_t* string)
5777 {
5778 // parse leading whitespace
5779 while (IS_CHARTYPE(*string, ct_space)) ++string;
5780
5781 // parse sign
5782 if (*string == '-') ++string;
5783
5784 if (!*string) return false;
5785
5786 // if there is no integer part, there should be a decimal part with at least one digit
5787 if (!IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !IS_CHARTYPEX(string[1], ctx_digit))) return false;
5788
5789 // parse integer part
5790 while (IS_CHARTYPEX(*string, ctx_digit)) ++string;
5791
5792 // parse decimal part
5793 if (*string == '.')
5794 {
5795 ++string;
5796
5797 while (IS_CHARTYPEX(*string, ctx_digit)) ++string;
5798 }
5799
5800 // parse trailing whitespace
5801 while (IS_CHARTYPE(*string, ct_space)) ++string;
5802
5803 return *string == 0;
5804 }
5805
5806 double convert_string_to_number(const char_t* string)
5807 {
5808 // check string format
5809 if (!check_string_to_number_format(string)) return gen_nan();
5810
5811 // parse string
5812 #ifdef PUGIXML_WCHAR_MODE
5813 return wcstod(string, 0);
5814 #else
5815 return atof(string);
5816 #endif
5817 }
5818
5819 bool convert_string_to_number(const char_t* begin, const char_t* end, double* out_result)
5820 {
5821 char_t buffer[32];
5822
5823 size_t length = static_cast<size_t>(end - begin);
5824 char_t* scratch = buffer;
5825
5826 if (length >= sizeof(buffer) / sizeof(buffer[0]))
5827 {
5828 // need to make dummy on-heap copy
5829 scratch = static_cast<char_t*>(global_allocate((length + 1) * sizeof(char_t)));
5830 if (!scratch) return false;
5831 }
5832
5833 // copy string to zero-terminated buffer and perform conversion
5834 memcpy(scratch, begin, length * sizeof(char_t));
5835 scratch[length] = 0;
5836
5837 *out_result = convert_string_to_number(scratch);
5838
5839 // free dummy buffer
5840 if (scratch != buffer) global_deallocate(scratch);
5841
5842 return true;
5843 }
5844
// Rounds to the nearest integer; halves are rounded towards positive infinity.
double round_nearest(double value)
{
    const double shifted = value + 0.5;

    return floor(shifted);
}
5849
// Like round_nearest, except that inputs in [-0.5, 0] go through ceil() so that
// negative inputs in that range (and -0 itself) produce -0 while +0 stays +0.
double round_nearest_nzero(double value)
{
    if (value >= -0.5 && value <= 0)
        return ceil(value);

    return floor(value + 0.5);
}
5856
5857 const char_t* qualified_name(const xpath_node& node)
5858 {
5859 return node.attribute() ? node.attribute().name() : node.node().name();
5860 }
5861
5862 const char_t* local_name(const xpath_node& node)
5863 {
5864 const char_t* name = qualified_name(node);
5865 const char_t* p = find_char(name, ':');
5866
5867 return p ? p + 1 : name;
5868 }
5869
5870 struct namespace_uri_predicate
5871 {
5872 const char_t* prefix;
5873 size_t prefix_length;
5874
5875 namespace_uri_predicate(const char_t* name)
5876 {
5877 const char_t* pos = find_char(name, ':');
5878
5879 prefix = pos ? name : 0;
5880 prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
5881 }
5882
5883 bool operator()(const xml_attribute& a) const
5884 {
5885 const char_t* name = a.name();
5886
5887 if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
5888
5889 return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
5890 }
5891 };
5892
5893 const char_t* namespace_uri(const xml_node& node)
5894 {
5895 namespace_uri_predicate pred = node.name();
5896
5897 xml_node p = node;
5898
5899 while (p)
5900 {
5901 xml_attribute a = p.find_attribute(pred);
5902
5903 if (a) return a.value();
5904
5905 p = p.parent();
5906 }
5907
5908 return PUGIXML_TEXT("");
5909 }
5910
5911 const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent)
5912 {
5913 namespace_uri_predicate pred = attr.name();
5914
5915 // Default namespace does not apply to attributes
5916 if (!pred.prefix) return PUGIXML_TEXT("");
5917
5918 xml_node p = parent;
5919
5920 while (p)
5921 {
5922 xml_attribute a = p.find_attribute(pred);
5923
5924 if (a) return a.value();
5925
5926 p = p.parent();
5927 }
5928
5929 return PUGIXML_TEXT("");
5930 }
5931
5932 const char_t* namespace_uri(const xpath_node& node)
5933 {
5934 return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
5935 }
5936
5937 void normalize_space(char_t* buffer)
5938 {
5939 char_t* write = buffer;
5940
5941 for (char_t* it = buffer; *it; )
5942 {
5943 char_t ch = *it++;
5944
5945 if (IS_CHARTYPE(ch, ct_space))
5946 {
5947 // replace whitespace sequence with single space
5948 while (IS_CHARTYPE(*it, ct_space)) it++;
5949
5950 // avoid leading spaces
5951 if (write != buffer) *write++ = ' ';
5952 }
5953 else *write++ = ch;
5954 }
5955
5956 // remove trailing space
5957 if (write != buffer && IS_CHARTYPE(write[-1], ct_space)) write--;
5958
5959 // zero-terminate
5960 *write = 0;
5961 }
5962
5963 void translate(char_t* buffer, const char_t* from, const char_t* to)
5964 {
5965 size_t to_length = strlength(to);
5966
5967 char_t* write = buffer;
5968
5969 while (*buffer)
5970 {
5971 DMC_VOLATILE char_t ch = *buffer++;
5972
5973 const char_t* pos = find_char(from, ch);
5974
5975 if (!pos)
5976 *write++ = ch; // do not process
5977 else if (static_cast<size_t>(pos - from) < to_length)
5978 *write++ = to[pos - from]; // replace
5979 }
5980
5981 // zero-terminate
5982 *write = 0;
5983 }
5984
// Variable holding a boolean value (initially false).
struct xpath_variable_boolean: xpath_variable
{
    xpath_variable_boolean(): value(false)
    {
    }

    bool value;
    // First character of the variable name; new_xpath_variable allocates extra space
    // past the object so the whole zero-terminated name is stored here in place.
    char_t name[1];
};
5994
// Variable holding a numeric value (initially 0).
struct xpath_variable_number: xpath_variable
{
    xpath_variable_number(): value(0)
    {
    }

    double value;
    // First character of the variable name; new_xpath_variable allocates extra space
    // past the object so the whole zero-terminated name is stored here in place.
    char_t name[1];
};
6004
// Variable holding a string value (initially a null pointer).
struct xpath_variable_string: xpath_variable
{
    xpath_variable_string(): value(0)
    {
    }

    // Releases the value buffer through global_deallocate when one is set.
    ~xpath_variable_string()
    {
        if (value) global_deallocate(value);
    }

    // NOTE(review): presumably allocated with global_allocate by whoever assigns it - confirm against the setter
    char_t* value;
    // First character of the variable name; new_xpath_variable allocates extra space
    // past the object so the whole zero-terminated name is stored here in place.
    char_t name[1];
};
6019
// Variable holding a node set value (default-constructed).
struct xpath_variable_node_set: xpath_variable
{
    xpath_node_set value;
    // First character of the variable name; new_xpath_variable allocates extra space
    // past the object so the whole zero-terminated name is stored here in place.
    char_t name[1];
};
6025
// Shared default-constructed node set.
// NOTE(review): presumably handed out by reference when no real node set is available - confirm against its users
const xpath_node_set dummy_node_set;
6027
6028 unsigned int hash_string(const char_t* str)
6029 {
6030 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
6031 unsigned int result = 0;
6032
6033 while (*str)
6034 {
6035 result += static_cast<unsigned int>(*str++);
6036 result += result << 10;
6037 result ^= result >> 6;
6038 }
6039
6040 result += result << 3;
6041 result ^= result >> 11;
6042 result += result << 15;
6043
6044 return result;
6045 }
6046
6047 template <typename T> T* new_xpath_variable(const char_t* name)
6048 {
6049 size_t length = strlength(name);
6050 if (length == 0) return 0; // empty variable names are invalid
6051
6052 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
6053 void* memory = global_allocate(sizeof(T) + length * sizeof(char_t));
6054 if (!memory) return 0;
6055
6056 T* result = new (memory) T();
6057
6058 memcpy(result->name, name, (length + 1) * sizeof(char_t));
6059
6060 return result;
6061 }
6062
6063 xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
6064 {
6065 switch (type)
6066 {
6067 case xpath_type_node_set:
6068 return new_xpath_variable<xpath_variable_node_set>(name);
6069
6070 case xpath_type_number:
6071 return new_xpath_variable<xpath_variable_number>(name);
6072
6073 case xpath_type_string:
6074 return new_xpath_variable<xpath_variable_string>(name);
6075
6076 case xpath_type_boolean:
6077 return new_xpath_variable<xpath_variable_boolean>(name);
6078
6079 default:
6080 return 0;
6081 }
6082 }
6083
// Counterpart of new_xpath_variable<T>: runs the destructor explicitly (the object was created
// with placement new) and then returns the storage via global_deallocate.
template <typename T> void delete_xpath_variable(T* var)
{
    T* object = var;

    object->~T();

    global_deallocate(object);
}
6089
6090 void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
6091 {
6092 switch (type)
6093 {
6094 case xpath_type_node_set:
6095 delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
6096 break;
6097
6098 case xpath_type_number:
6099 delete_xpath_variable(static_cast<xpath_variable_number*>(var));
6100 break;
6101
6102 case xpath_type_string:
6103 delete_xpath_variable(static_cast<xpath_variable_string*>(var));
6104 break;
6105
6106 case xpath_type_boolean:
6107 delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
6108 break;
6109
6110 default:
6111 assert(!"Invalid variable type");
6112 }
6113 }
6114
6115 xpath_variable* get_variable(xpath_variable_set* set, const char_t* begin, const char_t* end)
6116 {
6117 char_t buffer[32];
6118
6119 size_t length = static_cast<size_t>(end - begin);
6120 char_t* scratch = buffer;
6121
6122 if (length >= sizeof(buffer) / sizeof(buffer[0]))
6123 {
6124 // need to make dummy on-heap copy
6125 scratch = static_cast<char_t*>(global_allocate((length + 1) * sizeof(char_t)));
6126 if (!scratch) return 0;
6127 }
6128
6129 // copy string to zero-terminated buffer and perform lookup
6130 memcpy(scratch, begin, length * sizeof(char_t));
6131 scratch[length] = 0;
6132
6133 xpath_variable* result = set->get(scratch);
6134
6135 // free dummy buffer
6136 if (scratch != buffer) global_deallocate(scratch);
6137
6138 return result;
6139 }
6140}
6141
6142// Internal node set class
6143namespace
6144{
6145 xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
6146 {
6147 xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
6148
6149 if (type == xpath_node_set::type_unsorted)
6150 {
6151 sort(begin, end, document_order_comparator());
6152
6153 type = xpath_node_set::type_sorted;
6154 }
6155
6156 if (type != order) reverse(begin, end);
6157
6158 return order;
6159 }
6160
6161 xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
6162 {
6163 if (begin == end) return xpath_node();
6164
6165 switch (type)
6166 {
6167 case xpath_node_set::type_sorted:
6168 return *begin;
6169
6170 case xpath_node_set::type_sorted_reverse:
6171 return *(end - 1);
6172
6173 case xpath_node_set::type_unsorted:
6174 return *min_element(begin, end, document_order_comparator());
6175
6176 default:
6177 assert(!"Invalid node set type");
6178 return xpath_node();
6179 }
6180 }
6181 class xpath_node_set_raw
6182 {
6183 xpath_node_set::type_t _type;
6184
6185 xpath_node* _begin;
6186 xpath_node* _end;
6187 xpath_node* _eos;
6188
6189 public:
6190 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
6191 {
6192 }
6193
6194 xpath_node* begin() const
6195 {
6196 return _begin;
6197 }
6198
6199 xpath_node* end() const
6200 {
6201 return _end;
6202 }
6203
6204 bool empty() const
6205 {
6206 return _begin == _end;
6207 }
6208
6209 size_t size() const
6210 {
6211 return static_cast<size_t>(_end - _begin);
6212 }
6213
6214 xpath_node first() const
6215 {
6216 return xpath_first(_begin, _end, _type);
6217 }
6218
6219 void push_back(const xpath_node& node, xpath_allocator* alloc)
6220 {
6221 if (_end == _eos)
6222 {
6223 size_t capacity = static_cast<size_t>(_eos - _begin);
6224
6225 // get new capacity (1.5x rule)
6226 size_t new_capacity = capacity + capacity / 2 + 1;
6227
6228 // reallocate the old array or allocate a new one
6229 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
6230 assert(data);
6231
6232 // finalize
6233 _begin = data;
6234 _end = data + capacity;
6235 _eos = data + new_capacity;
6236 }
6237
6238 *_end++ = node;
6239 }
6240
6241 void append(const xpath_node* begin, const xpath_node* end, xpath_allocator* alloc)
6242 {
6243 size_t size = static_cast<size_t>(_end - _begin);
6244 size_t capacity = static_cast<size_t>(_eos - _begin);
6245 size_t count = static_cast<size_t>(end - begin);
6246
6247 if (size + count > capacity)
6248 {
6249 // reallocate the old array or allocate a new one
6250 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size + count) * sizeof(xpath_node)));
6251 assert(data);
6252
6253 // finalize
6254 _begin = data;
6255 _end = data + size;
6256 _eos = data + size + count;
6257 }
6258
6259 memcpy(_end, begin, count * sizeof(xpath_node));
6260 _end += count;
6261 }
6262
6263 void sort_do()
6264 {
6265 _type = xpath_sort(_begin, _end, _type, false);
6266 }
6267
6268 void truncate(xpath_node* pos)
6269 {
6270 assert(_begin <= pos && pos <= _end);
6271
6272 _end = pos;
6273 }
6274
6275 void remove_duplicates()
6276 {
6277 if (_type == xpath_node_set::type_unsorted)
6278 sort(_begin, _end, duplicate_comparator());
6279
6280 _end = unique(_begin, _end);
6281 }
6282
6283 xpath_node_set::type_t type() const
6284 {
6285 return _type;
6286 }
6287
6288 void set_type(xpath_node_set::type_t type)
6289 {
6290 _type = type;
6291 }
6292 };
6293}
6294
6295namespace
6296{
6297 struct xpath_context
6298 {
6299 xpath_node n;
6300 size_t position, size;
6301
6302 xpath_context(const xpath_node& n, size_t position, size_t size): n(n), position(position), size(size)
6303 {
6304 }
6305 };
6306
// Token kinds produced by xpath_lexer::next().
enum lexeme_t
{
    lex_none = 0,           // input that does not form a valid lexeme
    lex_equal,              // =
    lex_not_equal,          // !=
    lex_less,               // <
    lex_greater,            // >
    lex_less_or_equal,      // <=
    lex_greater_or_equal,   // >=
    lex_plus,               // +
    lex_minus,              // -
    lex_multiply,           // *
    lex_union,              // |
    lex_var_ref,            // $name or $prefix:name (contents exclude the '$')
    lex_open_brace,         // (
    lex_close_brace,        // )
    lex_quoted_string,      // '...' or "..." (contents exclude the quotes)
    lex_number,             // digits, digits.digits or .digits
    lex_slash,              // /
    lex_double_slash,       // //
    lex_open_square_brace,  // [
    lex_close_square_brace, // ]
    lex_string,             // name, prefix:name or prefix:*
    lex_comma,              // ,
    lex_axis_attribute,     // @
    lex_dot,                // .
    lex_double_dot,         // ..
    lex_double_colon,       // ::
    lex_eof                 // end of the query string
};
6337
6338 struct xpath_lexer_string
6339 {
6340 const char_t* begin;
6341 const char_t* end;
6342
6343 xpath_lexer_string(): begin(0), end(0)
6344 {
6345 }
6346
6347 bool operator==(const char_t* other) const
6348 {
6349 size_t length = static_cast<size_t>(end - begin);
6350
6351 return strequalrange(other, begin, length);
6352 }
6353 };
6354
// Tokenizer for a zero-terminated XPath query string.
// The constructor scans the first lexeme right away; next() moves on to the following one.
// current()/current_pos()/contents() describe the lexeme scanned last.
class xpath_lexer
{
    // scan position: where the next call to next() starts reading
    const char_t* _cur;
    // first character of the current lexeme (after skipped whitespace)
    const char_t* _cur_lexeme_pos;
    // text range of the current lexeme; only updated for variable references, numbers,
    // names and quoted strings
    xpath_lexer_string _cur_lexeme_contents;

    // kind of the current lexeme
    lexeme_t _cur_lexeme;

public:
    // Starts scanning at the beginning of query and reads the first lexeme.
    explicit xpath_lexer(const char_t* query): _cur(query)
    {
        next();
    }

    // Returns the current scan position (just past the current lexeme when one was consumed).
    const char_t* state() const
    {
        return _cur;
    }

    // Scans the next lexeme starting at the current scan position.
    // For input that forms no lexeme, the kind becomes lex_none; in most of those cases the
    // scan position does not move past the offending character.
    void next()
    {
        const char_t* cur = _cur;

        while (IS_CHARTYPE(*cur, ct_space)) ++cur;

        // save lexeme position for error reporting
        _cur_lexeme_pos = cur;

        switch (*cur)
        {
        case 0:
            _cur_lexeme = lex_eof;
            break;

        case '>':
            if (*(cur+1) == '=')
            {
                cur += 2;
                _cur_lexeme = lex_greater_or_equal;
            }
            else
            {
                cur += 1;
                _cur_lexeme = lex_greater;
            }
            break;

        case '<':
            if (*(cur+1) == '=')
            {
                cur += 2;
                _cur_lexeme = lex_less_or_equal;
            }
            else
            {
                cur += 1;
                _cur_lexeme = lex_less;
            }
            break;

        case '!':
            // '!' is only valid as part of '!='
            if (*(cur+1) == '=')
            {
                cur += 2;
                _cur_lexeme = lex_not_equal;
            }
            else
            {
                _cur_lexeme = lex_none;
            }
            break;

        case '=':
            cur += 1;
            _cur_lexeme = lex_equal;

            break;

        case '+':
            cur += 1;
            _cur_lexeme = lex_plus;

            break;

        case '-':
            cur += 1;
            _cur_lexeme = lex_minus;

            break;

        case '*':
            cur += 1;
            _cur_lexeme = lex_multiply;

            break;

        case '|':
            cur += 1;
            _cur_lexeme = lex_union;

            break;

        case '$':
            // variable reference: '$' followed by a name, optionally prefix:name
            cur += 1;

            if (IS_CHARTYPEX(*cur, ctx_start_symbol))
            {
                _cur_lexeme_contents.begin = cur;

                while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++;

                if (cur[0] == ':' && IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
                {
                    cur++; // :

                    while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
                }

                _cur_lexeme_contents.end = cur;

                _cur_lexeme = lex_var_ref;
            }
            else
            {
                _cur_lexeme = lex_none;
            }

            break;

        case '(':
            cur += 1;
            _cur_lexeme = lex_open_brace;

            break;

        case ')':
            cur += 1;
            _cur_lexeme = lex_close_brace;

            break;

        case '[':
            cur += 1;
            _cur_lexeme = lex_open_square_brace;

            break;

        case ']':
            cur += 1;
            _cur_lexeme = lex_close_square_brace;

            break;

        case ',':
            cur += 1;
            _cur_lexeme = lex_comma;

            break;

        case '/':
            if (*(cur+1) == '/')
            {
                cur += 2;
                _cur_lexeme = lex_double_slash;
            }
            else
            {
                cur += 1;
                _cur_lexeme = lex_slash;
            }
            break;

        case '.':
            // '..', a number starting with '.', or a lone '.'
            if (*(cur+1) == '.')
            {
                cur += 2;
                _cur_lexeme = lex_double_dot;
            }
            else if (IS_CHARTYPEX(*(cur+1), ctx_digit))
            {
                _cur_lexeme_contents.begin = cur; // .

                ++cur;

                while (IS_CHARTYPEX(*cur, ctx_digit)) cur++;

                _cur_lexeme_contents.end = cur;

                _cur_lexeme = lex_number;
            }
            else
            {
                cur += 1;
                _cur_lexeme = lex_dot;
            }
            break;

        case '@':
            cur += 1;
            _cur_lexeme = lex_axis_attribute;

            break;

        case '"':
        case '\'':
        {
            // quoted string: runs up to the matching quote character, no escapes
            char_t terminator = *cur;

            ++cur;

            _cur_lexeme_contents.begin = cur;
            while (*cur && *cur != terminator) cur++;
            _cur_lexeme_contents.end = cur;

            // reaching the end of input first means the string is unterminated
            if (!*cur)
                _cur_lexeme = lex_none;
            else
            {
                cur += 1;
                _cur_lexeme = lex_quoted_string;
            }

            break;
        }

        case ':':
            // ':' is only valid as part of '::'
            if (*(cur+1) == ':')
            {
                cur += 2;
                _cur_lexeme = lex_double_colon;
            }
            else
            {
                _cur_lexeme = lex_none;
            }
            break;

        default:
            if (IS_CHARTYPEX(*cur, ctx_digit))
            {
                // number: digits with an optional '.' and further digits
                _cur_lexeme_contents.begin = cur;

                while (IS_CHARTYPEX(*cur, ctx_digit)) cur++;

                if (*cur == '.')
                {
                    cur++;

                    while (IS_CHARTYPEX(*cur, ctx_digit)) cur++;
                }

                _cur_lexeme_contents.end = cur;

                _cur_lexeme = lex_number;
            }
            else if (IS_CHARTYPEX(*cur, ctx_start_symbol))
            {
                // name, optionally followed by ':*' or ':name'
                _cur_lexeme_contents.begin = cur;

                while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++;

                if (cur[0] == ':')
                {
                    if (cur[1] == '*') // namespace test ncname:*
                    {
                        cur += 2; // :*
                    }
                    else if (IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
                    {
                        cur++; // :

                        while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
                    }
                }

                _cur_lexeme_contents.end = cur;

                _cur_lexeme = lex_string;
            }
            else
            {
                _cur_lexeme = lex_none;
            }
        }

        _cur = cur;
    }

    // Kind of the lexeme scanned last.
    lexeme_t current() const
    {
        return _cur_lexeme;
    }

    // Position of the first character of the lexeme scanned last.
    const char_t* current_pos() const
    {
        return _cur_lexeme_pos;
    }

    // Text of the lexeme scanned last; only valid for the lexeme kinds named in the assertion.
    const xpath_lexer_string& contents() const
    {
        assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);

        return _cur_lexeme_contents;
    }
};
6660
// Kinds of XPath AST nodes. "left", "right" and "third" in the comments name the node's operands.
enum ast_type_t
{
    ast_op_or,                      // left or right
    ast_op_and,                     // left and right
    ast_op_equal,                   // left = right
    ast_op_not_equal,               // left != right
    ast_op_less,                    // left < right
    ast_op_greater,                 // left > right
    ast_op_less_or_equal,           // left <= right
    ast_op_greater_or_equal,        // left >= right
    ast_op_add,                     // left + right
    ast_op_subtract,                // left - right
    ast_op_multiply,                // left * right
    ast_op_divide,                  // left div right
    ast_op_mod,                     // left mod right
    ast_op_negate,                  // - left (unary minus)
    ast_op_union,                   // left | right
    ast_predicate,                  // apply predicate to set; next points to next predicate
    ast_filter,                     // select * from left where right
    ast_filter_posinv,              // select * from left where right; proximity position invariant
    ast_string_constant,            // string constant
    ast_number_constant,            // number constant
    ast_variable,                   // variable
    ast_func_last,                  // last()
    ast_func_position,              // position()
    ast_func_count,                 // count(left)
    ast_func_id,                    // id(left)
    ast_func_local_name_0,          // local-name()
    ast_func_local_name_1,          // local-name(left)
    ast_func_namespace_uri_0,       // namespace-uri()
    ast_func_namespace_uri_1,       // namespace-uri(left)
    ast_func_name_0,                // name()
    ast_func_name_1,                // name(left)
    ast_func_string_0,              // string()
    ast_func_string_1,              // string(left)
    ast_func_concat,                // concat(left, right, siblings)
    ast_func_starts_with,           // starts-with(left, right)
    ast_func_contains,              // contains(left, right)
    ast_func_substring_before,      // substring-before(left, right)
    ast_func_substring_after,       // substring-after(left, right)
    ast_func_substring_2,           // substring(left, right)
    ast_func_substring_3,           // substring(left, right, third)
    ast_func_string_length_0,       // string-length()
    ast_func_string_length_1,       // string-length(left)
    ast_func_normalize_space_0,     // normalize-space()
    ast_func_normalize_space_1,     // normalize-space(left)
    ast_func_translate,             // translate(left, right, third)
    ast_func_boolean,               // boolean(left)
    ast_func_not,                   // not(left)
    ast_func_true,                  // true()
    ast_func_false,                 // false()
    ast_func_lang,                  // lang(left)
    ast_func_number_0,              // number()
    ast_func_number_1,              // number(left)
    ast_func_sum,                   // sum(left)
    ast_func_floor,                 // floor(left)
    ast_func_ceiling,               // ceiling(left)
    ast_func_round,                 // round(left)
    ast_step,                       // process set left with step
    ast_step_root                   // select root node
};
6722
// Axes of a location step; the notes describe what step_fill collects for an xml_node.
enum axis_t
{
    axis_ancestor,              // parent chain of the node
    axis_ancestor_or_self,      // the node itself, then its parent chain
    axis_attribute,             // attributes of the node
    axis_child,                 // direct children
    axis_descendant,            // whole subtree below the node
    axis_descendant_or_self,    // the node itself, then its subtree
    axis_following,             // nodes after the node's subtree, in document order
    axis_following_sibling,     // later siblings
    axis_namespace,             // has no case in step_fill for xml_node (falls into its "Unimplemented axis" assert)
    axis_parent,                // the parent, if any
    axis_preceding,             // nodes before the node, excluding its ancestors
    axis_preceding_sibling,     // earlier siblings, nearest first
    axis_self                   // the node itself
};
6739
// Node tests of a location step; the notes describe how step_push applies them.
enum nodetest_t
{
    nodetest_none,              // no test; not handled by step_push
    nodetest_name,              // element (or attribute) whose name equals the stored node test string
    nodetest_type_node,         // any node
    nodetest_type_comment,      // comment nodes
    nodetest_type_pi,           // processing instruction nodes
    nodetest_type_text,         // pcdata and cdata nodes
    nodetest_pi,                // processing instruction whose name equals the stored node test string
    nodetest_all,               // any element (or any attribute on the attribute overload)
    nodetest_all_in_namespace   // element (or attribute) whose name starts with the stored node test string
};
6752
// Wraps an axis_t value in a distinct type so it can be passed as a template/tag argument;
// step_fill reads it back through T::axis.
template <axis_t N> struct axis_to_type
{
    static const axis_t axis;
};

// Out-of-class definition of the static member; its value is the template argument.
template <axis_t N> const axis_t axis_to_type<N>::axis = N;
6759
6760 class xpath_ast_node
6761 {
6762 private:
6763 // node type
6764 char _type;
6765 char _rettype;
6766
6767 // for ast_step / ast_predicate
6768 char _axis;
6769 char _test;
6770
6771 // tree node structure
6772 xpath_ast_node* _left;
6773 xpath_ast_node* _right;
6774 xpath_ast_node* _next;
6775
6776 union
6777 {
6778 // value for ast_string_constant
6779 const char_t* string;
6780 // value for ast_number_constant
6781 double number;
6782 // variable for ast_variable
6783 xpath_variable* variable;
6784 // node test for ast_step (node name/namespace/node type/pi target)
6785 const char_t* nodetest;
6786 } _data;
6787
6788 xpath_ast_node(const xpath_ast_node&);
6789 xpath_ast_node& operator=(const xpath_ast_node&);
6790
6791 template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
6792 {
6793 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
6794
6795 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
6796 {
6797 if (lt == xpath_type_boolean || rt == xpath_type_boolean)
6798 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
6799 else if (lt == xpath_type_number || rt == xpath_type_number)
6800 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
6801 else if (lt == xpath_type_string || rt == xpath_type_string)
6802 {
6803 xpath_allocator_capture cr(stack.result);
6804
6805 xpath_string ls = lhs->eval_string(c, stack);
6806 xpath_string rs = rhs->eval_string(c, stack);
6807
6808 return comp(ls, rs);
6809 }
6810 }
6811 else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
6812 {
6813 xpath_allocator_capture cr(stack.result);
6814
6815 xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
6816 xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
6817
6818 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
6819 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
6820 {
6821 xpath_allocator_capture cri(stack.result);
6822
6823 if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
6824 return true;
6825 }
6826
6827 return false;
6828 }
6829 else
6830 {
6831 if (lt == xpath_type_node_set)
6832 {
6833 swap(lhs, rhs);
6834 swap(lt, rt);
6835 }
6836
6837 if (lt == xpath_type_boolean)
6838 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
6839 else if (lt == xpath_type_number)
6840 {
6841 xpath_allocator_capture cr(stack.result);
6842
6843 double l = lhs->eval_number(c, stack);
6844 xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
6845
6846 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
6847 {
6848 xpath_allocator_capture cri(stack.result);
6849
6850 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
6851 return true;
6852 }
6853
6854 return false;
6855 }
6856 else if (lt == xpath_type_string)
6857 {
6858 xpath_allocator_capture cr(stack.result);
6859
6860 xpath_string l = lhs->eval_string(c, stack);
6861 xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
6862
6863 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
6864 {
6865 xpath_allocator_capture cri(stack.result);
6866
6867 if (comp(l, string_value(*ri, stack.result)))
6868 return true;
6869 }
6870
6871 return false;
6872 }
6873 }
6874
6875 assert(!"Wrong types");
6876 return false;
6877 }
6878
6879 template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
6880 {
6881 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
6882
6883 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
6884 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
6885 else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
6886 {
6887 xpath_allocator_capture cr(stack.result);
6888
6889 xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
6890 xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
6891
6892 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
6893 {
6894 xpath_allocator_capture cri(stack.result);
6895
6896 double l = convert_string_to_number(string_value(*li, stack.result).c_str());
6897
6898 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
6899 {
6900 xpath_allocator_capture crii(stack.result);
6901
6902 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
6903 return true;
6904 }
6905 }
6906
6907 return false;
6908 }
6909 else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
6910 {
6911 xpath_allocator_capture cr(stack.result);
6912
6913 double l = lhs->eval_number(c, stack);
6914 xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
6915
6916 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
6917 {
6918 xpath_allocator_capture cri(stack.result);
6919
6920 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
6921 return true;
6922 }
6923
6924 return false;
6925 }
6926 else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
6927 {
6928 xpath_allocator_capture cr(stack.result);
6929
6930 xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
6931 double r = rhs->eval_number(c, stack);
6932
6933 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
6934 {
6935 xpath_allocator_capture cri(stack.result);
6936
6937 if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
6938 return true;
6939 }
6940
6941 return false;
6942 }
6943 else
6944 {
6945 assert(!"Wrong types");
6946 return false;
6947 }
6948 }
6949
6950 void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
6951 {
6952 assert(ns.size() >= first);
6953
6954 size_t i = 1;
6955 size_t size = ns.size() - first;
6956
6957 xpath_node* last = ns.begin() + first;
6958
6959 // remove_if... or well, sort of
6960 for (xpath_node* it = last; it != ns.end(); ++it, ++i)
6961 {
6962 xpath_context c(*it, i, size);
6963
6964 if (expr->rettype() == xpath_type_number)
6965 {
6966 if (expr->eval_number(c, stack) == i)
6967 *last++ = *it;
6968 }
6969 else if (expr->eval_boolean(c, stack))
6970 *last++ = *it;
6971 }
6972
6973 ns.truncate(last);
6974 }
6975
6976 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack)
6977 {
6978 if (ns.size() == first) return;
6979
6980 for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
6981 {
6982 apply_predicate(ns, first, pred->_left, stack);
6983 }
6984 }
6985
6986 void step_push(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& parent, xpath_allocator* alloc)
6987 {
6988 if (!a) return;
6989
6990 const char_t* name = a.name();
6991
6992 // There are no attribute nodes corresponding to attributes that declare namespaces
6993 // That is, "xmlns:..." or "xmlns"
6994 if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return;
6995
6996 switch (_test)
6997 {
6998 case nodetest_name:
6999 if (strequal(name, _data.nodetest)) ns.push_back(xpath_node(a, parent), alloc);
7000 break;
7001
7002 case nodetest_type_node:
7003 case nodetest_all:
7004 ns.push_back(xpath_node(a, parent), alloc);
7005 break;
7006
7007 case nodetest_all_in_namespace:
7008 if (starts_with(name, _data.nodetest))
7009 ns.push_back(xpath_node(a, parent), alloc);
7010 break;
7011
7012 default:
7013 ;
7014 }
7015 }
7016
7017 void step_push(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc)
7018 {
7019 if (!n) return;
7020
7021 switch (_test)
7022 {
7023 case nodetest_name:
7024 if (n.type() == node_element && strequal(n.name(), _data.nodetest)) ns.push_back(n, alloc);
7025 break;
7026
7027 case nodetest_type_node:
7028 ns.push_back(n, alloc);
7029 break;
7030
7031 case nodetest_type_comment:
7032 if (n.type() == node_comment)
7033 ns.push_back(n, alloc);
7034 break;
7035
7036 case nodetest_type_text:
7037 if (n.type() == node_pcdata || n.type() == node_cdata)
7038 ns.push_back(n, alloc);
7039 break;
7040
7041 case nodetest_type_pi:
7042 if (n.type() == node_pi)
7043 ns.push_back(n, alloc);
7044 break;
7045
7046 case nodetest_pi:
7047 if (n.type() == node_pi && strequal(n.name(), _data.nodetest))
7048 ns.push_back(n, alloc);
7049 break;
7050
7051 case nodetest_all:
7052 if (n.type() == node_element)
7053 ns.push_back(n, alloc);
7054 break;
7055
7056 case nodetest_all_in_namespace:
7057 if (n.type() == node_element && starts_with(n.name(), _data.nodetest))
7058 ns.push_back(n, alloc);
7059 break;
7060
7061 default:
7062 assert(!"Unknown axis");
7063 }
7064 }
7065
// Collects into ns the nodes reachable from xml_node n along the axis given by T::axis.
// Every candidate goes through step_push, which applies the node test. The last (unnamed)
// parameter only carries the axis type. Axes without a case below (e.g. axis_namespace)
// trip the "Unimplemented axis" assertion.
template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc, T)
{
    const axis_t axis = T::axis;

    switch (axis)
    {
    case axis_attribute:
    {
        for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute())
            step_push(ns, a, n, alloc);

        break;
    }

    case axis_child:
    {
        for (xml_node c = n.first_child(); c; c = c.next_sibling())
            step_push(ns, c, alloc);

        break;
    }

    case axis_descendant:
    case axis_descendant_or_self:
    {
        if (axis == axis_descendant_or_self)
            step_push(ns, n, alloc);

        // iterative depth-first walk of the subtree below n, in document order
        xml_node cur = n.first_child();

        while (cur && cur != n)
        {
            step_push(ns, cur, alloc);

            if (cur.first_child())
                cur = cur.first_child();
            else if (cur.next_sibling())
                cur = cur.next_sibling();
            else
            {
                // climb until a node with a next sibling is found, or we are back at n
                while (!cur.next_sibling() && cur != n)
                    cur = cur.parent();

                if (cur != n) cur = cur.next_sibling();
            }
        }

        break;
    }

    case axis_following_sibling:
    {
        for (xml_node c = n.next_sibling(); c; c = c.next_sibling())
            step_push(ns, c, alloc);

        break;
    }

    case axis_preceding_sibling:
    {
        // walks backwards, so nodes are pushed nearest sibling first
        for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling())
            step_push(ns, c, alloc);

        break;
    }

    case axis_following:
    {
        xml_node cur = n;

        // exit from this node so that we don't include descendants
        while (cur && !cur.next_sibling()) cur = cur.parent();
        cur = cur.next_sibling();

        // depth-first walk over everything after that point; when cur is an empty handle,
        // step_push ignores it and the loop ends at the break below
        for (;;)
        {
            step_push(ns, cur, alloc);

            if (cur.first_child())
                cur = cur.first_child();
            else if (cur.next_sibling())
                cur = cur.next_sibling();
            else
            {
                while (cur && !cur.next_sibling()) cur = cur.parent();
                cur = cur.next_sibling();

                if (!cur) break;
            }
        }

        break;
    }

    case axis_preceding:
    {
        xml_node cur = n;

        // move to the nearest preceding sibling of n or of one of its ancestors
        while (cur && !cur.previous_sibling()) cur = cur.parent();
        cur = cur.previous_sibling();

        // walk backwards: descend to the last leaf first, then visit siblings and parents
        for (;;)
        {
            if (cur.last_child())
                cur = cur.last_child();
            else
            {
                // leaf node, can't be ancestor
                step_push(ns, cur, alloc);

                if (cur.previous_sibling())
                    cur = cur.previous_sibling();
                else
                {
                    do
                    {
                        cur = cur.parent();
                        if (!cur) break;

                        // ancestors of n are not part of the preceding axis
                        if (!node_is_ancestor(cur, n)) step_push(ns, cur, alloc);
                    }
                    while (!cur.previous_sibling());

                    cur = cur.previous_sibling();

                    if (!cur) break;
                }
            }
        }

        break;
    }

    case axis_ancestor:
    case axis_ancestor_or_self:
    {
        if (axis == axis_ancestor_or_self)
            step_push(ns, n, alloc);

        // parent chain, nearest ancestor first
        xml_node cur = n.parent();

        while (cur)
        {
            step_push(ns, cur, alloc);

            cur = cur.parent();
        }

        break;
    }

    case axis_self:
    {
        step_push(ns, n, alloc);

        break;
    }

    case axis_parent:
    {
        if (n.parent()) step_push(ns, n.parent(), alloc);

        break;
    }

    default:
        assert(!"Unimplemented axis");
    }
}
7235
7236 template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& p, xpath_allocator* alloc, T v)
7237 {
7238 const axis_t axis = T::axis;
7239
7240 switch (axis)
7241 {
7242 case axis_ancestor:
7243 case axis_ancestor_or_self:
7244 {
7245 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
7246 step_push(ns, a, p, alloc);
7247
7248 xml_node cur = p;
7249
7250 while (cur)
7251 {
7252 step_push(ns, cur, alloc);
7253
7254 cur = cur.parent();
7255 }
7256
7257 break;
7258 }
7259
7260 case axis_descendant_or_self:
7261 case axis_self:
7262 {
7263 if (_test == nodetest_type_node) // reject attributes based on principal node type test
7264 step_push(ns, a, p, alloc);
7265
7266 break;
7267 }
7268
7269 case axis_following:
7270 {
7271 xml_node cur = p;
7272
7273 for (;;)
7274 {
7275 if (cur.first_child())
7276 cur = cur.first_child();
7277 else if (cur.next_sibling())
7278 cur = cur.next_sibling();
7279 else
7280 {
7281 while (cur && !cur.next_sibling()) cur = cur.parent();
7282 cur = cur.next_sibling();
7283
7284 if (!cur) break;
7285 }
7286
7287 step_push(ns, cur, alloc);
7288 }
7289
7290 break;
7291 }
7292
7293 case axis_parent:
7294 {
7295 step_push(ns, p, alloc);
7296
7297 break;
7298 }
7299
7300 case axis_preceding:
7301 {
7302 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
7303 step_fill(ns, p, alloc, v);
7304 break;
7305 }
7306
7307 default:
7308 assert(!"Unimplemented axis");
7309 }
7310 }
7311
7312 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, T v)
7313 {
7314 const axis_t axis = T::axis;
7315 bool attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
7316
7317 xpath_node_set_raw ns;
7318 ns.set_type((axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling) ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted);
7319
7320 if (_left)
7321 {
7322 xpath_node_set_raw s = _left->eval_node_set(c, stack);
7323
7324 // self axis preserves the original order
7325 if (axis == axis_self) ns.set_type(s.type());
7326
7327 for (const xpath_node* it = s.begin(); it != s.end(); ++it)
7328 {
7329 size_t size = ns.size();
7330
7331 // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
7332 if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
7333
7334 if (it->node())
7335 step_fill(ns, it->node(), stack.result, v);
7336 else if (attributes)
7337 step_fill(ns, it->attribute(), it->parent(), stack.result, v);
7338
7339 apply_predicates(ns, size, stack);
7340 }
7341 }
7342 else
7343 {
7344 if (c.n.node())
7345 step_fill(ns, c.n.node(), stack.result, v);
7346 else if (attributes)
7347 step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, v);
7348
7349 apply_predicates(ns, 0, stack);
7350 }
7351
7352 // child, attribute and self axes always generate unique set of nodes
7353 // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
7354 if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
7355 ns.remove_duplicates();
7356
7357 return ns;
7358 }
7359
7360 public:
7361 xpath_ast_node(ast_type_t type, xpath_value_type rettype, const char_t* value):
7362 _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(0), _right(0), _next(0)
7363 {
7364 assert(type == ast_string_constant);
7365 _data.string = value;
7366 }
7367
7368 xpath_ast_node(ast_type_t type, xpath_value_type rettype, double value):
7369 _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(0), _right(0), _next(0)
7370 {
7371 assert(type == ast_number_constant);
7372 _data.number = value;
7373 }
7374
7375 xpath_ast_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value):
7376 _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(0), _right(0), _next(0)
7377 {
7378 assert(type == ast_variable);
7379 _data.variable = value;
7380 }
7381
7382 xpath_ast_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
7383 _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(left), _right(right), _next(0)
7384 {
7385 }
7386
7387 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
7388 _type((char)type), _rettype(xpath_type_node_set), _axis((char)axis), _test((char)test), _left(left), _right(0), _next(0)
7389 {
7390 _data.nodetest = contents;
7391 }
7392
7393 void set_next(xpath_ast_node* value)
7394 {
7395 _next = value;
7396 }
7397
7398 void set_right(xpath_ast_node* value)
7399 {
7400 _right = value;
7401 }
7402
7403 bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
7404 {
7405 switch (_type)
7406 {
7407 case ast_op_or:
7408 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
7409
7410 case ast_op_and:
7411 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
7412
7413 case ast_op_equal:
7414 return compare_eq(_left, _right, c, stack, equal_to());
7415
7416 case ast_op_not_equal:
7417 return compare_eq(_left, _right, c, stack, not_equal_to());
7418
7419 case ast_op_less:
7420 return compare_rel(_left, _right, c, stack, less());
7421
7422 case ast_op_greater:
7423 return compare_rel(_right, _left, c, stack, less());
7424
7425 case ast_op_less_or_equal:
7426 return compare_rel(_left, _right, c, stack, less_equal());
7427
7428 case ast_op_greater_or_equal:
7429 return compare_rel(_right, _left, c, stack, less_equal());
7430
7431 case ast_func_starts_with:
7432 {
7433 xpath_allocator_capture cr(stack.result);
7434
7435 xpath_string lr = _left->eval_string(c, stack);
7436 xpath_string rr = _right->eval_string(c, stack);
7437
7438 return starts_with(lr.c_str(), rr.c_str());
7439 }
7440
7441 case ast_func_contains:
7442 {
7443 xpath_allocator_capture cr(stack.result);
7444
7445 xpath_string lr = _left->eval_string(c, stack);
7446 xpath_string rr = _right->eval_string(c, stack);
7447
7448 return find_substring(lr.c_str(), rr.c_str()) != 0;
7449 }
7450
7451 case ast_func_boolean:
7452 return _left->eval_boolean(c, stack);
7453
7454 case ast_func_not:
7455 return !_left->eval_boolean(c, stack);
7456
7457 case ast_func_true:
7458 return true;
7459
7460 case ast_func_false:
7461 return false;
7462
7463 case ast_func_lang:
7464 {
7465 if (c.n.attribute()) return false;
7466
7467 xpath_allocator_capture cr(stack.result);
7468
7469 xpath_string lang = _left->eval_string(c, stack);
7470
7471 for (xml_node n = c.n.node(); n; n = n.parent())
7472 {
7473 xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
7474
7475 if (a)
7476 {
7477 const char_t* value = a.value();
7478
7479 // strnicmp / strncasecmp is not portable
7480 for (const char_t* lit = lang.c_str(); *lit; ++lit)
7481 {
7482 if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
7483 ++value;
7484 }
7485
7486 return *value == 0 || *value == '-';
7487 }
7488 }
7489
7490 return false;
7491 }
7492
7493 case ast_variable:
7494 {
7495 assert(_rettype == _data.variable->type());
7496
7497 if (_rettype == xpath_type_boolean)
7498 return _data.variable->get_boolean();
7499
7500 // fallthrough to type conversion
7501 }
7502
7503 default:
7504 {
7505 switch (_rettype)
7506 {
7507 case xpath_type_number:
7508 return convert_number_to_boolean(eval_number(c, stack));
7509
7510 case xpath_type_string:
7511 {
7512 xpath_allocator_capture cr(stack.result);
7513
7514 return !eval_string(c, stack).empty();
7515 }
7516
7517 case xpath_type_node_set:
7518 {
7519 xpath_allocator_capture cr(stack.result);
7520
7521 return !eval_node_set(c, stack).empty();
7522 }
7523
7524 default:
7525 assert(!"Wrong expression for return type boolean");
7526 return false;
7527 }
7528 }
7529 }
7530 }
7531
7532 double eval_number(const xpath_context& c, const xpath_stack& stack)
7533 {
7534 switch (_type)
7535 {
7536 case ast_op_add:
7537 return _left->eval_number(c, stack) + _right->eval_number(c, stack);
7538
7539 case ast_op_subtract:
7540 return _left->eval_number(c, stack) - _right->eval_number(c, stack);
7541
7542 case ast_op_multiply:
7543 return _left->eval_number(c, stack) * _right->eval_number(c, stack);
7544
7545 case ast_op_divide:
7546 return _left->eval_number(c, stack) / _right->eval_number(c, stack);
7547
7548 case ast_op_mod:
7549 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
7550
7551 case ast_op_negate:
7552 return -_left->eval_number(c, stack);
7553
7554 case ast_number_constant:
7555 return _data.number;
7556
7557 case ast_func_last:
7558 return (double)c.size;
7559
7560 case ast_func_position:
7561 return (double)c.position;
7562
7563 case ast_func_count:
7564 {
7565 xpath_allocator_capture cr(stack.result);
7566
7567 return (double)_left->eval_node_set(c, stack).size();
7568 }
7569
7570 case ast_func_string_length_0:
7571 {
7572 xpath_allocator_capture cr(stack.result);
7573
7574 return (double)string_value(c.n, stack.result).length();
7575 }
7576
7577 case ast_func_string_length_1:
7578 {
7579 xpath_allocator_capture cr(stack.result);
7580
7581 return (double)_left->eval_string(c, stack).length();
7582 }
7583
7584 case ast_func_number_0:
7585 {
7586 xpath_allocator_capture cr(stack.result);
7587
7588 return convert_string_to_number(string_value(c.n, stack.result).c_str());
7589 }
7590
7591 case ast_func_number_1:
7592 return _left->eval_number(c, stack);
7593
7594 case ast_func_sum:
7595 {
7596 xpath_allocator_capture cr(stack.result);
7597
7598 double r = 0;
7599
7600 xpath_node_set_raw ns = _left->eval_node_set(c, stack);
7601
7602 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
7603 {
7604 xpath_allocator_capture cri(stack.result);
7605
7606 r += convert_string_to_number(string_value(*it, stack.result).c_str());
7607 }
7608
7609 return r;
7610 }
7611
7612 case ast_func_floor:
7613 {
7614 double r = _left->eval_number(c, stack);
7615
7616 return r == r ? floor(r) : r;
7617 }
7618
7619 case ast_func_ceiling:
7620 {
7621 double r = _left->eval_number(c, stack);
7622
7623 return r == r ? ceil(r) : r;
7624 }
7625
7626 case ast_func_round:
7627 return round_nearest_nzero(_left->eval_number(c, stack));
7628
7629 case ast_variable:
7630 {
7631 assert(_rettype == _data.variable->type());
7632
7633 if (_rettype == xpath_type_number)
7634 return _data.variable->get_number();
7635
7636 // fallthrough to type conversion
7637 }
7638
7639 default:
7640 {
7641 switch (_rettype)
7642 {
7643 case xpath_type_boolean:
7644 return eval_boolean(c, stack) ? 1 : 0;
7645
7646 case xpath_type_string:
7647 {
7648 xpath_allocator_capture cr(stack.result);
7649
7650 return convert_string_to_number(eval_string(c, stack).c_str());
7651 }
7652
7653 case xpath_type_node_set:
7654 {
7655 xpath_allocator_capture cr(stack.result);
7656
7657 return convert_string_to_number(eval_string(c, stack).c_str());
7658 }
7659
7660 default:
7661 assert(!"Wrong expression for return type number");
7662 return 0;
7663 }
7664
7665 }
7666 }
7667 }
7668
7669 xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
7670 {
7671 assert(_type == ast_func_concat);
7672
7673 xpath_allocator_capture ct(stack.temp);
7674
7675 // count the string number
7676 size_t count = 1;
7677 for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
7678
7679 // gather all strings
7680 xpath_string static_buffer[4];
7681 xpath_string* buffer = static_buffer;
7682
7683 // allocate on-heap for large concats
7684 if (count > sizeof(static_buffer) / sizeof(static_buffer[0]))
7685 {
7686 buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
7687 assert(buffer);
7688 }
7689
7690 // evaluate all strings to temporary stack
7691 xpath_stack swapped_stack = {stack.temp, stack.result};
7692
7693 buffer[0] = _left->eval_string(c, swapped_stack);
7694
7695 size_t pos = 1;
7696 for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
7697 assert(pos == count);
7698
7699 // get total length
7700 size_t length = 0;
7701 for (size_t i = 0; i < count; ++i) length += buffer[i].length();
7702
7703 // create final string
7704 char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
7705 assert(result);
7706
7707 char_t* ri = result;
7708
7709 for (size_t j = 0; j < count; ++j)
7710 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
7711 *ri++ = *bi;
7712
7713 *ri = 0;
7714
7715 return xpath_string(result, true);
7716 }
7717
7718 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
7719 {
7720 switch (_type)
7721 {
7722 case ast_string_constant:
7723 return xpath_string_const(_data.string);
7724
7725 case ast_func_local_name_0:
7726 {
7727 xpath_node na = c.n;
7728
7729 return xpath_string_const(local_name(na));
7730 }
7731
7732 case ast_func_local_name_1:
7733 {
7734 xpath_allocator_capture cr(stack.result);
7735
7736 xpath_node_set_raw ns = _left->eval_node_set(c, stack);
7737 xpath_node na = ns.first();
7738
7739 return xpath_string_const(local_name(na));
7740 }
7741
7742 case ast_func_name_0:
7743 {
7744 xpath_node na = c.n;
7745
7746 return xpath_string_const(qualified_name(na));
7747 }
7748
7749 case ast_func_name_1:
7750 {
7751 xpath_allocator_capture cr(stack.result);
7752
7753 xpath_node_set_raw ns = _left->eval_node_set(c, stack);
7754 xpath_node na = ns.first();
7755
7756 return xpath_string_const(qualified_name(na));
7757 }
7758
7759 case ast_func_namespace_uri_0:
7760 {
7761 xpath_node na = c.n;
7762
7763 return xpath_string_const(namespace_uri(na));
7764 }
7765
7766 case ast_func_namespace_uri_1:
7767 {
7768 xpath_allocator_capture cr(stack.result);
7769
7770 xpath_node_set_raw ns = _left->eval_node_set(c, stack);
7771 xpath_node na = ns.first();
7772
7773 return xpath_string_const(namespace_uri(na));
7774 }
7775
7776 case ast_func_string_0:
7777 return string_value(c.n, stack.result);
7778
7779 case ast_func_string_1:
7780 return _left->eval_string(c, stack);
7781
7782 case ast_func_concat:
7783 return eval_string_concat(c, stack);
7784
7785 case ast_func_substring_before:
7786 {
7787 xpath_allocator_capture cr(stack.temp);
7788
7789 xpath_stack swapped_stack = {stack.temp, stack.result};
7790
7791 xpath_string s = _left->eval_string(c, swapped_stack);
7792 xpath_string p = _right->eval_string(c, swapped_stack);
7793
7794 const char_t* pos = find_substring(s.c_str(), p.c_str());
7795
7796 return pos ? xpath_string(s.c_str(), pos, stack.result) : xpath_string();
7797 }
7798
7799 case ast_func_substring_after:
7800 {
7801 xpath_allocator_capture cr(stack.temp);
7802
7803 xpath_stack swapped_stack = {stack.temp, stack.result};
7804
7805 xpath_string s = _left->eval_string(c, swapped_stack);
7806 xpath_string p = _right->eval_string(c, swapped_stack);
7807
7808 const char_t* pos = find_substring(s.c_str(), p.c_str());
7809 if (!pos) return xpath_string();
7810
7811 const char_t* result = pos + p.length();
7812
7813 return s.uses_heap() ? xpath_string(result, stack.result) : xpath_string_const(result);
7814 }
7815
7816 case ast_func_substring_2:
7817 {
7818 xpath_allocator_capture cr(stack.temp);
7819
7820 xpath_stack swapped_stack = {stack.temp, stack.result};
7821
7822 xpath_string s = _left->eval_string(c, swapped_stack);
7823 size_t s_length = s.length();
7824
7825 double first = round_nearest(_right->eval_number(c, stack));
7826
7827 if (is_nan(first)) return xpath_string(); // NaN
7828 else if (first >= s_length + 1) return xpath_string();
7829
7830 size_t pos = first < 1 ? 1 : (size_t)first;
7831 assert(1 <= pos && pos <= s_length + 1);
7832
7833 const char_t* rbegin = s.c_str() + (pos - 1);
7834
7835 return s.uses_heap() ? xpath_string(rbegin, stack.result) : xpath_string_const(rbegin);
7836 }
7837
7838 case ast_func_substring_3:
7839 {
7840 xpath_allocator_capture cr(stack.temp);
7841
7842 xpath_stack swapped_stack = {stack.temp, stack.result};
7843
7844 xpath_string s = _left->eval_string(c, swapped_stack);
7845 size_t s_length = s.length();
7846
7847 double first = round_nearest(_right->eval_number(c, stack));
7848 double last = first + round_nearest(_right->_next->eval_number(c, stack));
7849
7850 if (is_nan(first) || is_nan(last)) return xpath_string();
7851 else if (first >= s_length + 1) return xpath_string();
7852 else if (first >= last) return xpath_string();
7853 else if (last < 1) return xpath_string();
7854
7855 size_t pos = first < 1 ? 1 : (size_t)first;
7856 size_t end = last >= s_length + 1 ? s_length + 1 : (size_t)last;
7857
7858 assert(1 <= pos && pos <= end && end <= s_length + 1);
7859 const char_t* rbegin = s.c_str() + (pos - 1);
7860 const char_t* rend = s.c_str() + (end - 1);
7861
7862 return (end == s_length + 1 && !s.uses_heap()) ? xpath_string_const(rbegin) : xpath_string(rbegin, rend, stack.result);
7863 }
7864
7865 case ast_func_normalize_space_0:
7866 {
7867 xpath_string s = string_value(c.n, stack.result);
7868
7869 normalize_space(s.data(stack.result));
7870
7871 return s;
7872 }
7873
7874 case ast_func_normalize_space_1:
7875 {
7876 xpath_string s = _left->eval_string(c, stack);
7877
7878 normalize_space(s.data(stack.result));
7879
7880 return s;
7881 }
7882
7883 case ast_func_translate:
7884 {
7885 xpath_allocator_capture cr(stack.temp);
7886
7887 xpath_stack swapped_stack = {stack.temp, stack.result};
7888
7889 xpath_string s = _left->eval_string(c, stack);
7890 xpath_string from = _right->eval_string(c, swapped_stack);
7891 xpath_string to = _right->_next->eval_string(c, swapped_stack);
7892
7893 translate(s.data(stack.result), from.c_str(), to.c_str());
7894
7895 return s;
7896 }
7897
7898 case ast_variable:
7899 {
7900 assert(_rettype == _data.variable->type());
7901
7902 if (_rettype == xpath_type_string)
7903 return xpath_string_const(_data.variable->get_string());
7904
7905 // fallthrough to type conversion
7906 }
7907
7908 default:
7909 {
7910 switch (_rettype)
7911 {
7912 case xpath_type_boolean:
7913 return xpath_string_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
7914
7915 case xpath_type_number:
7916 return convert_number_to_string(eval_number(c, stack), stack.result);
7917
7918 case xpath_type_node_set:
7919 {
7920 xpath_allocator_capture cr(stack.temp);
7921
7922 xpath_stack swapped_stack = {stack.temp, stack.result};
7923
7924 xpath_node_set_raw ns = eval_node_set(c, swapped_stack);
7925 return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
7926 }
7927
7928 default:
7929 assert(!"Wrong expression for return type string");
7930 return xpath_string();
7931 }
7932 }
7933 }
7934 }
7935
7936 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack)
7937 {
7938 switch (_type)
7939 {
7940 case ast_op_union:
7941 {
7942 xpath_allocator_capture cr(stack.temp);
7943
7944 xpath_stack swapped_stack = {stack.temp, stack.result};
7945
7946 xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack);
7947 xpath_node_set_raw rs = _right->eval_node_set(c, stack);
7948
7949 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
7950 rs.set_type(xpath_node_set::type_unsorted);
7951
7952 rs.append(ls.begin(), ls.end(), stack.result);
7953 rs.remove_duplicates();
7954
7955 return rs;
7956 }
7957
7958 case ast_filter:
7959 case ast_filter_posinv:
7960 {
7961 xpath_node_set_raw set = _left->eval_node_set(c, stack);
7962
7963 // either expression is a number or it contains position() call; sort by document order
7964 if (_type == ast_filter) set.sort_do();
7965
7966 apply_predicate(set, 0, _right, stack);
7967
7968 return set;
7969 }
7970
7971 case ast_func_id:
7972 return xpath_node_set_raw();
7973
7974 case ast_step:
7975 {
7976 switch (_axis)
7977 {
7978 case axis_ancestor:
7979 return step_do(c, stack, axis_to_type<axis_ancestor>());
7980
7981 case axis_ancestor_or_self:
7982 return step_do(c, stack, axis_to_type<axis_ancestor_or_self>());
7983
7984 case axis_attribute:
7985 return step_do(c, stack, axis_to_type<axis_attribute>());
7986
7987 case axis_child:
7988 return step_do(c, stack, axis_to_type<axis_child>());
7989
7990 case axis_descendant:
7991 return step_do(c, stack, axis_to_type<axis_descendant>());
7992
7993 case axis_descendant_or_self:
7994 return step_do(c, stack, axis_to_type<axis_descendant_or_self>());
7995
7996 case axis_following:
7997 return step_do(c, stack, axis_to_type<axis_following>());
7998
7999 case axis_following_sibling:
8000 return step_do(c, stack, axis_to_type<axis_following_sibling>());
8001
8002 case axis_namespace:
8003 // namespaced axis is not supported
8004 return xpath_node_set_raw();
8005
8006 case axis_parent:
8007 return step_do(c, stack, axis_to_type<axis_parent>());
8008
8009 case axis_preceding:
8010 return step_do(c, stack, axis_to_type<axis_preceding>());
8011
8012 case axis_preceding_sibling:
8013 return step_do(c, stack, axis_to_type<axis_preceding_sibling>());
8014
8015 case axis_self:
8016 return step_do(c, stack, axis_to_type<axis_self>());
8017 }
8018 }
8019
8020 case ast_step_root:
8021 {
8022 assert(!_right); // root step can't have any predicates
8023
8024 xpath_node_set_raw ns;
8025
8026 ns.set_type(xpath_node_set::type_sorted);
8027
8028 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
8029 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
8030
8031 return ns;
8032 }
8033
8034 case ast_variable:
8035 {
8036 assert(_rettype == _data.variable->type());
8037
8038 if (_rettype == xpath_type_node_set)
8039 {
8040 const xpath_node_set& s = _data.variable->get_node_set();
8041
8042 xpath_node_set_raw ns;
8043
8044 ns.set_type(s.type());
8045 ns.append(s.begin(), s.end(), stack.result);
8046
8047 return ns;
8048 }
8049
8050 // fallthrough to type conversion
8051 }
8052
8053 default:
8054 assert(!"Wrong expression for return type node set");
8055 return xpath_node_set_raw();
8056 }
8057 }
8058
8059 bool is_posinv()
8060 {
8061 switch (_type)
8062 {
8063 case ast_func_position:
8064 return false;
8065
8066 case ast_string_constant:
8067 case ast_number_constant:
8068 case ast_variable:
8069 return true;
8070
8071 case ast_step:
8072 case ast_step_root:
8073 return true;
8074
8075 case ast_predicate:
8076 case ast_filter:
8077 case ast_filter_posinv:
8078 return true;
8079
8080 default:
8081 if (_left && !_left->is_posinv()) return false;
8082
8083 for (xpath_ast_node* n = _right; n; n = n->_next)
8084 if (!n->is_posinv()) return false;
8085
8086 return true;
8087 }
8088 }
8089
8090 xpath_value_type rettype() const
8091 {
8092 return static_cast<xpath_value_type>(_rettype);
8093 }
8094 };
8095
8096 struct xpath_parser
8097 {
8098 xpath_allocator* _alloc;
8099 xpath_lexer _lexer;
8100
8101 const char_t* _query;
8102 xpath_variable_set* _variables;
8103
8104 xpath_parse_result* _result;
8105
8106 #ifdef PUGIXML_NO_EXCEPTIONS
8107 jmp_buf _error_handler;
8108 #endif
8109
8110 void throw_error(const char* message)
8111 {
8112 _result->error = message;
8113 _result->offset = _lexer.current_pos() - _query;
8114
8115 #ifdef PUGIXML_NO_EXCEPTIONS
8116 longjmp(_error_handler, 1);
8117 #else
8118 throw xpath_exception(*_result);
8119 #endif
8120 }
8121
// Reports an allocation failure: throws std::bad_alloc when exceptions are enabled,
// otherwise goes through throw_error with an "Out of memory" message.
void throw_error_oom()
{
#ifndef PUGIXML_NO_EXCEPTIONS
    throw std::bad_alloc();
#else
    throw_error("Out of memory");
#endif
}
8130
8131 void* alloc_node()
8132 {
8133 void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node));
8134
8135 if (!result) throw_error_oom();
8136
8137 return result;
8138 }
8139
8140 const char_t* alloc_string(const xpath_lexer_string& value)
8141 {
8142 if (value.begin)
8143 {
8144 size_t length = static_cast<size_t>(value.end - value.begin);
8145
8146 char_t* c = static_cast<char_t*>(_alloc->allocate_nothrow((length + 1) * sizeof(char_t)));
8147 if (!c) throw_error_oom();
8148
8149 memcpy(c, value.begin, length * sizeof(char_t));
8150 c[length] = 0;
8151
8152 return c;
8153 }
8154 else return 0;
8155 }
8156
8157 xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2])
8158 {
8159 assert(argc <= 1);
8160
8161 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
8162
8163 return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]);
8164 }
8165
8166 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
8167 {
8168 switch (name.begin[0])
8169 {
8170 case 'b':
8171 if (name == PUGIXML_TEXT("boolean") && argc == 1)
8172 return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
8173
8174 break;
8175
8176 case 'c':
8177 if (name == PUGIXML_TEXT("count") && argc == 1)
8178 {
8179 if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
8180 return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
8181 }
8182 else if (name == PUGIXML_TEXT("contains") && argc == 2)
8183 return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_string, args[0], args[1]);
8184 else if (name == PUGIXML_TEXT("concat") && argc >= 2)
8185 return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
8186 else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
8187 return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
8188
8189 break;
8190
8191 case 'f':
8192 if (name == PUGIXML_TEXT("false") && argc == 0)
8193 return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
8194 else if (name == PUGIXML_TEXT("floor") && argc == 1)
8195 return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
8196
8197 break;
8198
8199 case 'i':
8200 if (name == PUGIXML_TEXT("id") && argc == 1)
8201 return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
8202
8203 break;
8204
8205 case 'l':
8206 if (name == PUGIXML_TEXT("last") && argc == 0)
8207 return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
8208 else if (name == PUGIXML_TEXT("lang") && argc == 1)
8209 return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
8210 else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
8211 return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
8212
8213 break;
8214
8215 case 'n':
8216 if (name == PUGIXML_TEXT("name") && argc <= 1)
8217 return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
8218 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
8219 return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
8220 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
8221 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
8222 else if (name == PUGIXML_TEXT("not") && argc == 1)
8223 return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
8224 else if (name == PUGIXML_TEXT("number") && argc <= 1)
8225 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
8226
8227 break;
8228
8229 case 'p':
8230 if (name == PUGIXML_TEXT("position") && argc == 0)
8231 return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
8232
8233 break;
8234
8235 case 'r':
8236 if (name == PUGIXML_TEXT("round") && argc == 1)
8237 return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
8238
8239 break;
8240
8241 case 's':
8242 if (name == PUGIXML_TEXT("string") && argc <= 1)
8243 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
8244 else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
8245 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_string, args[0]);
8246 else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
8247 return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
8248 else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
8249 return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
8250 else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
8251 return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
8252 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
8253 return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
8254 else if (name == PUGIXML_TEXT("sum") && argc == 1)
8255 {
8256 if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
8257 return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
8258 }
8259
8260 break;
8261
8262 case 't':
8263 if (name == PUGIXML_TEXT("translate") && argc == 3)
8264 return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
8265 else if (name == PUGIXML_TEXT("true") && argc == 0)
8266 return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
8267
8268 break;
8269 }
8270
8271 throw_error("Unrecognized function or wrong parameter count");
8272
8273 return 0;
8274 }
8275
8276 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
8277 {
8278 specified = true;
8279
8280 switch (name.begin[0])
8281 {
8282 case 'a':
8283 if (name == PUGIXML_TEXT("ancestor"))
8284 return axis_ancestor;
8285 else if (name == PUGIXML_TEXT("ancestor-or-self"))
8286 return axis_ancestor_or_self;
8287 else if (name == PUGIXML_TEXT("attribute"))
8288 return axis_attribute;
8289
8290 break;
8291
8292 case 'c':
8293 if (name == PUGIXML_TEXT("child"))
8294 return axis_child;
8295
8296 break;
8297
8298 case 'd':
8299 if (name == PUGIXML_TEXT("descendant"))
8300 return axis_descendant;
8301 else if (name == PUGIXML_TEXT("descendant-or-self"))
8302 return axis_descendant_or_self;
8303
8304 break;
8305
8306 case 'f':
8307 if (name == PUGIXML_TEXT("following"))
8308 return axis_following;
8309 else if (name == PUGIXML_TEXT("following-sibling"))
8310 return axis_following_sibling;
8311
8312 break;
8313
8314 case 'n':
8315 if (name == PUGIXML_TEXT("namespace"))
8316 return axis_namespace;
8317
8318 break;
8319
8320 case 'p':
8321 if (name == PUGIXML_TEXT("parent"))
8322 return axis_parent;
8323 else if (name == PUGIXML_TEXT("preceding"))
8324 return axis_preceding;
8325 else if (name == PUGIXML_TEXT("preceding-sibling"))
8326 return axis_preceding_sibling;
8327
8328 break;
8329
8330 case 's':
8331 if (name == PUGIXML_TEXT("self"))
8332 return axis_self;
8333
8334 break;
8335 }
8336
8337 specified = false;
8338 return axis_child;
8339 }
8340
8341 nodetest_t parse_node_test_type(const xpath_lexer_string& name)
8342 {
8343 switch (name.begin[0])
8344 {
8345 case 'c':
8346 if (name == PUGIXML_TEXT("comment"))
8347 return nodetest_type_comment;
8348
8349 break;
8350
8351 case 'n':
8352 if (name == PUGIXML_TEXT("node"))
8353 return nodetest_type_node;
8354
8355 break;
8356
8357 case 'p':
8358 if (name == PUGIXML_TEXT("processing-instruction"))
8359 return nodetest_type_pi;
8360
8361 break;
8362
8363 case 't':
8364 if (name == PUGIXML_TEXT("text"))
8365 return nodetest_type_text;
8366
8367 break;
8368 }
8369
8370 return nodetest_none;
8371 }
8372
8373 // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
8374 xpath_ast_node* parse_primary_expression()
8375 {
8376 switch (_lexer.current())
8377 {
8378 case lex_var_ref:
8379 {
8380 xpath_lexer_string name = _lexer.contents();
8381
8382 if (!_variables)
8383 throw_error("Unknown variable: variable set is not provided");
8384
8385 xpath_variable* var = get_variable(_variables, name.begin, name.end);
8386
8387 if (!var)
8388 throw_error("Unknown variable: variable set does not contain the given name");
8389
8390 _lexer.next();
8391
8392 return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var);
8393 }
8394
8395 case lex_open_brace:
8396 {
8397 _lexer.next();
8398
8399 xpath_ast_node* n = parse_expression();
8400
8401 if (_lexer.current() != lex_close_brace)
8402 throw_error("Unmatched braces");
8403
8404 _lexer.next();
8405
8406 return n;
8407 }
8408
8409 case lex_quoted_string:
8410 {
8411 const char_t* value = alloc_string(_lexer.contents());
8412
8413 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);
8414 _lexer.next();
8415
8416 return n;
8417 }
8418
8419 case lex_number:
8420 {
8421 double value = 0;
8422
8423 if (!convert_string_to_number(_lexer.contents().begin, _lexer.contents().end, &value))
8424 throw_error_oom();
8425
8426 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
8427 _lexer.next();
8428
8429 return n;
8430 }
8431
8432 case lex_string:
8433 {
8434 xpath_ast_node* args[2] = {0};
8435 size_t argc = 0;
8436
8437 xpath_lexer_string function = _lexer.contents();
8438 _lexer.next();
8439
8440 xpath_ast_node* last_arg = 0;
8441
8442 if (_lexer.current() != lex_open_brace)
8443 throw_error("Unrecognized function call");
8444 _lexer.next();
8445
8446 if (_lexer.current() != lex_close_brace)
8447 args[argc++] = parse_expression();
8448
8449 while (_lexer.current() != lex_close_brace)
8450 {
8451 if (_lexer.current() != lex_comma)
8452 throw_error("No comma between function arguments");
8453 _lexer.next();
8454
8455 xpath_ast_node* n = parse_expression();
8456
8457 if (argc < 2) args[argc] = n;
8458 else last_arg->set_next(n);
8459
8460 argc++;
8461 last_arg = n;
8462 }
8463
8464 _lexer.next();
8465
8466 return parse_function(function, argc, args);
8467 }
8468
8469 default:
8470 throw_error("Unrecognizable primary expression");
8471
8472 return 0;
8473 }
8474 }
8475
8476 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
8477 // Predicate ::= '[' PredicateExpr ']'
8478 // PredicateExpr ::= Expr
8479 xpath_ast_node* parse_filter_expression()
8480 {
8481 xpath_ast_node* n = parse_primary_expression();
8482
8483 while (_lexer.current() == lex_open_square_brace)
8484 {
8485 _lexer.next();
8486
8487 xpath_ast_node* expr = parse_expression();
8488
8489 if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set");
8490
8491 bool posinv = expr->rettype() != xpath_type_number && expr->is_posinv();
8492
8493 n = new (alloc_node()) xpath_ast_node(posinv ? ast_filter_posinv : ast_filter, xpath_type_node_set, n, expr);
8494
8495 if (_lexer.current() != lex_close_square_brace)
8496 throw_error("Unmatched square brace");
8497
8498 _lexer.next();
8499 }
8500
8501 return n;
8502 }
8503
8504 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
8505 // AxisSpecifier ::= AxisName '::' | '@'?
8506 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
8507 // NameTest ::= '*' | NCName ':' '*' | QName
8508 // AbbreviatedStep ::= '.' | '..'
8509 xpath_ast_node* parse_step(xpath_ast_node* set)
8510 {
8511 if (set && set->rettype() != xpath_type_node_set)
8512 throw_error("Step has to be applied to node set");
8513
8514 bool axis_specified = false;
8515 axis_t axis = axis_child; // implied child axis
8516
8517 if (_lexer.current() == lex_axis_attribute)
8518 {
8519 axis = axis_attribute;
8520 axis_specified = true;
8521
8522 _lexer.next();
8523 }
8524 else if (_lexer.current() == lex_dot)
8525 {
8526 _lexer.next();
8527
8528 return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
8529 }
8530 else if (_lexer.current() == lex_double_dot)
8531 {
8532 _lexer.next();
8533
8534 return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
8535 }
8536
8537 nodetest_t nt_type = nodetest_none;
8538 xpath_lexer_string nt_name;
8539
8540 if (_lexer.current() == lex_string)
8541 {
8542 // node name test
8543 nt_name = _lexer.contents();
8544 _lexer.next();
8545
8546 // was it an axis name?
8547 if (_lexer.current() == lex_double_colon)
8548 {
8549 // parse axis name
8550 if (axis_specified) throw_error("Two axis specifiers in one step");
8551
8552 axis = parse_axis_name(nt_name, axis_specified);
8553
8554 if (!axis_specified) throw_error("Unknown axis");
8555
8556 // read actual node test
8557 _lexer.next();
8558
8559 if (_lexer.current() == lex_multiply)
8560 {
8561 nt_type = nodetest_all;
8562 nt_name = xpath_lexer_string();
8563 _lexer.next();
8564 }
8565 else if (_lexer.current() == lex_string)
8566 {
8567 nt_name = _lexer.contents();
8568 _lexer.next();
8569 }
8570 else throw_error("Unrecognized node test");
8571 }
8572
8573 if (nt_type == nodetest_none)
8574 {
8575 // node type test or processing-instruction
8576 if (_lexer.current() == lex_open_brace)
8577 {
8578 _lexer.next();
8579
8580 if (_lexer.current() == lex_close_brace)
8581 {
8582 _lexer.next();
8583
8584 nt_type = parse_node_test_type(nt_name);
8585
8586 if (nt_type == nodetest_none) throw_error("Unrecognized node type");
8587
8588 nt_name = xpath_lexer_string();
8589 }
8590 else if (nt_name == PUGIXML_TEXT("processing-instruction"))
8591 {
8592 if (_lexer.current() != lex_quoted_string)
8593 throw_error("Only literals are allowed as arguments to processing-instruction()");
8594
8595 nt_type = nodetest_pi;
8596 nt_name = _lexer.contents();
8597 _lexer.next();
8598
8599 if (_lexer.current() != lex_close_brace)
8600 throw_error("Unmatched brace near processing-instruction()");
8601 _lexer.next();
8602 }
8603 else
8604 throw_error("Unmatched brace near node type test");
8605
8606 }
8607 // QName or NCName:*
8608 else
8609 {
8610 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
8611 {
8612 nt_name.end--; // erase *
8613
8614 nt_type = nodetest_all_in_namespace;
8615 }
8616 else nt_type = nodetest_name;
8617 }
8618 }
8619 }
8620 else if (_lexer.current() == lex_multiply)
8621 {
8622 nt_type = nodetest_all;
8623 _lexer.next();
8624 }
8625 else throw_error("Unrecognized node test");
8626
8627 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));
8628
8629 xpath_ast_node* last = 0;
8630
8631 while (_lexer.current() == lex_open_square_brace)
8632 {
8633 _lexer.next();
8634
8635 xpath_ast_node* expr = parse_expression();
8636
8637 xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr);
8638
8639 if (_lexer.current() != lex_close_square_brace)
8640 throw_error("Unmatched square brace");
8641 _lexer.next();
8642
8643 if (last) last->set_next(pred);
8644 else n->set_right(pred);
8645
8646 last = pred;
8647 }
8648
8649 return n;
8650 }
8651
8652 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
8653 xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
8654 {
8655 xpath_ast_node* n = parse_step(set);
8656
8657 while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
8658 {
8659 lexeme_t l = _lexer.current();
8660 _lexer.next();
8661
8662 if (l == lex_double_slash)
8663 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
8664
8665 n = parse_step(n);
8666 }
8667
8668 return n;
8669 }
8670
8671 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
8672 // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
8673 xpath_ast_node* parse_location_path()
8674 {
8675 if (_lexer.current() == lex_slash)
8676 {
8677 _lexer.next();
8678
8679 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
8680
8681 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
8682 lexeme_t l = _lexer.current();
8683
8684 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
8685 return parse_relative_location_path(n);
8686 else
8687 return n;
8688 }
8689 else if (_lexer.current() == lex_double_slash)
8690 {
8691 _lexer.next();
8692
8693 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
8694 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
8695
8696 return parse_relative_location_path(n);
8697 }
8698
8699 // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
8700 return parse_relative_location_path(0);
8701 }
8702
8703 // PathExpr ::= LocationPath
8704 // | FilterExpr
8705 // | FilterExpr '/' RelativeLocationPath
8706 // | FilterExpr '//' RelativeLocationPath
8707 xpath_ast_node* parse_path_expression()
8708 {
8709 // Clarification.
8710 // PathExpr begins with either LocationPath or FilterExpr.
8711 // FilterExpr begins with PrimaryExpr
8712 // PrimaryExpr begins with '$' in case of it being a variable reference,
8713 // '(' in case of it being an expression, string literal, number constant or
8714 // function call.
8715
8716 if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
8717 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
8718 _lexer.current() == lex_string)
8719 {
8720 if (_lexer.current() == lex_string)
8721 {
8722 // This is either a function call, or not - if not, we shall proceed with location path
8723 const char_t* state = _lexer.state();
8724
8725 while (IS_CHARTYPE(*state, ct_space)) ++state;
8726
8727 if (*state != '(') return parse_location_path();
8728
8729 // This looks like a function call; however this still can be a node-test. Check it.
8730 if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();
8731 }
8732
8733 xpath_ast_node* n = parse_filter_expression();
8734
8735 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
8736 {
8737 lexeme_t l = _lexer.current();
8738 _lexer.next();
8739
8740 if (l == lex_double_slash)
8741 {
8742 if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
8743
8744 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
8745 }
8746
8747 // select from location path
8748 return parse_relative_location_path(n);
8749 }
8750
8751 return n;
8752 }
8753 else return parse_location_path();
8754 }
8755
8756 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
8757 xpath_ast_node* parse_union_expression()
8758 {
8759 xpath_ast_node* n = parse_path_expression();
8760
8761 while (_lexer.current() == lex_union)
8762 {
8763 _lexer.next();
8764
8765 xpath_ast_node* expr = parse_union_expression();
8766
8767 if (n->rettype() != xpath_type_node_set || expr->rettype() != xpath_type_node_set)
8768 throw_error("Union operator has to be applied to node sets");
8769
8770 n = new (alloc_node()) xpath_ast_node(ast_op_union, xpath_type_node_set, n, expr);
8771 }
8772
8773 return n;
8774 }
8775
8776 // UnaryExpr ::= UnionExpr | '-' UnaryExpr
8777 xpath_ast_node* parse_unary_expression()
8778 {
8779 if (_lexer.current() == lex_minus)
8780 {
8781 _lexer.next();
8782
8783 xpath_ast_node* expr = parse_unary_expression();
8784
8785 return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
8786 }
8787 else return parse_union_expression();
8788 }
8789
8790 // MultiplicativeExpr ::= UnaryExpr
8791 // | MultiplicativeExpr '*' UnaryExpr
8792 // | MultiplicativeExpr 'div' UnaryExpr
8793 // | MultiplicativeExpr 'mod' UnaryExpr
8794 xpath_ast_node* parse_multiplicative_expression()
8795 {
8796 xpath_ast_node* n = parse_unary_expression();
8797
8798 while (_lexer.current() == lex_multiply || (_lexer.current() == lex_string &&
8799 (_lexer.contents() == PUGIXML_TEXT("mod") || _lexer.contents() == PUGIXML_TEXT("div"))))
8800 {
8801 ast_type_t op = _lexer.current() == lex_multiply ? ast_op_multiply :
8802 _lexer.contents().begin[0] == 'd' ? ast_op_divide : ast_op_mod;
8803 _lexer.next();
8804
8805 xpath_ast_node* expr = parse_unary_expression();
8806
8807 n = new (alloc_node()) xpath_ast_node(op, xpath_type_number, n, expr);
8808 }
8809
8810 return n;
8811 }
8812
8813 // AdditiveExpr ::= MultiplicativeExpr
8814 // | AdditiveExpr '+' MultiplicativeExpr
8815 // | AdditiveExpr '-' MultiplicativeExpr
8816 xpath_ast_node* parse_additive_expression()
8817 {
8818 xpath_ast_node* n = parse_multiplicative_expression();
8819
8820 while (_lexer.current() == lex_plus || _lexer.current() == lex_minus)
8821 {
8822 lexeme_t l = _lexer.current();
8823
8824 _lexer.next();
8825
8826 xpath_ast_node* expr = parse_multiplicative_expression();
8827
8828 n = new (alloc_node()) xpath_ast_node(l == lex_plus ? ast_op_add : ast_op_subtract, xpath_type_number, n, expr);
8829 }
8830
8831 return n;
8832 }
8833
8834 // RelationalExpr ::= AdditiveExpr
8835 // | RelationalExpr '<' AdditiveExpr
8836 // | RelationalExpr '>' AdditiveExpr
8837 // | RelationalExpr '<=' AdditiveExpr
8838 // | RelationalExpr '>=' AdditiveExpr
8839 xpath_ast_node* parse_relational_expression()
8840 {
8841 xpath_ast_node* n = parse_additive_expression();
8842
8843 while (_lexer.current() == lex_less || _lexer.current() == lex_less_or_equal ||
8844 _lexer.current() == lex_greater || _lexer.current() == lex_greater_or_equal)
8845 {
8846 lexeme_t l = _lexer.current();
8847 _lexer.next();
8848
8849 xpath_ast_node* expr = parse_additive_expression();
8850
8851 n = new (alloc_node()) xpath_ast_node(l == lex_less ? ast_op_less : l == lex_greater ? ast_op_greater :
8852 l == lex_less_or_equal ? ast_op_less_or_equal : ast_op_greater_or_equal, xpath_type_boolean, n, expr);
8853 }
8854
8855 return n;
8856 }
8857
8858 // EqualityExpr ::= RelationalExpr
8859 // | EqualityExpr '=' RelationalExpr
8860 // | EqualityExpr '!=' RelationalExpr
8861 xpath_ast_node* parse_equality_expression()
8862 {
8863 xpath_ast_node* n = parse_relational_expression();
8864
8865 while (_lexer.current() == lex_equal || _lexer.current() == lex_not_equal)
8866 {
8867 lexeme_t l = _lexer.current();
8868
8869 _lexer.next();
8870
8871 xpath_ast_node* expr = parse_relational_expression();
8872
8873 n = new (alloc_node()) xpath_ast_node(l == lex_equal ? ast_op_equal : ast_op_not_equal, xpath_type_boolean, n, expr);
8874 }
8875
8876 return n;
8877 }
8878
8879 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
8880 xpath_ast_node* parse_and_expression()
8881 {
8882 xpath_ast_node* n = parse_equality_expression();
8883
8884 while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("and"))
8885 {
8886 _lexer.next();
8887
8888 xpath_ast_node* expr = parse_equality_expression();
8889
8890 n = new (alloc_node()) xpath_ast_node(ast_op_and, xpath_type_boolean, n, expr);
8891 }
8892
8893 return n;
8894 }
8895
8896 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
8897 xpath_ast_node* parse_or_expression()
8898 {
8899 xpath_ast_node* n = parse_and_expression();
8900
8901 while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("or"))
8902 {
8903 _lexer.next();
8904
8905 xpath_ast_node* expr = parse_and_expression();
8906
8907 n = new (alloc_node()) xpath_ast_node(ast_op_or, xpath_type_boolean, n, expr);
8908 }
8909
8910 return n;
8911 }
8912
8913 // Expr ::= OrExpr
8914 xpath_ast_node* parse_expression()
8915 {
8916 return parse_or_expression();
8917 }
8918
8919 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)
8920 {
8921 }
8922
8923 xpath_ast_node* parse()
8924 {
8925 xpath_ast_node* result = parse_expression();
8926
8927 if (_lexer.current() != lex_eof)
8928 {
8929 // there are still unparsed tokens left, error
8930 throw_error("Incorrect query");
8931 }
8932
8933 return result;
8934 }
8935
8936 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
8937 {
8938 xpath_parser parser(query, variables, alloc, result);
8939
8940 #ifdef PUGIXML_NO_EXCEPTIONS
8941 int error = setjmp(parser._error_handler);
8942
8943 return (error == 0) ? parser.parse() : 0;
8944 #else
8945 return parser.parse();
8946 #endif
8947 }
8948 };
8949
8950 struct xpath_query_impl
8951 {
8952 static xpath_query_impl* create()
8953 {
8954 void* memory = global_allocate(sizeof(xpath_query_impl));
8955
8956 return new (memory) xpath_query_impl();
8957 }
8958
8959 static void destroy(void* ptr)
8960 {
8961 if (!ptr) return;
8962
8963 // free all allocated pages
8964 static_cast<xpath_query_impl*>(ptr)->alloc.release();
8965
8966 // free allocator memory (with the first page)
8967 global_deallocate(ptr);
8968 }
8969
8970 xpath_query_impl(): root(0), alloc(&block)
8971 {
8972 block.next = 0;
8973 }
8974
8975 xpath_ast_node* root;
8976 xpath_allocator alloc;
8977 xpath_memory_block block;
8978 };
8979
8980 xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd)
8981 {
8982 if (!impl) return xpath_string();
8983
8984 #ifdef PUGIXML_NO_EXCEPTIONS
8985 if (setjmp(sd.error_handler)) return xpath_string();
8986 #endif
8987
8988 xpath_context c(n, 1, 1);
8989
8990 return impl->root->eval_string(c, sd.stack);
8991 }
8992}
8993
8994namespace pugi
8995{
8996#ifndef PUGIXML_NO_EXCEPTIONS
8997 xpath_exception::xpath_exception(const xpath_parse_result& result): _result(result)
8998 {
8999 assert(result.error);
9000 }
9001
9002 const char* xpath_exception::what() const throw()
9003 {
9004 return _result.error;
9005 }
9006
9007 const xpath_parse_result& xpath_exception::result() const
9008 {
9009 return _result;
9010 }
9011#endif
9012
9013 xpath_node::xpath_node()
9014 {
9015 }
9016
9017 xpath_node::xpath_node(const xml_node& node): _node(node)
9018 {
9019 }
9020
9021 xpath_node::xpath_node(const xml_attribute& attribute, const xml_node& parent): _node(attribute ? parent : xml_node()), _attribute(attribute)
9022 {
9023 }
9024
9025 xml_node xpath_node::node() const
9026 {
9027 return _attribute ? xml_node() : _node;
9028 }
9029
9030 xml_attribute xpath_node::attribute() const
9031 {
9032 return _attribute;
9033 }
9034
9035 xml_node xpath_node::parent() const
9036 {
9037 return _attribute ? _node : _node.parent();
9038 }
9039
9040 xpath_node::operator xpath_node::unspecified_bool_type() const
9041 {
9042 return (_node || _attribute) ? &xpath_node::_node : 0;
9043 }
9044
9045 bool xpath_node::operator!() const
9046 {
9047 return !(_node || _attribute);
9048 }
9049
9050 bool xpath_node::operator==(const xpath_node& n) const
9051 {
9052 return _node == n._node && _attribute == n._attribute;
9053 }
9054
9055 bool xpath_node::operator!=(const xpath_node& n) const
9056 {
9057 return _node != n._node || _attribute != n._attribute;
9058 }
9059
9060#ifdef __BORLANDC__
9061 bool operator&&(const xpath_node& lhs, bool rhs)
9062 {
9063 return (bool)lhs && rhs;
9064 }
9065
9066 bool operator||(const xpath_node& lhs, bool rhs)
9067 {
9068 return (bool)lhs || rhs;
9069 }
9070#endif
9071
9072 void xpath_node_set::_assign(const_iterator begin, const_iterator end)
9073 {
9074 assert(begin <= end);
9075
9076 size_t size = static_cast<size_t>(end - begin);
9077
9078 if (size <= 1)
9079 {
9080 // deallocate old buffer
9081 if (_begin != &_storage) global_deallocate(_begin);
9082
9083 // use internal buffer
9084 if (begin != end) _storage = *begin;
9085
9086 _begin = &_storage;
9087 _end = &_storage + size;
9088 }
9089 else
9090 {
9091 // make heap copy
9092 xpath_node* storage = static_cast<xpath_node*>(global_allocate(size * sizeof(xpath_node)));
9093
9094 if (!storage)
9095 {
9096 #ifdef PUGIXML_NO_EXCEPTIONS
9097 return;
9098 #else
9099 throw std::bad_alloc();
9100 #endif
9101 }
9102
9103 memcpy(storage, begin, size * sizeof(xpath_node));
9104
9105 // deallocate old buffer
9106 if (_begin != &_storage) global_deallocate(_begin);
9107
9108 // finalize
9109 _begin = storage;
9110 _end = storage + size;
9111 }
9112 }
9113
9114 xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
9115 {
9116 }
9117
9118 xpath_node_set::xpath_node_set(const_iterator begin, const_iterator end, type_t type): _type(type), _begin(&_storage), _end(&_storage)
9119 {
9120 _assign(begin, end);
9121 }
9122
9123 xpath_node_set::~xpath_node_set()
9124 {
9125 if (_begin != &_storage) global_deallocate(_begin);
9126 }
9127
9128 xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(ns._type), _begin(&_storage), _end(&_storage)
9129 {
9130 _assign(ns._begin, ns._end);
9131 }
9132
9133 xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
9134 {
9135 if (this == &ns) return *this;
9136
9137 _type = ns._type;
9138 _assign(ns._begin, ns._end);
9139
9140 return *this;
9141 }
9142
9143 xpath_node_set::type_t xpath_node_set::type() const
9144 {
9145 return _type;
9146 }
9147
9148 size_t xpath_node_set::size() const
9149 {
9150 return _end - _begin;
9151 }
9152
9153 bool xpath_node_set::empty() const
9154 {
9155 return _begin == _end;
9156 }
9157
9158 const xpath_node& xpath_node_set::operator[](size_t index) const
9159 {
9160 assert(index < size());
9161 return _begin[index];
9162 }
9163
9164 xpath_node_set::const_iterator xpath_node_set::begin() const
9165 {
9166 return _begin;
9167 }
9168
9169 xpath_node_set::const_iterator xpath_node_set::end() const
9170 {
9171 return _end;
9172 }
9173
9174 void xpath_node_set::sort(bool reverse)
9175 {
9176 _type = xpath_sort(_begin, _end, _type, reverse);
9177 }
9178
9179 xpath_node xpath_node_set::first() const
9180 {
9181 return xpath_first(_begin, _end, _type);
9182 }
9183
9184 xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
9185 {
9186 }
9187
9188 xpath_parse_result::operator bool() const
9189 {
9190 return error == 0;
9191 }
9192 const char* xpath_parse_result::description() const
9193 {
9194 return error ? error : "No error";
9195 }
9196
9197 xpath_variable::xpath_variable()
9198 {
9199 }
9200
9201 const char_t* xpath_variable::name() const
9202 {
9203 switch (_type)
9204 {
9205 case xpath_type_node_set:
9206 return static_cast<const xpath_variable_node_set*>(this)->name;
9207
9208 case xpath_type_number:
9209 return static_cast<const xpath_variable_number*>(this)->name;
9210
9211 case xpath_type_string:
9212 return static_cast<const xpath_variable_string*>(this)->name;
9213
9214 case xpath_type_boolean:
9215 return static_cast<const xpath_variable_boolean*>(this)->name;
9216
9217 default:
9218 assert(!"Invalid variable type");
9219 return 0;
9220 }
9221 }
9222
9223 xpath_value_type xpath_variable::type() const
9224 {
9225 return _type;
9226 }
9227
9228 bool xpath_variable::get_boolean() const
9229 {
9230 return (_type == xpath_type_boolean) ? static_cast<const xpath_variable_boolean*>(this)->value : false;
9231 }
9232
9233 double xpath_variable::get_number() const
9234 {
9235 return (_type == xpath_type_number) ? static_cast<const xpath_variable_number*>(this)->value : gen_nan();
9236 }
9237
9238 const char_t* xpath_variable::get_string() const
9239 {
9240 const char_t* value = (_type == xpath_type_string) ? static_cast<const xpath_variable_string*>(this)->value : 0;
9241 return value ? value : PUGIXML_TEXT("");
9242 }
9243
9244 const xpath_node_set& xpath_variable::get_node_set() const
9245 {
9246 return (_type == xpath_type_node_set) ? static_cast<const xpath_variable_node_set*>(this)->value : dummy_node_set;
9247 }
9248
9249 bool xpath_variable::set(bool value)
9250 {
9251 if (_type != xpath_type_boolean) return false;
9252
9253 static_cast<xpath_variable_boolean*>(this)->value = value;
9254 return true;
9255 }
9256
9257 bool xpath_variable::set(double value)
9258 {
9259 if (_type != xpath_type_number) return false;
9260
9261 static_cast<xpath_variable_number*>(this)->value = value;
9262 return true;
9263 }
9264
9265 bool xpath_variable::set(const char_t* value)
9266 {
9267 if (_type != xpath_type_string) return false;
9268
9269 xpath_variable_string* var = static_cast<xpath_variable_string*>(this);
9270
9271 // duplicate string
9272 size_t size = (strlength(value) + 1) * sizeof(char_t);
9273
9274 char_t* copy = static_cast<char_t*>(global_allocate(size));
9275 if (!copy) return false;
9276
9277 memcpy(copy, value, size);
9278
9279 // replace old string
9280 if (var->value) global_deallocate(var->value);
9281 var->value = copy;
9282
9283 return true;
9284 }
9285
9286 bool xpath_variable::set(const xpath_node_set& value)
9287 {
9288 if (_type != xpath_type_node_set) return false;
9289
9290 static_cast<xpath_variable_node_set*>(this)->value = value;
9291 return true;
9292 }
9293
9294 xpath_variable_set::xpath_variable_set()
9295 {
9296 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) _data[i] = 0;
9297 }
9298
9299 xpath_variable_set::~xpath_variable_set()
9300 {
9301 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
9302 {
9303 xpath_variable* var = _data[i];
9304
9305 while (var)
9306 {
9307 xpath_variable* next = var->_next;
9308
9309 delete_xpath_variable(var->_type, var);
9310
9311 var = next;
9312 }
9313 }
9314 }
9315
9316 xpath_variable* xpath_variable_set::find(const char_t* name) const
9317 {
9318 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
9319 size_t hash = hash_string(name) % hash_size;
9320
9321 // look for existing variable
9322 for (xpath_variable* var = _data[hash]; var; var = var->_next)
9323 if (strequal(var->name(), name))
9324 return var;
9325
9326 return 0;
9327 }
9328
9329 xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
9330 {
9331 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
9332 size_t hash = hash_string(name) % hash_size;
9333
9334 // look for existing variable
9335 for (xpath_variable* var = _data[hash]; var; var = var->_next)
9336 if (strequal(var->name(), name))
9337 return var->type() == type ? var : 0;
9338
9339 // add new variable
9340 xpath_variable* result = new_xpath_variable(type, name);
9341
9342 if (result)
9343 {
9344 result->_type = type;
9345 result->_next = _data[hash];
9346
9347 _data[hash] = result;
9348 }
9349
9350 return result;
9351 }
9352
9353 bool xpath_variable_set::set(const char_t* name, bool value)
9354 {
9355 xpath_variable* var = add(name, xpath_type_boolean);
9356 return var ? var->set(value) : false;
9357 }
9358
9359 bool xpath_variable_set::set(const char_t* name, double value)
9360 {
9361 xpath_variable* var = add(name, xpath_type_number);
9362 return var ? var->set(value) : false;
9363 }
9364
9365 bool xpath_variable_set::set(const char_t* name, const char_t* value)
9366 {
9367 xpath_variable* var = add(name, xpath_type_string);
9368 return var ? var->set(value) : false;
9369 }
9370
9371 bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
9372 {
9373 xpath_variable* var = add(name, xpath_type_node_set);
9374 return var ? var->set(value) : false;
9375 }
9376
9377 xpath_variable* xpath_variable_set::get(const char_t* name)
9378 {
9379 return find(name);
9380 }
9381
9382 const xpath_variable* xpath_variable_set::get(const char_t* name) const
9383 {
9384 return find(name);
9385 }
9386
9387 xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
9388 {
9389 xpath_query_impl* impl = xpath_query_impl::create();
9390
9391 if (!impl)
9392 {
9393 #ifdef PUGIXML_NO_EXCEPTIONS
9394 _result.error = "Out of memory";
9395 #else
9396 throw std::bad_alloc();
9397 #endif
9398 }
9399 else
9400 {
9401 buffer_holder impl_holder(impl, xpath_query_impl::destroy);
9402
9403 impl->root = xpath_parser::parse(query, variables, &impl->alloc, &_result);
9404
9405 if (impl->root)
9406 {
9407 _impl = static_cast<xpath_query_impl*>(impl_holder.release());
9408 _result.error = 0;
9409 }
9410 }
9411 }
9412
9413 xpath_query::~xpath_query()
9414 {
9415 xpath_query_impl::destroy(_impl);
9416 }
9417
9418 xpath_value_type xpath_query::return_type() const
9419 {
9420 if (!_impl) return xpath_type_none;
9421
9422 return static_cast<xpath_query_impl*>(_impl)->root->rettype();
9423 }
9424
9425 bool xpath_query::evaluate_boolean(const xpath_node& n) const
9426 {
9427 if (!_impl) return false;
9428
9429 xpath_context c(n, 1, 1);
9430 xpath_stack_data sd;
9431
9432 #ifdef PUGIXML_NO_EXCEPTIONS
9433 if (setjmp(sd.error_handler)) return false;
9434 #endif
9435
9436 return static_cast<xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
9437 }
9438
9439 double xpath_query::evaluate_number(const xpath_node& n) const
9440 {
9441 if (!_impl) return gen_nan();
9442
9443 xpath_context c(n, 1, 1);
9444 xpath_stack_data sd;
9445
9446 #ifdef PUGIXML_NO_EXCEPTIONS
9447 if (setjmp(sd.error_handler)) return gen_nan();
9448 #endif
9449
9450 return static_cast<xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
9451 }
9452
9453#ifndef PUGIXML_NO_STL
9454 string_t xpath_query::evaluate_string(const xpath_node& n) const
9455 {
9456 xpath_stack_data sd;
9457
9458 return evaluate_string_impl(static_cast<xpath_query_impl*>(_impl), n, sd).c_str();
9459 }
9460#endif
9461
9462 size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
9463 {
9464 xpath_stack_data sd;
9465
9466 xpath_string r = evaluate_string_impl(static_cast<xpath_query_impl*>(_impl), n, sd);
9467
9468 size_t full_size = r.length() + 1;
9469
9470 if (capacity > 0)
9471 {
9472 size_t size = (full_size < capacity) ? full_size : capacity;
9473 assert(size > 0);
9474
9475 memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
9476 buffer[size - 1] = 0;
9477 }
9478
9479 return full_size;
9480 }
9481
9482 xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
9483 {
9484 if (!_impl) return xpath_node_set();
9485
9486 xpath_ast_node* root = static_cast<xpath_query_impl*>(_impl)->root;
9487
9488 if (root->rettype() != xpath_type_node_set)
9489 {
9490 #ifdef PUGIXML_NO_EXCEPTIONS
9491 return xpath_node_set();
9492 #else
9493 xpath_parse_result result;
9494 result.error = "Expression does not evaluate to node set";
9495
9496 throw xpath_exception(result);
9497 #endif
9498 }
9499
9500 xpath_context c(n, 1, 1);
9501 xpath_stack_data sd;
9502
9503 #ifdef PUGIXML_NO_EXCEPTIONS
9504 if (setjmp(sd.error_handler)) return xpath_node_set();
9505 #endif
9506
9507 xpath_node_set_raw r = root->eval_node_set(c, sd.stack);
9508
9509 return xpath_node_set(r.begin(), r.end(), r.type());
9510 }
9511
9512 const xpath_parse_result& xpath_query::result() const
9513 {
9514 return _result;
9515 }
9516
9517 xpath_query::operator xpath_query::unspecified_bool_type() const
9518 {
9519 return _impl ? &xpath_query::_impl : 0;
9520 }
9521
9522 bool xpath_query::operator!() const
9523 {
9524 return !_impl;
9525 }
9526
9527 xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
9528 {
9529 xpath_query q(query, variables);
9530 return select_single_node(q);
9531 }
9532
9533 xpath_node xml_node::select_single_node(const xpath_query& query) const
9534 {
9535 xpath_node_set s = query.evaluate_node_set(*this);
9536 return s.empty() ? xpath_node() : s.first();
9537 }
9538
9539 xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
9540 {
9541 xpath_query q(query, variables);
9542 return select_nodes(q);
9543 }
9544
9545 xpath_node_set xml_node::select_nodes(const xpath_query& query) const
9546 {
9547 return query.evaluate_node_set(*this);
9548 }
9549}
9550
9551#endif
9552
9553/**
9554 * Copyright (c) 2006-2010 Arseny Kapoulkine
9555 *
9556 * Permission is hereby granted, free of charge, to any person
9557 * obtaining a copy of this software and associated documentation
9558 * files (the "Software"), to deal in the Software without
9559 * restriction, including without limitation the rights to use,
9560 * copy, modify, merge, publish, distribute, sublicense, and/or sell
9561 * copies of the Software, and to permit persons to whom the
9562 * Software is furnished to do so, subject to the following
9563 * conditions:
9564 *
9565 * The above copyright notice and this permission notice shall be
9566 * included in all copies or substantial portions of the Software.
9567 *
9568 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
9569 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
9570 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
9571 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
9572 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
9573 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
9574 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
9575 * OTHER DEALINGS IN THE SOFTWARE.
9576 */
Note: See TracBrowser for help on using the repository browser.