source: src/Parser/pugixml/pugixml.cpp@ 5aaa43

Action_Thermostats Add_AtomRandomPerturbation Add_FitFragmentPartialChargesAction Add_RotateAroundBondAction Add_SelectAtomByNameAction Added_ParseSaveFragmentResults AddingActions_SaveParseParticleParameters Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_ParticleName_to_Atom Adding_StructOpt_integration_tests AtomFragments Automaking_mpqc_open AutomationFragmentation_failures Candidate_v1.5.4 Candidate_v1.6.0 Candidate_v1.6.1 Candidate_v1.7.0 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator CombiningParticlePotentialParsing Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_BoundInBox_CenterInBox_MoleculeActions Fix_ChargeSampling_PBC Fix_ChronosMutex Fix_FitPartialCharges Fix_FitPotential_needs_atomicnumbers Fix_ForceAnnealing Fix_IndependentFragmentGrids Fix_ParseParticles Fix_ParseParticles_split_forward_backward_Actions Fix_PopActions Fix_QtFragmentList_sorted_selection Fix_Restrictedkeyset_FragmentMolecule Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns Fix_fitting_potentials Fixes ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion FragmentAction_writes_AtomFragments FragmentMolecule_checks_bonddegrees GeometryObjects Gui_Fixes Gui_displays_atomic_force_velocity ImplicitCharges IndependentFragmentGrids IndependentFragmentGrids_IndividualZeroInstances IndependentFragmentGrids_IntegrationTest 
IndependentFragmentGrids_Sole_NN_Calculation JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix MoreRobust_FragmentAutomation ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PdbParser_setsAtomName PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks Rewrite_FitPartialCharges RotateToPrincipalAxisSystem_UndoRedo SaturateAtoms_findBestMatching SaturateAtoms_singleDegree StoppableMakroAction Subpackage_CodePatterns Subpackage_JobMarket Subpackage_LinearAlgebra Subpackage_levmar Subpackage_mpqc_open Subpackage_vmg Switchable_LogView ThirdParty_MPQC_rebuilt_buildsystem TrajectoryDependenant_MaxOrder TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps TremoloParser_setsAtomName Ubuntu_1604_changes stable
Last change on this file since 5aaa43 was d2596b, checked in by Frederik Heber <heber@…>, 14 years ago

Added XmlParser for parsing configurations for ScaFaCoS generic test code.

  • XML is parsed via pugixml which is placed in subfolder pugixml in src/Parser.
  • NOTE: pugixml does not import/export double with high enough precision. Hence, we always obtain strings and convert them ourselves.
  • also added unit test on the new parser.
  • NOTE: Unit test is failing as charges are not yet written correctly, hence marked as XFAIL.
  • Property mode set to 100644
File size: 242.4 KB
RevLine 
[d2596b]1/**
2 * pugixml parser - version 1.0
3 * --------------------------------------------------------
4 * Copyright (C) 2006-2010, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
5 * Report bugs and download new versions at http://pugixml.org/
6 *
7 * This library is distributed under the MIT License. See notice at the end
8 * of this file.
9 *
10 * This work is based on the pugxml parser, which is:
11 * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
12 */
13
14#include "pugixml.hpp"
15
16#include <stdlib.h>
17#include <stdio.h>
18#include <string.h>
19#include <assert.h>
20#include <setjmp.h>
21#include <wchar.h>
22
23#ifndef PUGIXML_NO_XPATH
24# include <math.h>
25# include <float.h>
26#endif
27
28#ifndef PUGIXML_NO_STL
29# include <istream>
30# include <ostream>
31# include <string>
32#endif
33
34// For placement new
35#include <new>
36
37#ifdef _MSC_VER
38# pragma warning(disable: 4127) // conditional expression is constant
39# pragma warning(disable: 4324) // structure was padded due to __declspec(align())
40# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
41# pragma warning(disable: 4702) // unreachable code
42# pragma warning(disable: 4996) // this function or variable may be unsafe
43#endif
44
45#ifdef __INTEL_COMPILER
46# pragma warning(disable: 177) // function was declared but never referenced
47# pragma warning(disable: 279) // controlling expression is constant
48# pragma warning(disable: 1478 1786) // function was declared "deprecated"
49#endif
50
51#ifdef __BORLANDC__
52# pragma warn -8008 // condition is always false
53# pragma warn -8066 // unreachable code
54#endif
55
56#ifdef __SNC__
57# pragma diag_suppress=178 // function was declared but never referenced
58# pragma diag_suppress=237 // controlling expression is constant
59#endif
60
61// uintptr_t
62#if !defined(_MSC_VER) || _MSC_VER >= 1600
63# include <stdint.h>
64#else
65# if _MSC_VER < 1300
66// No native uintptr_t in MSVC6
67typedef size_t uintptr_t;
68# endif
69typedef unsigned __int8 uint8_t;
70typedef unsigned __int16 uint16_t;
71typedef unsigned __int32 uint32_t;
72typedef __int32 int32_t;
73#endif
74
75// Inlining controls
76#if defined(_MSC_VER) && _MSC_VER >= 1300
77# define PUGIXML_NO_INLINE __declspec(noinline)
78#elif defined(__GNUC__)
79# define PUGIXML_NO_INLINE __attribute__((noinline))
80#else
81# define PUGIXML_NO_INLINE
82#endif
83
84// Simple static assertion
85#define STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
86
87// Digital Mars C++ bug workaround for passing char loaded from memory via stack
88#ifdef __DMC__
89# define DMC_VOLATILE volatile
90#else
91# define DMC_VOLATILE
92#endif
93
94using namespace pugi;
95
96// Memory allocation
97namespace
98{
// Default allocation hook: forwards the request to malloc and returns its
// result unchanged.
void* default_allocate(size_t size)
{
    void* const block = malloc(size);

    return block;
}
103
// Default deallocation hook: hands the block to free.
void default_deallocate(void* ptr)
{
    ::free(ptr);
}
108
109 allocation_function global_allocate = default_allocate;
110 deallocation_function global_deallocate = default_deallocate;
111}
112
113// String utilities
114namespace
115{
116 // Get string length
117 size_t strlength(const char_t* s)
118 {
119 assert(s);
120
121 #ifdef PUGIXML_WCHAR_MODE
122 return wcslen(s);
123 #else
124 return strlen(s);
125 #endif
126 }
127
128 // Compare two strings
129 bool strequal(const char_t* src, const char_t* dst)
130 {
131 assert(src && dst);
132
133 #ifdef PUGIXML_WCHAR_MODE
134 return wcscmp(src, dst) == 0;
135 #else
136 return strcmp(src, dst) == 0;
137 #endif
138 }
139
140 // Compare lhs with [rhs_begin, rhs_end)
141 bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
142 {
143 for (size_t i = 0; i < count; ++i)
144 if (lhs[i] != rhs[i])
145 return false;
146
147 return lhs[count] == 0;
148 }
149
150#ifdef PUGIXML_WCHAR_MODE
// Copy a narrow string into a wide buffer one character at a time and
// null-terminate it; all source characters are assumed to be ASCII. The
// destination needs room for the source length plus the terminator.
void widen_ascii(wchar_t* dest, const char* source)
{
    while (*source)
    {
        *dest = *source;

        ++dest;
        ++source;
    }

    *dest = 0;
}
157#endif
158}
159
160#if !defined(PUGIXML_NO_STL) || !defined(PUGIXML_NO_XPATH)
161// auto_ptr-like buffer holder for exception recovery
162namespace
163{
// Minimal auto_ptr-like guard for a raw buffer: the destructor passes the
// buffer to the stored deleter unless release() was called first.
struct buffer_holder
{
    void* data;             // guarded buffer, or 0 when nothing is held
    void (*deleter)(void*); // function used to free the buffer

    buffer_holder(void* data, void (*deleter)(void*)): data(data), deleter(deleter)
    {
    }

    ~buffer_holder()
    {
        if (data) deleter(data);
    }

    // Give up the buffer: returns it and leaves the holder empty, so the
    // destructor will not free it.
    void* release()
    {
        void* result = data;
        data = 0;
        return result;
    }

private:
    // A copy would make two holders free the same buffer, so the copy
    // operations are declared private and intentionally left undefined.
    buffer_holder(const buffer_holder&);
    buffer_holder& operator=(const buffer_holder&);
};
185}
186#endif
187
188namespace
189{
// Payload capacity of a regular memory page, in bytes.
static const size_t xml_memory_page_size = 0x8000;

// Page headers are aligned to this many bytes, which leaves the low bits of
// a page address free for the flag and type fields selected by the masks
// below.
static const uintptr_t xml_memory_page_alignment = 0x20;
static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);
static const uintptr_t xml_memory_page_name_allocated_mask = 0x10;
static const uintptr_t xml_memory_page_value_allocated_mask = 0x08;
static const uintptr_t xml_memory_page_type_mask = 0x07;
197
198 struct xml_allocator;
199
200 struct xml_memory_page
201 {
202 static xml_memory_page* construct(void* memory)
203 {
204 if (!memory) return 0; //$ redundant, left for performance
205
206 xml_memory_page* result = static_cast<xml_memory_page*>(memory);
207
208 result->allocator = 0;
209 result->memory = 0;
210 result->prev = 0;
211 result->next = 0;
212 result->busy_size = 0;
213 result->freed_size = 0;
214
215 return result;
216 }
217
218 xml_allocator* allocator;
219
220 void* memory;
221
222 xml_memory_page* prev;
223 xml_memory_page* next;
224
225 size_t busy_size;
226 size_t freed_size;
227
228 char data[1];
229 };
230
// Bookkeeping record stored directly in front of each string handed out by
// the page allocator.
struct xml_memory_string_header
{
    // distance in bytes from the owning page's data to this header
    uint16_t page_offset;

    // rounded-up allocation size, header included; 0 means the string
    // occupies the whole page
    uint16_t full_size;
};
236
237 struct xml_allocator
238 {
239 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
240 {
241 }
242
243 xml_memory_page* allocate_page(size_t data_size)
244 {
245 size_t size = offsetof(xml_memory_page, data) + data_size;
246
247 // allocate block with some alignment, leaving memory for worst-case padding
248 void* memory = global_allocate(size + xml_memory_page_alignment);
249 if (!memory) return 0;
250
251 // align upwards to page boundary
252 void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1));
253
254 // prepare page structure
255 xml_memory_page* page = xml_memory_page::construct(page_memory);
256
257 page->memory = memory;
258 page->allocator = _root->allocator;
259
260 return page;
261 }
262
263 static void deallocate_page(xml_memory_page* page)
264 {
265 global_deallocate(page->memory);
266 }
267
268 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
269
270 void* allocate_memory(size_t size, xml_memory_page*& out_page)
271 {
272 if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page);
273
274 void* buf = _root->data + _busy_size;
275
276 _busy_size += size;
277
278 out_page = _root;
279
280 return buf;
281 }
282
283 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
284 {
285 if (page == _root) page->busy_size = _busy_size;
286
287 assert(ptr >= page->data && ptr < page->data + page->busy_size);
288 (void)!ptr;
289
290 page->freed_size += size;
291 assert(page->freed_size <= page->busy_size);
292
293 if (page->freed_size == page->busy_size)
294 {
295 if (page->next == 0)
296 {
297 assert(_root == page);
298
299 // top page freed, just reset sizes
300 page->busy_size = page->freed_size = 0;
301 _busy_size = 0;
302 }
303 else
304 {
305 assert(_root != page);
306 assert(page->prev);
307
308 // remove from the list
309 page->prev->next = page->next;
310 page->next->prev = page->prev;
311
312 // deallocate
313 deallocate_page(page);
314 }
315 }
316 }
317
318 char_t* allocate_string(size_t length)
319 {
320 // allocate memory for string and header block
321 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
322
323 // round size up to pointer alignment boundary
324 size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1);
325
326 xml_memory_page* page;
327 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
328
329 if (!header) return 0;
330
331 // setup header
332 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - page->data;
333
334 assert(page_offset >= 0 && page_offset < (1 << 16));
335 header->page_offset = static_cast<uint16_t>(page_offset);
336
337 // full_size == 0 for large strings that occupy the whole page
338 assert(full_size < (1 << 16) || (page->busy_size == full_size && page_offset == 0));
339 header->full_size = static_cast<uint16_t>(full_size < (1 << 16) ? full_size : 0);
340
341 return reinterpret_cast<char_t*>(header + 1);
342 }
343
344 void deallocate_string(char_t* string)
345 {
346 // get header
347 xml_memory_string_header* header = reinterpret_cast<xml_memory_string_header*>(string) - 1;
348
349 // deallocate
350 size_t page_offset = offsetof(xml_memory_page, data) + header->page_offset;
351 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(reinterpret_cast<char*>(header) - page_offset);
352
353 // if full_size == 0 then this string occupies the whole page
354 size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size;
355
356 deallocate_memory(header, full_size, page);
357 }
358
359 xml_memory_page* _root;
360 size_t _busy_size;
361 };
362
363 PUGIXML_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
364 {
365 const size_t large_allocation_threshold = xml_memory_page_size / 4;
366
367 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
368 if (!page) return 0;
369
370 if (size <= large_allocation_threshold)
371 {
372 _root->busy_size = _busy_size;
373
374 // insert page at the end of linked list
375 page->prev = _root;
376 _root->next = page;
377 _root = page;
378
379 _busy_size = size;
380 }
381 else
382 {
383 // insert page before the end of linked list, so that it is deleted as soon as possible
384 // the last page is not deleted even if it's empty (see deallocate_memory)
385 assert(_root->prev);
386
387 page->prev = _root->prev;
388 page->next = _root;
389
390 _root->prev->next = page;
391 _root->prev = page;
392 }
393
394 // allocate inside page
395 page->busy_size = size;
396
397 out_page = page;
398 return page->data;
399 }
400}
401
402namespace pugi
403{
404 /// A 'name=value' XML attribute structure.
405 struct xml_attribute_struct
406 {
407 /// Default ctor
408 xml_attribute_struct(xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0)
409 {
410 }
411
412 uintptr_t header;
413
414 char_t* name; ///< Pointer to attribute name.
415 char_t* value; ///< Pointer to attribute value.
416
417 xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list)
418 xml_attribute_struct* next_attribute; ///< Next attribute
419 };
420
421 /// An XML document tree node.
422 struct xml_node_struct
423 {
424 /// Default ctor
425 /// \param type - node type
426 xml_node_struct(xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
427 {
428 }
429
430 uintptr_t header;
431
432 xml_node_struct* parent; ///< Pointer to parent
433
434 char_t* name; ///< Pointer to element name.
435 char_t* value; ///< Pointer to any associated string data.
436
437 xml_node_struct* first_child; ///< First child
438
439 xml_node_struct* prev_sibling_c; ///< Left brother (cyclic list)
440 xml_node_struct* next_sibling; ///< Right brother
441
442 xml_attribute_struct* first_attribute; ///< First attribute
443 };
444}
445
446namespace
447{
448 struct xml_document_struct: public xml_node_struct, public xml_allocator
449 {
450 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0)
451 {
452 }
453
454 const char_t* buffer;
455 };
456
457 static inline xml_allocator& get_allocator(const xml_node_struct* node)
458 {
459 assert(node);
460
461 return *reinterpret_cast<xml_memory_page*>(node->header & xml_memory_page_pointer_mask)->allocator;
462 }
463}
464
465// Low-level DOM operations
466namespace
467{
468 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
469 {
470 xml_memory_page* page;
471 void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page);
472
473 return new (memory) xml_attribute_struct(page);
474 }
475
476 inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
477 {
478 xml_memory_page* page;
479 void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page);
480
481 return new (memory) xml_node_struct(page, type);
482 }
483
484 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
485 {
486 uintptr_t header = a->header;
487
488 if (header & xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name);
489 if (header & xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value);
490
491 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
492 }
493
494 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
495 {
496 uintptr_t header = n->header;
497
498 if (header & xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name);
499 if (header & xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value);
500
501 for (xml_attribute_struct* attr = n->first_attribute; attr; )
502 {
503 xml_attribute_struct* next = attr->next_attribute;
504
505 destroy_attribute(attr, alloc);
506
507 attr = next;
508 }
509
510 for (xml_node_struct* child = n->first_child; child; )
511 {
512 xml_node_struct* next = child->next_sibling;
513
514 destroy_node(child, alloc);
515
516 child = next;
517 }
518
519 alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
520 }
521
522 PUGIXML_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
523 {
524 xml_node_struct* child = allocate_node(alloc, type);
525 if (!child) return 0;
526
527 child->parent = node;
528
529 xml_node_struct* first_child = node->first_child;
530
531 if (first_child)
532 {
533 xml_node_struct* last_child = first_child->prev_sibling_c;
534
535 last_child->next_sibling = child;
536 child->prev_sibling_c = last_child;
537 first_child->prev_sibling_c = child;
538 }
539 else
540 {
541 node->first_child = child;
542 child->prev_sibling_c = child;
543 }
544
545 return child;
546 }
547
548 PUGIXML_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc)
549 {
550 xml_attribute_struct* a = allocate_attribute(alloc);
551 if (!a) return 0;
552
553 xml_attribute_struct* first_attribute = node->first_attribute;
554
555 if (first_attribute)
556 {
557 xml_attribute_struct* last_attribute = first_attribute->prev_attribute_c;
558
559 last_attribute->next_attribute = a;
560 a->prev_attribute_c = last_attribute;
561 first_attribute->prev_attribute_c = a;
562 }
563 else
564 {
565 node->first_attribute = a;
566 a->prev_attribute_c = a;
567 }
568
569 return a;
570 }
571}
572
573// Helper classes for code generation
574namespace
575{
// Compile-time "off" switch for template parameters.
struct opt_false
{
    enum
    {
        value = 0
    };
};
580
// Compile-time "on" switch for template parameters.
struct opt_true
{
    enum
    {
        value = 1
    };
};
585}
586
587// Unicode utilities
588namespace
589{
// Exchange the two bytes of a 16-bit value.
inline uint16_t endian_swap(uint16_t value)
{
    const unsigned int low_byte = value & 0xff;
    const unsigned int high_byte = value >> 8;

    return static_cast<uint16_t>((low_byte << 8) | high_byte);
}
594
// Reverse the order of the four bytes of a 32-bit value.
inline uint32_t endian_swap(uint32_t value)
{
    const uint32_t byte0 = value & 0xff;
    const uint32_t byte1 = (value >> 8) & 0xff;
    const uint32_t byte2 = (value >> 16) & 0xff;
    const uint32_t byte3 = value >> 24;

    return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
599
// Decoder traits that add up how many UTF-8 bytes the decoded code points
// need; `result` is the running total.
struct utf8_counter
{
    typedef size_t value_type;

    // Code points below 0x10000 take one, two or three bytes.
    static value_type low(value_type result, uint32_t ch)
    {
        size_t bytes = 3; // U+0800..U+FFFF

        if (ch < 0x80) bytes = 1; // U+0000..U+007F
        else if (ch < 0x800) bytes = 2; // U+0080..U+07FF

        return result + bytes;
    }

    // U+10000..U+10FFFF always take four bytes.
    static value_type high(value_type result, uint32_t)
    {
        return result + 4;
    }
};
620
// Decoder traits that encode code points as UTF-8; `result` is the write
// cursor and each function returns the cursor advanced past what it wrote.
struct utf8_writer
{
    typedef uint8_t* value_type;

    // Encode a code point below 0x10000 in one to three bytes.
    static value_type low(value_type result, uint32_t ch)
    {
        // U+0000..U+007F
        if (ch < 0x80)
        {
            result[0] = static_cast<uint8_t>(ch);

            return result + 1;
        }

        // U+0080..U+07FF
        if (ch < 0x800)
        {
            result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
            result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));

            return result + 2;
        }

        // U+0800..U+FFFF
        result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
        result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
        result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));

        return result + 3;
    }

    // Encode a code point in U+10000..U+10FFFF in four bytes.
    static value_type high(value_type result, uint32_t ch)
    {
        result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
        result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
        result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
        result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));

        return result + 4;
    }

    // Encode any code point, choosing between low and high by its range.
    static value_type any(value_type result, uint32_t ch)
    {
        if (ch < 0x10000) return low(result, ch);

        return high(result, ch);
    }
};
665
// Decoder traits that add up how many UTF-16 code units the decoded code
// points need; `result` is the running total.
struct utf16_counter
{
    typedef size_t value_type;

    // Code points below 0x10000 take a single unit.
    static value_type low(value_type result, uint32_t)
    {
        const size_t units = 1;

        return result + units;
    }

    // Code points from 0x10000 upwards take a surrogate pair.
    static value_type high(value_type result, uint32_t)
    {
        const size_t units = 2;

        return result + units;
    }
};
680
// Decoder traits that encode code points as UTF-16; `result` is the write
// cursor and each function returns the cursor advanced past what it wrote.
struct utf16_writer
{
    typedef uint16_t* value_type;

    // Store a code point below 0x10000 as a single unit.
    static value_type low(value_type result, uint32_t ch)
    {
        result[0] = static_cast<uint16_t>(ch);

        return result + 1;
    }

    // Store a code point from 0x10000 upwards as a surrogate pair.
    static value_type high(value_type result, uint32_t ch)
    {
        const uint32_t offset = ch - 0x10000;

        result[0] = static_cast<uint16_t>(0xD800 + (offset >> 10));
        result[1] = static_cast<uint16_t>(0xDC00 + (offset & 0x3ff));

        return result + 2;
    }

    // Store any code point, choosing between low and high by its range.
    static value_type any(value_type result, uint32_t ch)
    {
        if (ch < 0x10000) return low(result, ch);

        return high(result, ch);
    }
};
708
// Decoder traits that count UTF-32 code units: every code point takes
// exactly one, whichever range it is in.
struct utf32_counter
{
    typedef size_t value_type;

    static value_type low(value_type result, uint32_t)
    {
        return ++result;
    }

    static value_type high(value_type result, uint32_t)
    {
        return ++result;
    }
};
723
// Decoder traits that store code points as UTF-32: each code point is
// written verbatim as one unit and the cursor moves forward by one.
struct utf32_writer
{
    typedef uint32_t* value_type;

    static value_type low(value_type result, uint32_t ch)
    {
        result[0] = ch;

        return result + 1;
    }

    // Same storage as low: UTF-32 has no separate form for high code points.
    static value_type high(value_type result, uint32_t ch)
    {
        return low(result, ch);
    }

    static value_type any(value_type result, uint32_t ch)
    {
        return low(result, ch);
    }
};
749
750 template <size_t size> struct wchar_selector;
751
752 template <> struct wchar_selector<2>
753 {
754 typedef uint16_t type;
755 typedef utf16_counter counter;
756 typedef utf16_writer writer;
757 };
758
759 template <> struct wchar_selector<4>
760 {
761 typedef uint32_t type;
762 typedef utf32_counter counter;
763 typedef utf32_writer writer;
764 };
765
766 typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
767 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
768
769 template <typename Traits, typename opt_swap = opt_false> struct utf_decoder
770 {
771 static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result)
772 {
773 const uint8_t utf8_byte_mask = 0x3f;
774
775 while (size)
776 {
777 uint8_t lead = *data;
778
779 // 0xxxxxxx -> U+0000..U+007F
780 if (lead < 0x80)
781 {
782 result = Traits::low(result, lead);
783 data += 1;
784 size -= 1;
785
786 // process aligned single-byte (ascii) blocks
787 if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
788 {
789 while (size >= 4 && (*reinterpret_cast<const uint32_t*>(data) & 0x80808080) == 0)
790 {
791 result = Traits::low(result, data[0]);
792 result = Traits::low(result, data[1]);
793 result = Traits::low(result, data[2]);
794 result = Traits::low(result, data[3]);
795 data += 4;
796 size -= 4;
797 }
798 }
799 }
800 // 110xxxxx -> U+0080..U+07FF
801 else if ((unsigned)(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
802 {
803 result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
804 data += 2;
805 size -= 2;
806 }
807 // 1110xxxx -> U+0800-U+FFFF
808 else if ((unsigned)(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
809 {
810 result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
811 data += 3;
812 size -= 3;
813 }
814 // 11110xxx -> U+10000..U+10FFFF
815 else if ((unsigned)(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
816 {
817 result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
818 data += 4;
819 size -= 4;
820 }
821 // 10xxxxxx or 11111xxx -> invalid
822 else
823 {
824 data += 1;
825 size -= 1;
826 }
827 }
828
829 return result;
830 }
831
832 static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result)
833 {
834 const uint16_t* end = data + size;
835
836 while (data < end)
837 {
838 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
839
840 // U+0000..U+D7FF
841 if (lead < 0xD800)
842 {
843 result = Traits::low(result, lead);
844 data += 1;
845 }
846 // U+E000..U+FFFF
847 else if ((unsigned)(lead - 0xE000) < 0x2000)
848 {
849 result = Traits::low(result, lead);
850 data += 1;
851 }
852 // surrogate pair lead
853 else if ((unsigned)(lead - 0xD800) < 0x400 && data + 1 < end)
854 {
855 uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
856
857 if ((unsigned)(next - 0xDC00) < 0x400)
858 {
859 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
860 data += 2;
861 }
862 else
863 {
864 data += 1;
865 }
866 }
867 else
868 {
869 data += 1;
870 }
871 }
872
873 return result;
874 }
875
876 static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result)
877 {
878 const uint32_t* end = data + size;
879
880 while (data < end)
881 {
882 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
883
884 // U+0000..U+FFFF
885 if (lead < 0x10000)
886 {
887 result = Traits::low(result, lead);
888 data += 1;
889 }
890 // U+10000..U+10FFFF
891 else
892 {
893 result = Traits::high(result, lead);
894 data += 1;
895 }
896 }
897
898 return result;
899 }
900 };
901
// Write the byte-swapped form of each of the `length` units in `data` to the
// same position in `result`.
template <typename T> inline void convert_utf_endian_swap(T* result, const T* data, size_t length)
{
    for (const T* const end = data + length; data != end; ++data, ++result)
        *result = endian_swap(*data);
}
906
907 inline void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
908 {
909 for (size_t i = 0; i < length; ++i) result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
910 }
911}
912
913namespace
914{
// Character class flags; one bit per class so that a table entry can combine
// several of them.
enum chartype_t
{
    ct_parse_pcdata = 1 << 0,  // \0, &, \r, <
    ct_parse_attr = 1 << 1,    // \0, &, \r, ', "
    ct_parse_attr_ws = 1 << 2, // \0, &, \r, ', ", \n, tab
    ct_space = 1 << 3,         // \r, \n, space, tab
    ct_parse_cdata = 1 << 4,   // \0, ], >, \r
    ct_parse_comment = 1 << 5, // \0, -, >, \r
    ct_symbol = 1 << 6,        // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
    ct_start_symbol = 1 << 7   // Any symbol > 127, a-z, A-Z, _, :
};
926
// Per-character combination of chartype_t flags, indexed by character code.
// For example '<' (60) is 1 = ct_parse_pcdata, and every letter is
// 192 = ct_symbol | ct_start_symbol.
const unsigned char chartype_table[256] =
{
    // 0-15: \0, tab, \n, \r
    55,  0,   0,   0,   0,   0,   0,   0,   0,   12,  12,  0,   0,   63,  0,   0,
    // 16-31
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    // 32-47: space, ", &, ', -, .
    8,   0,   6,   0,   0,   0,   7,   6,   0,   0,   0,   0,   0,   96,  64,  0,
    // 48-63: 0-9, :, <, >
    64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  192, 0,   1,   0,   48,  0,
    // 64-79: A-O
    0,   192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    // 80-95: P-Z, ], _
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0,   0,   16,  0,   192,
    // 96-111: a-o
    0,   192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    // 112-127: p-z
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0,   0,   0,   0,   0,

    // 128-255
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
};
947
// Flags of the second character class table; one bit per class so that a
// table entry can combine several of them.
enum chartypex_t
{
    ctx_special_pcdata = 1 << 0, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
    ctx_special_attr = 1 << 1,   // Any symbol >= 0 and < 32 (except \t), &, <, >, "
    ctx_start_symbol = 1 << 2,   // Any symbol > 127, a-z, A-Z, _
    ctx_digit = 1 << 3,          // 0-9
    ctx_symbol = 1 << 4          // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
};
956
// Per-character combination of chartypex_t flags, indexed by character code.
// For example digits are 24 = ctx_digit | ctx_symbol, and letters are
// 20 = ctx_start_symbol | ctx_symbol.
const unsigned char chartypex_table[256] =
{
    // 0-15: tab, \n and \r differ from the other control characters
    3,  3,  3,  3,  3,  3,  3,  3,  3,  0,  2,  3,  3,  2,  3,  3,
    // 16-31
    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
    // 32-47: ", &, -, .
    0,  0,  2,  0,  0,  0,  3,  0,  0,  0,  0,  0,  0,  16, 16, 0,
    // 48-63: 0-9, <, >
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0,  0,  3,  0,  3,  0,

    // 64-79: A-O
    0,  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    // 80-95: P-Z, _
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0,  0,  0,  0,  20,
    // 96-111: a-o
    0,  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    // 112-127: p-z
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0,  0,  0,  0,  0,

    // 128-255
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
};
978
979#ifdef PUGIXML_WCHAR_MODE
980 #define IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
981#else
982 #define IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
983#endif
984
985 #define IS_CHARTYPE(c, ct) IS_CHARTYPE_IMPL(c, ct, chartype_table)
986 #define IS_CHARTYPEX(c, ct) IS_CHARTYPE_IMPL(c, ct, chartypex_table)
987
// True when the first byte in memory of the unsigned int value 1 is 1, i.e.
// when the least significant byte is stored first.
bool is_little_endian()
{
    unsigned int probe = 1;
    const unsigned char* first_byte = reinterpret_cast<unsigned char*>(&probe);

    return *first_byte == 1;
}
994
995 xml_encoding get_wchar_encoding()
996 {
997 STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
998
999 if (sizeof(wchar_t) == 2)
1000 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1001 else
1002 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1003 }
1004
1005 xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)
1006 {
1007 // look for BOM in first few bytes
1008 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1009 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1010 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
1011 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
1012 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
1013
1014 // look for <, <? or <?xm in various encodings
1015 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
1016 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
1017 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
1018 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
1019 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8;
1020
1021 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
1022 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
1023 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
1024
1025 // no known BOM detected, assume utf8
1026 return encoding_utf8;
1027 }
1028
1029 xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
1030 {
1031 // replace wchar encoding with utf implementation
1032 if (encoding == encoding_wchar) return get_wchar_encoding();
1033
1034 // replace utf16 encoding with utf16 with specific endianness
1035 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1036
1037 // replace utf32 encoding with utf32 with specific endianness
1038 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1039
1040 // only do autodetection if no explicit encoding is requested
1041 if (encoding != encoding_auto) return encoding;
1042
1043 // skip encoding autodetection if input buffer is too small
1044 if (size < 4) return encoding_utf8;
1045
1046 // try to guess encoding (based on XML specification, Appendix F.1)
1047 const uint8_t* data = static_cast<const uint8_t*>(contents);
1048
1049 DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1050
1051 return guess_buffer_encoding(d0, d1, d2, d3);
1052 }
1053
1054 bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
1055 {
1056 if (is_mutable)
1057 {
1058 out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
1059 }
1060 else
1061 {
1062 void* buffer = global_allocate(size > 0 ? size : 1);
1063 if (!buffer) return false;
1064
1065 memcpy(buffer, contents, size);
1066
1067 out_buffer = static_cast<char_t*>(buffer);
1068 }
1069
1070 out_length = size / sizeof(char_t);
1071
1072 return true;
1073 }
1074
1075#ifdef PUGIXML_WCHAR_MODE
1076 inline bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
1077 {
1078 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
1079 (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
1080 }
1081
1082 bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
1083 {
1084 const char_t* data = static_cast<const char_t*>(contents);
1085
1086 if (is_mutable)
1087 {
1088 out_buffer = const_cast<char_t*>(data);
1089 }
1090 else
1091 {
1092 out_buffer = static_cast<char_t*>(global_allocate(size > 0 ? size : 1));
1093 if (!out_buffer) return false;
1094 }
1095
1096 out_length = size / sizeof(char_t);
1097
1098 convert_wchar_endian_swap(out_buffer, data, out_length);
1099
1100 return true;
1101 }
1102
1103 bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
1104 {
1105 const uint8_t* data = static_cast<const uint8_t*>(contents);
1106
1107 // first pass: get length in wchar_t units
1108 out_length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
1109
1110 // allocate buffer of suitable length
1111 out_buffer = static_cast<char_t*>(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1112 if (!out_buffer) return false;
1113
1114 // second pass: convert utf8 input to wchar_t
1115 wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
1116 wchar_writer::value_type out_end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, out_begin);
1117
1118 assert(out_end == out_begin + out_length);
1119 (void)!out_end;
1120
1121 return true;
1122 }
1123
1124 template <typename opt_swap> bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1125 {
1126 const uint16_t* data = static_cast<const uint16_t*>(contents);
1127 size_t length = size / sizeof(uint16_t);
1128
1129 // first pass: get length in wchar_t units
1130 out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf16_block(data, length, 0);
1131
1132 // allocate buffer of suitable length
1133 out_buffer = static_cast<char_t*>(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1134 if (!out_buffer) return false;
1135
1136 // second pass: convert utf16 input to wchar_t
1137 wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
1138 wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf16_block(data, length, out_begin);
1139
1140 assert(out_end == out_begin + out_length);
1141 (void)!out_end;
1142
1143 return true;
1144 }
1145
1146 template <typename opt_swap> bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1147 {
1148 const uint32_t* data = static_cast<const uint32_t*>(contents);
1149 size_t length = size / sizeof(uint32_t);
1150
1151 // first pass: get length in wchar_t units
1152 out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf32_block(data, length, 0);
1153
1154 // allocate buffer of suitable length
1155 out_buffer = static_cast<char_t*>(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1156 if (!out_buffer) return false;
1157
1158 // second pass: convert utf32 input to wchar_t
1159 wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
1160 wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf32_block(data, length, out_begin);
1161
1162 assert(out_end == out_begin + out_length);
1163 (void)!out_end;
1164
1165 return true;
1166 }
1167
1168 bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
1169 {
1170 // get native encoding
1171 xml_encoding wchar_encoding = get_wchar_encoding();
1172
1173 // fast path: no conversion required
1174 if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
1175
1176 // only endian-swapping is required
1177 if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
1178
1179 // source encoding is utf8
1180 if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size);
1181
1182 // source encoding is utf16
1183 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
1184 {
1185 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1186
1187 return (native_encoding == encoding) ?
1188 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
1189 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
1190 }
1191
1192 // source encoding is utf32
1193 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
1194 {
1195 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1196
1197 return (native_encoding == encoding) ?
1198 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
1199 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
1200 }
1201
1202 assert(!"Invalid encoding");
1203 return false;
1204 }
1205#else
1206 template <typename opt_swap> bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1207 {
1208 const uint16_t* data = static_cast<const uint16_t*>(contents);
1209 size_t length = size / sizeof(uint16_t);
1210
1211 // first pass: get length in utf8 units
1212 out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(data, length, 0);
1213
1214 // allocate buffer of suitable length
1215 out_buffer = static_cast<char_t*>(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1216 if (!out_buffer) return false;
1217
1218 // second pass: convert utf16 input to utf8
1219 uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
1220 uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf16_block(data, length, out_begin);
1221
1222 assert(out_end == out_begin + out_length);
1223 (void)!out_end;
1224
1225 return true;
1226 }
1227
1228 template <typename opt_swap> bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1229 {
1230 const uint32_t* data = static_cast<const uint32_t*>(contents);
1231 size_t length = size / sizeof(uint32_t);
1232
1233 // first pass: get length in utf8 units
1234 out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(data, length, 0);
1235
1236 // allocate buffer of suitable length
1237 out_buffer = static_cast<char_t*>(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1238 if (!out_buffer) return false;
1239
1240 // second pass: convert utf32 input to utf8
1241 uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
1242 uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf32_block(data, length, out_begin);
1243
1244 assert(out_end == out_begin + out_length);
1245 (void)!out_end;
1246
1247 return true;
1248 }
1249
1250 bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
1251 {
1252 // fast path: no conversion required
1253 if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
1254
1255 // source encoding is utf16
1256 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
1257 {
1258 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1259
1260 return (native_encoding == encoding) ?
1261 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
1262 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
1263 }
1264
1265 // source encoding is utf32
1266 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
1267 {
1268 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1269
1270 return (native_encoding == encoding) ?
1271 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
1272 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
1273 }
1274
1275 assert(!"Invalid encoding");
1276 return false;
1277 }
1278#endif
1279
1280 size_t as_utf8_begin(const wchar_t* str, size_t length)
1281 {
1282 STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1283
1284 // get length in utf8 characters
1285 return sizeof(wchar_t) == 2 ?
1286 utf_decoder<utf8_counter>::decode_utf16_block(reinterpret_cast<const uint16_t*>(str), length, 0) :
1287 utf_decoder<utf8_counter>::decode_utf32_block(reinterpret_cast<const uint32_t*>(str), length, 0);
1288 }
1289
1290 void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
1291 {
1292 STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1293
1294 // convert to utf8
1295 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
1296 uint8_t* end = sizeof(wchar_t) == 2 ?
1297 utf_decoder<utf8_writer>::decode_utf16_block(reinterpret_cast<const uint16_t*>(str), length, begin) :
1298 utf_decoder<utf8_writer>::decode_utf32_block(reinterpret_cast<const uint32_t*>(str), length, begin);
1299
1300 assert(begin + size == end);
1301 (void)!end;
1302
1303 // zero-terminate
1304 buffer[size] = 0;
1305 }
1306
1307#ifndef PUGIXML_NO_STL
1308 std::string as_utf8_impl(const wchar_t* str, size_t length)
1309 {
1310 // first pass: get length in utf8 characters
1311 size_t size = as_utf8_begin(str, length);
1312
1313 // allocate resulting string
1314 std::string result;
1315 result.resize(size);
1316
1317 // second pass: convert to utf8
1318 if (size > 0) as_utf8_end(&result[0], size, str, length);
1319
1320 return result;
1321 }
1322
1323 std::wstring as_wide_impl(const char* str, size_t size)
1324 {
1325 const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
1326
1327 // first pass: get length in wchar_t units
1328 size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
1329
1330 // allocate resulting string
1331 std::wstring result;
1332 result.resize(length);
1333
1334 // second pass: convert to wchar_t
1335 if (length > 0)
1336 {
1337 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
1338 wchar_writer::value_type end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, begin);
1339
1340 assert(begin + length == end);
1341 (void)!end;
1342 }
1343
1344 return result;
1345 }
1346#endif
1347
1348 inline bool strcpy_insitu_allow(size_t length, uintptr_t allocated, char_t* target)
1349 {
1350 assert(target);
1351 size_t target_length = strlength(target);
1352
1353 // always reuse document buffer memory if possible
1354 if (!allocated) return target_length >= length;
1355
1356 // reuse heap memory if waste is not too great
1357 const size_t reuse_threshold = 32;
1358
1359 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
1360 }
1361
1362 bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source)
1363 {
1364 size_t source_length = strlength(source);
1365
1366 if (source_length == 0)
1367 {
1368 // empty string and null pointer are equivalent, so just deallocate old memory
1369 xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
1370
1371 if (header & header_mask) alloc->deallocate_string(dest);
1372
1373 // mark the string as not allocated
1374 dest = 0;
1375 header &= ~header_mask;
1376
1377 return true;
1378 }
1379 else if (dest && strcpy_insitu_allow(source_length, header & header_mask, dest))
1380 {
1381 // we can reuse old buffer, so just copy the new data (including zero terminator)
1382 memcpy(dest, source, (source_length + 1) * sizeof(char_t));
1383
1384 return true;
1385 }
1386 else
1387 {
1388 xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
1389
1390 // allocate new buffer
1391 char_t* buf = alloc->allocate_string(source_length + 1);
1392 if (!buf) return false;
1393
1394 // copy the string (including zero terminator)
1395 memcpy(buf, source, (source_length + 1) * sizeof(char_t));
1396
1397 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
1398 if (header & header_mask) alloc->deallocate_string(dest);
1399
1400 // the string is now allocated, so set the flag
1401 dest = buf;
1402 header |= header_mask;
1403
1404 return true;
1405 }
1406 }
1407
1408 struct gap
1409 {
1410 char_t* end;
1411 size_t size;
1412
1413 gap(): end(0), size(0)
1414 {
1415 }
1416
1417 // Push new gap, move s count bytes further (skipping the gap).
1418 // Collapse previous gap.
1419 void push(char_t*& s, size_t count)
1420 {
1421 if (end) // there was a gap already; collapse it
1422 {
1423 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
1424 assert(s >= end);
1425 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
1426 }
1427
1428 s += count; // end of current gap
1429
1430 // "merge" two gaps
1431 end = s;
1432 size += count;
1433 }
1434
1435 // Collapse all gaps, return past-the-end pointer
1436 char_t* flush(char_t* s)
1437 {
1438 if (end)
1439 {
1440 // Move [old_gap_end, current_pos) to [old_gap_start, ...)
1441 assert(s >= end);
1442 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
1443
1444 return s - size;
1445 }
1446 else return s;
1447 }
1448 };
1449
1450 char_t* strconv_escape(char_t* s, gap& g)
1451 {
1452 char_t* stre = s + 1;
1453
1454 switch (*stre)
1455 {
1456 case '#': // &#...
1457 {
1458 unsigned int ucsc = 0;
1459
1460 if (stre[1] == 'x') // &#x... (hex code)
1461 {
1462 stre += 2;
1463
1464 char_t ch = *stre;
1465
1466 if (ch == ';') return stre;
1467
1468 for (;;)
1469 {
1470 if (static_cast<unsigned int>(ch - '0') <= 9)
1471 ucsc = 16 * ucsc + (ch - '0');
1472 else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
1473 ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
1474 else if (ch == ';')
1475 break;
1476 else // cancel
1477 return stre;
1478
1479 ch = *++stre;
1480 }
1481
1482 ++stre;
1483 }
1484 else // &#... (dec code)
1485 {
1486 char_t ch = *++stre;
1487
1488 if (ch == ';') return stre;
1489
1490 for (;;)
1491 {
1492 if (static_cast<unsigned int>(ch - '0') <= 9)
1493 ucsc = 10 * ucsc + (ch - '0');
1494 else if (ch == ';')
1495 break;
1496 else // cancel
1497 return stre;
1498
1499 ch = *++stre;
1500 }
1501
1502 ++stre;
1503 }
1504
1505 #ifdef PUGIXML_WCHAR_MODE
1506 s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
1507 #else
1508 s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
1509 #endif
1510
1511 g.push(s, stre - s);
1512 return stre;
1513 }
1514 case 'a': // &a
1515 {
1516 ++stre;
1517
1518 if (*stre == 'm') // &am
1519 {
1520 if (*++stre == 'p' && *++stre == ';') // &amp;
1521 {
1522 *s++ = '&';
1523 ++stre;
1524
1525 g.push(s, stre - s);
1526 return stre;
1527 }
1528 }
1529 else if (*stre == 'p') // &ap
1530 {
1531 if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
1532 {
1533 *s++ = '\'';
1534 ++stre;
1535
1536 g.push(s, stre - s);
1537 return stre;
1538 }
1539 }
1540 break;
1541 }
1542 case 'g': // &g
1543 {
1544 if (*++stre == 't' && *++stre == ';') // &gt;
1545 {
1546 *s++ = '>';
1547 ++stre;
1548
1549 g.push(s, stre - s);
1550 return stre;
1551 }
1552 break;
1553 }
1554 case 'l': // &l
1555 {
1556 if (*++stre == 't' && *++stre == ';') // &lt;
1557 {
1558 *s++ = '<';
1559 ++stre;
1560
1561 g.push(s, stre - s);
1562 return stre;
1563 }
1564 break;
1565 }
1566 case 'q': // &q
1567 {
1568 if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
1569 {
1570 *s++ = '"';
1571 ++stre;
1572
1573 g.push(s, stre - s);
1574 return stre;
1575 }
1576 break;
1577 }
1578 }
1579
1580 return stre;
1581 }
1582
1583 // Utility macro for last character handling
1584 #define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
1585
1586 char_t* strconv_comment(char_t* s, char_t endch)
1587 {
1588 gap g;
1589
1590 while (true)
1591 {
1592 while (!IS_CHARTYPE(*s, ct_parse_comment)) ++s;
1593
1594 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1595 {
1596 *s++ = '\n'; // replace first one with 0x0a
1597
1598 if (*s == '\n') g.push(s, 1);
1599 }
1600 else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) // comment ends here
1601 {
1602 *g.flush(s) = 0;
1603
1604 return s + (s[2] == '>' ? 3 : 2);
1605 }
1606 else if (*s == 0)
1607 {
1608 return 0;
1609 }
1610 else ++s;
1611 }
1612 }
1613
1614 char_t* strconv_cdata(char_t* s, char_t endch)
1615 {
1616 gap g;
1617
1618 while (true)
1619 {
1620 while (!IS_CHARTYPE(*s, ct_parse_cdata)) ++s;
1621
1622 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1623 {
1624 *s++ = '\n'; // replace first one with 0x0a
1625
1626 if (*s == '\n') g.push(s, 1);
1627 }
1628 else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) // CDATA ends here
1629 {
1630 *g.flush(s) = 0;
1631
1632 return s + 1;
1633 }
1634 else if (*s == 0)
1635 {
1636 return 0;
1637 }
1638 else ++s;
1639 }
1640 }
1641
1642 typedef char_t* (*strconv_pcdata_t)(char_t*);
1643
1644 template <typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
1645 {
1646 static char_t* parse(char_t* s)
1647 {
1648 gap g;
1649
1650 while (true)
1651 {
1652 while (!IS_CHARTYPE(*s, ct_parse_pcdata)) ++s;
1653
1654 if (*s == '<') // PCDATA ends here
1655 {
1656 *g.flush(s) = 0;
1657
1658 return s + 1;
1659 }
1660 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1661 {
1662 *s++ = '\n'; // replace first one with 0x0a
1663
1664 if (*s == '\n') g.push(s, 1);
1665 }
1666 else if (opt_escape::value && *s == '&')
1667 {
1668 s = strconv_escape(s, g);
1669 }
1670 else if (*s == 0)
1671 {
1672 return s;
1673 }
1674 else ++s;
1675 }
1676 }
1677 };
1678
1679 strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
1680 {
1681 STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20);
1682
1683 switch ((optmask >> 4) & 3) // get bitmask for flags (eol escapes)
1684 {
1685 case 0: return strconv_pcdata_impl<opt_false, opt_false>::parse;
1686 case 1: return strconv_pcdata_impl<opt_false, opt_true>::parse;
1687 case 2: return strconv_pcdata_impl<opt_true, opt_false>::parse;
1688 case 3: return strconv_pcdata_impl<opt_true, opt_true>::parse;
1689 default: return 0; // should not get here
1690 }
1691 }
1692
1693 typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
1694
1695 template <typename opt_escape> struct strconv_attribute_impl
1696 {
1697 static char_t* parse_wnorm(char_t* s, char_t end_quote)
1698 {
1699 gap g;
1700
1701 // trim leading whitespaces
1702 if (IS_CHARTYPE(*s, ct_space))
1703 {
1704 char_t* str = s;
1705
1706 do ++str;
1707 while (IS_CHARTYPE(*str, ct_space));
1708
1709 g.push(s, str - s);
1710 }
1711
1712 while (true)
1713 {
1714 while (!IS_CHARTYPE(*s, ct_parse_attr_ws | ct_space)) ++s;
1715
1716 if (*s == end_quote)
1717 {
1718 char_t* str = g.flush(s);
1719
1720 do *str-- = 0;
1721 while (IS_CHARTYPE(*str, ct_space));
1722
1723 return s + 1;
1724 }
1725 else if (IS_CHARTYPE(*s, ct_space))
1726 {
1727 *s++ = ' ';
1728
1729 if (IS_CHARTYPE(*s, ct_space))
1730 {
1731 char_t* str = s + 1;
1732 while (IS_CHARTYPE(*str, ct_space)) ++str;
1733
1734 g.push(s, str - s);
1735 }
1736 }
1737 else if (opt_escape::value && *s == '&')
1738 {
1739 s = strconv_escape(s, g);
1740 }
1741 else if (!*s)
1742 {
1743 return 0;
1744 }
1745 else ++s;
1746 }
1747 }
1748
1749 static char_t* parse_wconv(char_t* s, char_t end_quote)
1750 {
1751 gap g;
1752
1753 while (true)
1754 {
1755 while (!IS_CHARTYPE(*s, ct_parse_attr_ws)) ++s;
1756
1757 if (*s == end_quote)
1758 {
1759 *g.flush(s) = 0;
1760
1761 return s + 1;
1762 }
1763 else if (IS_CHARTYPE(*s, ct_space))
1764 {
1765 if (*s == '\r')
1766 {
1767 *s++ = ' ';
1768
1769 if (*s == '\n') g.push(s, 1);
1770 }
1771 else *s++ = ' ';
1772 }
1773 else if (opt_escape::value && *s == '&')
1774 {
1775 s = strconv_escape(s, g);
1776 }
1777 else if (!*s)
1778 {
1779 return 0;
1780 }
1781 else ++s;
1782 }
1783 }
1784
1785 static char_t* parse_eol(char_t* s, char_t end_quote)
1786 {
1787 gap g;
1788
1789 while (true)
1790 {
1791 while (!IS_CHARTYPE(*s, ct_parse_attr)) ++s;
1792
1793 if (*s == end_quote)
1794 {
1795 *g.flush(s) = 0;
1796
1797 return s + 1;
1798 }
1799 else if (*s == '\r')
1800 {
1801 *s++ = '\n';
1802
1803 if (*s == '\n') g.push(s, 1);
1804 }
1805 else if (opt_escape::value && *s == '&')
1806 {
1807 s = strconv_escape(s, g);
1808 }
1809 else if (!*s)
1810 {
1811 return 0;
1812 }
1813 else ++s;
1814 }
1815 }
1816
1817 static char_t* parse_simple(char_t* s, char_t end_quote)
1818 {
1819 gap g;
1820
1821 while (true)
1822 {
1823 while (!IS_CHARTYPE(*s, ct_parse_attr)) ++s;
1824
1825 if (*s == end_quote)
1826 {
1827 *g.flush(s) = 0;
1828
1829 return s + 1;
1830 }
1831 else if (opt_escape::value && *s == '&')
1832 {
1833 s = strconv_escape(s, g);
1834 }
1835 else if (!*s)
1836 {
1837 return 0;
1838 }
1839 else ++s;
1840 }
1841 }
1842 };
1843
1844 strconv_attribute_t get_strconv_attribute(unsigned int optmask)
1845 {
1846 STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
1847
1848 switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
1849 {
1850 case 0: return strconv_attribute_impl<opt_false>::parse_simple;
1851 case 1: return strconv_attribute_impl<opt_true>::parse_simple;
1852 case 2: return strconv_attribute_impl<opt_false>::parse_eol;
1853 case 3: return strconv_attribute_impl<opt_true>::parse_eol;
1854 case 4: return strconv_attribute_impl<opt_false>::parse_wconv;
1855 case 5: return strconv_attribute_impl<opt_true>::parse_wconv;
1856 case 6: return strconv_attribute_impl<opt_false>::parse_wconv;
1857 case 7: return strconv_attribute_impl<opt_true>::parse_wconv;
1858 case 8: return strconv_attribute_impl<opt_false>::parse_wnorm;
1859 case 9: return strconv_attribute_impl<opt_true>::parse_wnorm;
1860 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
1861 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
1862 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
1863 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
1864 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
1865 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
1866 default: return 0; // should not get here
1867 }
1868 }
1869
1870 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
1871 {
1872 xml_parse_result result;
1873 result.status = status;
1874 result.offset = offset;
1875
1876 return result;
1877 }
1878
1879 struct xml_parser
1880 {
1881 xml_allocator alloc;
1882 char_t* error_offset;
1883 jmp_buf error_handler;
1884
1885 // Parser utilities.
1886 #define SKIPWS() { while (IS_CHARTYPE(*s, ct_space)) ++s; }
1887 #define OPTSET(OPT) ( optmsk & OPT )
1888 #define PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); if (!cursor) THROW_ERROR(status_out_of_memory, s); }
1889 #define POPNODE() { cursor = cursor->parent; }
1890 #define SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
1891 #define SCANWHILE(X) { while ((X)) ++s; }
1892 #define ENDSEG() { ch = *s; *s = 0; ++s; }
1893 #define THROW_ERROR(err, m) error_offset = m, longjmp(error_handler, err)
1894 #define CHECK_ERROR(err, m) { if (*s == 0) THROW_ERROR(err, m); }
1895
1896 xml_parser(const xml_allocator& alloc): alloc(alloc), error_offset(0)
1897 {
1898 }
1899
1900 // DOCTYPE consists of nested sections of the following possible types:
1901 // <!-- ... -->, <? ... ?>, "...", '...'
1902 // <![...]]>
1903 // <!...>
1904 // First group can not contain nested groups
1905 // Second group can contain nested groups of the same type
1906 // Third group can contain all other groups
1907 char_t* parse_doctype_primitive(char_t* s)
1908 {
1909 if (*s == '"' || *s == '\'')
1910 {
1911 // quoted string
1912 char_t ch = *s++;
1913 SCANFOR(*s == ch);
1914 if (!*s) THROW_ERROR(status_bad_doctype, s);
1915
1916 s++;
1917 }
1918 else if (s[0] == '<' && s[1] == '?')
1919 {
1920 // <? ... ?>
1921 s += 2;
1922 SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
1923 if (!*s) THROW_ERROR(status_bad_doctype, s);
1924
1925 s += 2;
1926 }
1927 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
1928 {
1929 s += 4;
1930 SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
1931 if (!*s) THROW_ERROR(status_bad_doctype, s);
1932
1933 s += 4;
1934 }
1935 else THROW_ERROR(status_bad_doctype, s);
1936
1937 return s;
1938 }
1939
1940 char_t* parse_doctype_ignore(char_t* s)
1941 {
1942 assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
1943 s++;
1944
1945 while (*s)
1946 {
1947 if (s[0] == '<' && s[1] == '!' && s[2] == '[')
1948 {
1949 // nested ignore section
1950 s = parse_doctype_ignore(s);
1951 }
1952 else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
1953 {
1954 // ignore section end
1955 s += 3;
1956
1957 return s;
1958 }
1959 else s++;
1960 }
1961
1962 THROW_ERROR(status_bad_doctype, s);
1963
1964 return s;
1965 }
1966
1967 char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel)
1968 {
1969 assert(s[0] == '<' && s[1] == '!');
1970 s++;
1971
1972 while (*s)
1973 {
1974 if (s[0] == '<' && s[1] == '!' && s[2] != '-')
1975 {
1976 if (s[2] == '[')
1977 {
1978 // ignore
1979 s = parse_doctype_ignore(s);
1980 }
1981 else
1982 {
1983 // some control group
1984 s = parse_doctype_group(s, endch, false);
1985 }
1986 }
1987 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
1988 {
1989 // unknown tag (forbidden), or some primitive group
1990 s = parse_doctype_primitive(s);
1991 }
1992 else if (*s == '>')
1993 {
1994 s++;
1995
1996 return s;
1997 }
1998 else s++;
1999 }
2000
2001 if (!toplevel || endch != '>') THROW_ERROR(status_bad_doctype, s);
2002
2003 return s;
2004 }
2005
2006 char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
2007 {
2008 // parse node contents, starting with exclamation mark
2009 ++s;
2010
2011 if (*s == '-') // '<!-...'
2012 {
2013 ++s;
2014
2015 if (*s == '-') // '<!--...'
2016 {
2017 ++s;
2018
2019 if (OPTSET(parse_comments))
2020 {
2021 PUSHNODE(node_comment); // Append a new node on the tree.
2022 cursor->value = s; // Save the offset.
2023 }
2024
2025 if (OPTSET(parse_eol) && OPTSET(parse_comments))
2026 {
2027 s = strconv_comment(s, endch);
2028
2029 if (!s) THROW_ERROR(status_bad_comment, cursor->value);
2030 }
2031 else
2032 {
2033 // Scan for terminating '-->'.
2034 SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>'));
2035 CHECK_ERROR(status_bad_comment, s);
2036
2037 if (OPTSET(parse_comments))
2038 *s = 0; // Zero-terminate this segment at the first terminating '-'.
2039
2040 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
2041 }
2042 }
2043 else THROW_ERROR(status_bad_comment, s);
2044 }
2045 else if (*s == '[')
2046 {
2047 // '<![CDATA[...'
2048 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
2049 {
2050 ++s;
2051
2052 if (OPTSET(parse_cdata))
2053 {
2054 PUSHNODE(node_cdata); // Append a new node on the tree.
2055 cursor->value = s; // Save the offset.
2056
2057 if (OPTSET(parse_eol))
2058 {
2059 s = strconv_cdata(s, endch);
2060
2061 if (!s) THROW_ERROR(status_bad_cdata, cursor->value);
2062 }
2063 else
2064 {
2065 // Scan for terminating ']]>'.
2066 SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
2067 CHECK_ERROR(status_bad_cdata, s);
2068
2069 *s++ = 0; // Zero-terminate this segment.
2070 }
2071 }
2072 else // Flagged for discard, but we still have to scan for the terminator.
2073 {
2074 // Scan for terminating ']]>'.
2075 SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
2076 CHECK_ERROR(status_bad_cdata, s);
2077
2078 ++s;
2079 }
2080
2081 s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
2082 }
2083 else THROW_ERROR(status_bad_cdata, s);
2084 }
2085 else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E'))
2086 {
2087 s -= 2;
2088
2089 if (cursor->parent) THROW_ERROR(status_bad_doctype, s);
2090
2091 char_t* mark = s + 9;
2092
2093 s = parse_doctype_group(s, endch, true);
2094
2095 if (OPTSET(parse_doctype))
2096 {
2097 while (IS_CHARTYPE(*mark, ct_space)) ++mark;
2098
2099 PUSHNODE(node_doctype);
2100
2101 cursor->value = mark;
2102
2103 assert((s[0] == 0 && endch == '>') || s[-1] == '>');
2104 s[*s == 0 ? 0 : -1] = 0;
2105
2106 POPNODE();
2107 }
2108 }
2109 else if (*s == 0 && endch == '-') THROW_ERROR(status_bad_comment, s);
2110 else if (*s == 0 && endch == '[') THROW_ERROR(status_bad_cdata, s);
2111 else THROW_ERROR(status_unrecognized_tag, s);
2112
2113 return s;
2114 }
2115
// Parses a '<?...?>' construct: either a processing instruction or an XML declaration.
// s          - on entry points at the '?' that follows '<'.
// ref_cursor - current tree position; copied to a local on entry and written back on normal exit.
// optmsk     - parse option mask, queried through OPTSET.
// endch      - the character the caller replaced with the terminating zero (see the static parse overload).
// Returns the position from which the caller should continue. For a declaration that has content after its
// target, the returned position is the start of that content and the cursor is left on the new
// node_declaration node; parse() detects this and continues with attribute parsing.
// Malformed input is reported through THROW_ERROR / CHECK_ERROR with status_bad_pi.
char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
{
    // load into registers
    xml_node_struct* cursor = ref_cursor;
    char_t ch = 0;

    // parse node contents, starting with question mark
    ++s;

    // read PI target
    char_t* target = s;

    if (!IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_pi, s);

    SCANWHILE(IS_CHARTYPE(*s, ct_symbol));
    CHECK_ERROR(status_bad_pi, s);

    // determine node type; stricmp / strcasecmp is not portable
    // (x | ' ') maps an ASCII upper-case letter to lower case, so this accepts any casing of "xml";
    // target + 3 == s requires the target to be exactly three characters long
    bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;

    // a node is only created when the matching option (parse_declaration / parse_pi) is enabled
    if (declaration ? OPTSET(parse_declaration) : OPTSET(parse_pi))
    {
        if (declaration)
        {
            // disallow non top-level declarations
            if (cursor->parent) THROW_ERROR(status_bad_pi, s);

            PUSHNODE(node_declaration);
        }
        else
        {
            PUSHNODE(node_pi);
        }

        cursor->name = target;

        // terminate the target name in place; the character that followed it ends up in 'ch'
        ENDSEG();

        // parse value/attributes
        if (ch == '?')
        {
            // empty node
            // NOTE(review): ENDSWITH presumably also accepts the terminating zero when endch matches - confirm against the macro
            if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_pi, s);
            // only step over '>' if it is physically present
            s += (*s == '>');

            POPNODE();
        }
        else if (IS_CHARTYPE(ch, ct_space))
        {
            SKIPWS();

            // scan for tag end
            char_t* value = s;

            SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
            CHECK_ERROR(status_bad_pi, s);

            if (declaration)
            {
                // replace ending ? with / so that 'element' terminates properly
                *s = '/';

                // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
                s = value;
            }
            else
            {
                // store value and step over >
                cursor->value = value;
                POPNODE();

                // terminates the value at the closing '?' and steps past it
                ENDSEG();

                s += (*s == '>');
            }
        }
        else THROW_ERROR(status_bad_pi, s);
    }
    else
    {
        // node is not wanted: nothing is stored, but the end of the construct still has to be found
        // scan for tag end
        SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
        CHECK_ERROR(status_bad_pi, s);

        // step over '?>' (or just '?' when '>' is not physically present)
        s += (s[1] == '>' ? 2 : 1);
    }

    // store from registers
    ref_cursor = cursor;

    return s;
}
2208
// Main parsing loop: walks the zero-terminated buffer starting at s and builds the node tree below xmldoc.
// s      - start of the mutable input buffer; names and values are terminated in place.
// xmldoc - document node that becomes the initial cursor.
// optmsk - parse option mask (queried through OPTSET and used to pick the string conversion routines).
// endch  - the character the caller replaced with the terminating zero; several branches accept the
//          terminating zero in place of that character.
// Malformed input is reported through THROW_ERROR / CHECK_ERROR.
void parse(char_t* s, xml_node_struct* xmldoc, unsigned int optmsk, char_t endch)
{
    // pick attribute / PCDATA conversion routines once, based on the option mask
    strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
    strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);

    char_t ch = 0;
    xml_node_struct* cursor = xmldoc;
    char_t* mark = s;

    while (*s != 0)
    {
        if (*s == '<')
        {
            ++s;

        // also entered by goto from the PCDATA branch below, once it has stepped over '<'
        LOC_TAG:
            if (IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
            {
                PUSHNODE(node_element); // Append a new node to the tree.

                cursor->name = s;

                SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
                ENDSEG(); // Save char in 'ch', terminate & step over.

                if (ch == '>')
                {
                    // end of tag
                }
                else if (IS_CHARTYPE(ch, ct_space))
                {
                // also entered by goto after parse_question() leaves the cursor on a declaration node
                LOC_ATTRIBUTES:
                    while (true)
                    {
                        SKIPWS(); // Eat any whitespace.

                        if (IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
                        {
                            xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute.
                            if (!a) THROW_ERROR(status_out_of_memory, s);

                            a->name = s; // Save the offset.

                            SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
                            CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance

                            ENDSEG(); // Save char in 'ch', terminate & step over.
                            CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance

                            // whitespace is allowed between the attribute name and '='
                            if (IS_CHARTYPE(ch, ct_space))
                            {
                                SKIPWS(); // Eat any whitespace.
                                CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance

                                ch = *s;
                                ++s;
                            }

                            if (ch == '=') // '<... #=...'
                            {
                                SKIPWS(); // Eat any whitespace.

                                if (*s == '"' || *s == '\'') // '<... #="...'
                                {
                                    ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
                                    ++s; // Step over the quote.
                                    a->value = s; // Save the offset.

                                    s = strconv_attribute(s, ch);

                                    // a null result means the value could not be converted / terminated
                                    if (!s) THROW_ERROR(status_bad_attribute, a->value);

                                    // After this line the loop continues from the start;
                                    // Whitespaces, / and > are ok, symbols and EOF are wrong,
                                    // everything else will be detected
                                    if (IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_attribute, s);
                                }
                                else THROW_ERROR(status_bad_attribute, s);
                            }
                            else THROW_ERROR(status_bad_attribute, s);
                        }
                        else if (*s == '/')
                        {
                            ++s;

                            if (*s == '>')
                            {
                                POPNODE();
                                s++;
                                break;
                            }
                            else if (*s == 0 && endch == '>')
                            {
                                // the closing '>' was the last buffer character, replaced by the terminating zero
                                POPNODE();
                                break;
                            }
                            else THROW_ERROR(status_bad_start_element, s);
                        }
                        else if (*s == '>')
                        {
                            ++s;

                            break;
                        }
                        else if (*s == 0 && endch == '>')
                        {
                            break;
                        }
                        else THROW_ERROR(status_bad_start_element, s);
                    }

                    // !!!
                }
                else if (ch == '/') // '<#.../'
                {
                    if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_start_element, s);

                    POPNODE(); // Pop.

                    // only step over '>' if it is physically present
                    s += (*s == '>');
                }
                else if (ch == 0)
                {
                    // we stepped over null terminator, backtrack & handle closing tag
                    --s;

                    if (endch != '>') THROW_ERROR(status_bad_start_element, s);
                }
                else THROW_ERROR(status_bad_start_element, s);
            }
            else if (*s == '/')
            {
                ++s;

                // closing tag: its name must match the name of the element the cursor is on
                char_t* name = cursor->name;
                if (!name) THROW_ERROR(status_end_element_mismatch, s);

                while (IS_CHARTYPE(*s, ct_symbol))
                {
                    if (*s++ != *name++) THROW_ERROR(status_end_element_mismatch, s);
                }

                // the closing name ended before the opening name did
                if (*name)
                {
                    if (*s == 0 && name[0] == endch && name[1] == 0) THROW_ERROR(status_bad_end_element, s);
                    else THROW_ERROR(status_end_element_mismatch, s);
                }

                POPNODE(); // Pop.

                SKIPWS();

                if (*s == 0)
                {
                    if (endch != '>') THROW_ERROR(status_bad_end_element, s);
                }
                else
                {
                    if (*s != '>') THROW_ERROR(status_bad_end_element, s);
                    ++s;
                }
            }
            else if (*s == '?') // '<?...'
            {
                s = parse_question(s, cursor, optmsk, endch);

                assert(cursor);
                // parse_question leaves the cursor on a declaration node when its attributes still need parsing
                // NOTE(review): the "+ 1" suggests the header stores the node type minus one - confirm against the allocation code
                if ((cursor->header & xml_memory_page_type_mask) + 1 == node_declaration) goto LOC_ATTRIBUTES;
            }
            else if (*s == '!') // '<!...'
            {
                s = parse_exclamation(s, cursor, optmsk, endch);
            }
            else if (*s == 0 && endch == '?') THROW_ERROR(status_bad_pi, s);
            else THROW_ERROR(status_unrecognized_tag, s);
        }
        else
        {
            mark = s; // Save this offset while searching for a terminator.

            SKIPWS(); // Eat whitespace if no genuine PCDATA here.

            // whitespace-only run: dropped unless parse_ws_pcdata is set (an empty run is always dropped)
            if ((!OPTSET(parse_ws_pcdata) || mark == s) && (*s == '<' || !*s))
            {
                continue;
            }

            // rewind so that leading whitespace is kept as part of the text
            s = mark;

            if (cursor->parent)
            {
                PUSHNODE(node_pcdata); // Append a new node on the tree.
                cursor->value = s; // Save the offset.

                s = strconv_pcdata(s);

                POPNODE(); // Pop since this is a standalone.

                if (!*s) break;
            }
            else
            {
                // text directly under the document node is not stored; skip to the next tag
                SCANFOR(*s == '<'); // '...<'
                if (!*s) break;

                ++s;
            }

            // We're after '<'
            goto LOC_TAG;
        }
    }

    // check that last tag is closed
    if (cursor != xmldoc) THROW_ERROR(status_end_element_mismatch, s);
}
2425
// Entry point of the parser: parses 'length' characters of 'buffer' in place into the document 'root'.
// buffer - mutable input; its last character is overwritten with a zero terminator before parsing.
// length - number of characters in buffer; zero yields status_ok immediately.
// root   - document node; it is cast to xml_document_struct, so it must actually be one.
// optmsk - parse option mask forwarded to the parser.
// Returns the parse result; on failure it carries the status and the offset of the error within buffer.
static xml_parse_result parse(char_t* buffer, size_t length, xml_node_struct* root, unsigned int optmsk)
{
    xml_document_struct* xmldoc = static_cast<xml_document_struct*>(root);

    // store buffer for offset_debug
    xmldoc->buffer = buffer;

    // early-out for empty documents
    if (length == 0) return make_parse_result(status_ok);

    // create parser on stack
    xml_parser parser(*xmldoc);

    // save last character and make buffer zero-terminated (speeds up parsing)
    // the saved character is handed to the parser as endch so it can still be taken into account
    char_t endch = buffer[length - 1];
    buffer[length - 1] = 0;

    // perform actual parsing
    // setjmp returns 0 when called directly; a non-zero value is used below as the error status
    // NOTE(review): presumably THROW_ERROR longjmps back here with the status code - confirm against the macro
    int error = setjmp(parser.error_handler);

    if (error == 0)
    {
        parser.parse(buffer, xmldoc, optmsk, endch);
    }

    // error is still 0 (status_ok) if parsing ran to completion; offset is relative to the buffer start
    xml_parse_result result = make_parse_result(static_cast<xml_parse_status>(error), parser.error_offset ? parser.error_offset - buffer : 0);
    assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);

    // update allocator state
    *static_cast<xml_allocator*>(xmldoc) = parser.alloc;

    // since we removed last character, we have to handle the only possible false positive
    if (result && endch == '<')
    {
        // there's no possible well-formed document with < at the end
        return make_parse_result(status_unrecognized_tag, length);
    }

    return result;
}
2466 };
2467
2468 // Output facilities
2469 xml_encoding get_write_native_encoding()
2470 {
2471 #ifdef PUGIXML_WCHAR_MODE
2472 return get_wchar_encoding();
2473 #else
2474 return encoding_utf8;
2475 #endif
2476 }
2477
2478 xml_encoding get_write_encoding(xml_encoding encoding)
2479 {
2480 // replace wchar encoding with utf implementation
2481 if (encoding == encoding_wchar) return get_wchar_encoding();
2482
2483 // replace utf16 encoding with utf16 with specific endianness
2484 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2485
2486 // replace utf32 encoding with utf32 with specific endianness
2487 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2488
2489 // only do autodetection if no explicit encoding is requested
2490 if (encoding != encoding_auto) return encoding;
2491
2492 // assume utf8 encoding
2493 return encoding_utf8;
2494 }
2495
2496#ifdef PUGIXML_WCHAR_MODE
2497 size_t get_valid_length(const char_t* data, size_t length)
2498 {
2499 assert(length > 0);
2500
2501 // discard last character if it's the lead of a surrogate pair
2502 return (sizeof(wchar_t) == 2 && (unsigned)(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
2503 }
2504
2505 size_t convert_buffer(char* result, const char_t* data, size_t length, xml_encoding encoding)
2506 {
2507 // only endian-swapping is required
2508 if (need_endian_swap_utf(encoding, get_wchar_encoding()))
2509 {
2510 convert_wchar_endian_swap(reinterpret_cast<char_t*>(result), data, length);
2511
2512 return length * sizeof(char_t);
2513 }
2514
2515 // convert to utf8
2516 if (encoding == encoding_utf8)
2517 {
2518 uint8_t* dest = reinterpret_cast<uint8_t*>(result);
2519
2520 uint8_t* end = sizeof(wchar_t) == 2 ?
2521 utf_decoder<utf8_writer>::decode_utf16_block(reinterpret_cast<const uint16_t*>(data), length, dest) :
2522 utf_decoder<utf8_writer>::decode_utf32_block(reinterpret_cast<const uint32_t*>(data), length, dest);
2523
2524 return static_cast<size_t>(end - dest);
2525 }
2526
2527 // convert to utf16
2528 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2529 {
2530 uint16_t* dest = reinterpret_cast<uint16_t*>(result);
2531
2532 // convert to native utf16
2533 uint16_t* end = utf_decoder<utf16_writer>::decode_utf32_block(reinterpret_cast<const uint32_t*>(data), length, dest);
2534
2535 // swap if necessary
2536 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2537
2538 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2539
2540 return static_cast<size_t>(end - dest) * sizeof(uint16_t);
2541 }
2542
2543 // convert to utf32
2544 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2545 {
2546 uint32_t* dest = reinterpret_cast<uint32_t*>(result);
2547
2548 // convert to native utf32
2549 uint32_t* end = utf_decoder<utf32_writer>::decode_utf16_block(reinterpret_cast<const uint16_t*>(data), length, dest);
2550
2551 // swap if necessary
2552 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2553
2554 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2555
2556 return static_cast<size_t>(end - dest) * sizeof(uint32_t);
2557 }
2558
2559 assert(!"Invalid encoding");
2560 return 0;
2561 }
2562#else
2563 size_t get_valid_length(const char_t* data, size_t length)
2564 {
2565 assert(length > 4);
2566
2567 for (size_t i = 1; i <= 4; ++i)
2568 {
2569 uint8_t ch = static_cast<uint8_t>(data[length - i]);
2570
2571 // either a standalone character or a leading one
2572 if ((ch & 0xc0) != 0x80) return length - i;
2573 }
2574
2575 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
2576 return length;
2577 }
2578
2579 size_t convert_buffer(char* result, const char_t* data, size_t length, xml_encoding encoding)
2580 {
2581 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2582 {
2583 uint16_t* dest = reinterpret_cast<uint16_t*>(result);
2584
2585 // convert to native utf16
2586 uint16_t* end = utf_decoder<utf16_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
2587
2588 // swap if necessary
2589 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2590
2591 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2592
2593 return static_cast<size_t>(end - dest) * sizeof(uint16_t);
2594 }
2595
2596 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2597 {
2598 uint32_t* dest = reinterpret_cast<uint32_t*>(result);
2599
2600 // convert to native utf32
2601 uint32_t* end = utf_decoder<utf32_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
2602
2603 // swap if necessary
2604 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2605
2606 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2607
2608 return static_cast<size_t>(end - dest) * sizeof(uint32_t);
2609 }
2610
2611 assert(!"Invalid encoding");
2612 return 0;
2613 }
2614#endif
2615
2616 class xml_buffered_writer
2617 {
2618 xml_buffered_writer(const xml_buffered_writer&);
2619 xml_buffered_writer& operator=(const xml_buffered_writer&);
2620
2621 public:
2622 xml_buffered_writer(xml_writer& writer, xml_encoding user_encoding): writer(writer), bufsize(0), encoding(get_write_encoding(user_encoding))
2623 {
2624 }
2625
2626 ~xml_buffered_writer()
2627 {
2628 flush();
2629 }
2630
2631 void flush()
2632 {
2633 flush(buffer, bufsize);
2634 bufsize = 0;
2635 }
2636
2637 void flush(const char_t* data, size_t size)
2638 {
2639 if (size == 0) return;
2640
2641 // fast path, just write data
2642 if (encoding == get_write_native_encoding())
2643 writer.write(data, size * sizeof(char_t));
2644 else
2645 {
2646 // convert chunk
2647 size_t result = convert_buffer(scratch, data, size, encoding);
2648 assert(result <= sizeof(scratch));
2649
2650 // write data
2651 writer.write(scratch, result);
2652 }
2653 }
2654
2655 void write(const char_t* data, size_t length)
2656 {
2657 if (bufsize + length > bufcapacity)
2658 {
2659 // flush the remaining buffer contents
2660 flush();
2661
2662 // handle large chunks
2663 if (length > bufcapacity)
2664 {
2665 if (encoding == get_write_native_encoding())
2666 {
2667 // fast path, can just write data chunk
2668 writer.write(data, length * sizeof(char_t));
2669 return;
2670 }
2671
2672 // need to convert in suitable chunks
2673 while (length > bufcapacity)
2674 {
2675 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
2676 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
2677 size_t chunk_size = get_valid_length(data, bufcapacity);
2678
2679 // convert chunk and write
2680 flush(data, chunk_size);
2681
2682 // iterate
2683 data += chunk_size;
2684 length -= chunk_size;
2685 }
2686
2687 // small tail is copied below
2688 bufsize = 0;
2689 }
2690 }
2691
2692 memcpy(buffer + bufsize, data, length * sizeof(char_t));
2693 bufsize += length;
2694 }
2695
2696 void write(const char_t* data)
2697 {
2698 write(data, strlength(data));
2699 }
2700
2701 void write(char_t d0)
2702 {
2703 if (bufsize + 1 > bufcapacity) flush();
2704
2705 buffer[bufsize + 0] = d0;
2706 bufsize += 1;
2707 }
2708
2709 void write(char_t d0, char_t d1)
2710 {
2711 if (bufsize + 2 > bufcapacity) flush();
2712
2713 buffer[bufsize + 0] = d0;
2714 buffer[bufsize + 1] = d1;
2715 bufsize += 2;
2716 }
2717
2718 void write(char_t d0, char_t d1, char_t d2)
2719 {
2720 if (bufsize + 3 > bufcapacity) flush();
2721
2722 buffer[bufsize + 0] = d0;
2723 buffer[bufsize + 1] = d1;
2724 buffer[bufsize + 2] = d2;
2725 bufsize += 3;
2726 }
2727
2728 void write(char_t d0, char_t d1, char_t d2, char_t d3)
2729 {
2730 if (bufsize + 4 > bufcapacity) flush();
2731
2732 buffer[bufsize + 0] = d0;
2733 buffer[bufsize + 1] = d1;
2734 buffer[bufsize + 2] = d2;
2735 buffer[bufsize + 3] = d3;
2736 bufsize += 4;
2737 }
2738
2739 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
2740 {
2741 if (bufsize + 5 > bufcapacity) flush();
2742
2743 buffer[bufsize + 0] = d0;
2744 buffer[bufsize + 1] = d1;
2745 buffer[bufsize + 2] = d2;
2746 buffer[bufsize + 3] = d3;
2747 buffer[bufsize + 4] = d4;
2748 bufsize += 5;
2749 }
2750
2751 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
2752 {
2753 if (bufsize + 6 > bufcapacity) flush();
2754
2755 buffer[bufsize + 0] = d0;
2756 buffer[bufsize + 1] = d1;
2757 buffer[bufsize + 2] = d2;
2758 buffer[bufsize + 3] = d3;
2759 buffer[bufsize + 4] = d4;
2760 buffer[bufsize + 5] = d5;
2761 bufsize += 6;
2762 }
2763
2764 // utf8 maximum expansion: x4 (-> utf32)
2765 // utf16 maximum expansion: x2 (-> utf32)
2766 // utf32 maximum expansion: x1
2767 enum { bufcapacity = 2048 };
2768
2769 char_t buffer[bufcapacity];
2770 char scratch[4 * bufcapacity];
2771
2772 xml_writer& writer;
2773 size_t bufsize;
2774 xml_encoding encoding;
2775 };
2776
2777 void write_bom(xml_writer& writer, xml_encoding encoding)
2778 {
2779 switch (encoding)
2780 {
2781 case encoding_utf8:
2782 writer.write("\xef\xbb\xbf", 3);
2783 break;
2784
2785 case encoding_utf16_be:
2786 writer.write("\xfe\xff", 2);
2787 break;
2788
2789 case encoding_utf16_le:
2790 writer.write("\xff\xfe", 2);
2791 break;
2792
2793 case encoding_utf32_be:
2794 writer.write("\x00\x00\xfe\xff", 4);
2795 break;
2796
2797 case encoding_utf32_le:
2798 writer.write("\xff\xfe\x00\x00", 4);
2799 break;
2800
2801 default:
2802 assert(!"Invalid encoding");
2803 }
2804 }
2805
2806 void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
2807 {
2808 while (*s)
2809 {
2810 const char_t* prev = s;
2811
2812 // While *s is a usual symbol
2813 while (!IS_CHARTYPEX(*s, type)) ++s;
2814
2815 writer.write(prev, static_cast<size_t>(s - prev));
2816
2817 switch (*s)
2818 {
2819 case 0: break;
2820 case '&':
2821 writer.write('&', 'a', 'm', 'p', ';');
2822 ++s;
2823 break;
2824 case '<':
2825 writer.write('&', 'l', 't', ';');
2826 ++s;
2827 break;
2828 case '>':
2829 writer.write('&', 'g', 't', ';');
2830 ++s;
2831 break;
2832 case '"':
2833 writer.write('&', 'q', 'u', 'o', 't', ';');
2834 ++s;
2835 break;
2836 default: // s is not a usual symbol
2837 {
2838 unsigned int ch = static_cast<unsigned int>(*s++);
2839 assert(ch < 32);
2840
2841 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
2842 }
2843 }
2844 }
2845 }
2846
2847 void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
2848 {
2849 do
2850 {
2851 writer.write('<', '!', '[', 'C', 'D');
2852 writer.write('A', 'T', 'A', '[');
2853
2854 const char_t* prev = s;
2855
2856 // look for ]]> sequence - we can't output it as is since it terminates CDATA
2857 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
2858
2859 // skip ]] if we stopped at ]]>, > will go to the next CDATA section
2860 if (*s) s += 2;
2861
2862 writer.write(prev, static_cast<size_t>(s - prev));
2863
2864 writer.write(']', ']', '>');
2865 }
2866 while (*s);
2867 }
2868
2869 void node_output_attributes(xml_buffered_writer& writer, const xml_node& node)
2870 {
2871 const char_t* default_name = PUGIXML_TEXT(":anonymous");
2872
2873 for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute())
2874 {
2875 writer.write(' ');
2876 writer.write(a.name()[0] ? a.name() : default_name);
2877 writer.write('=', '"');
2878
2879 text_output_escaped(writer, a.value(), ctx_special_attr);
2880
2881 writer.write('"');
2882 }
2883 }
2884
2885 void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth)
2886 {
2887 const char_t* default_name = PUGIXML_TEXT(":anonymous");
2888
2889 if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
2890 for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
2891
2892 switch (node.type())
2893 {
2894 case node_document:
2895 {
2896 for (xml_node n = node.first_child(); n; n = n.next_sibling())
2897 node_output(writer, n, indent, flags, depth);
2898 break;
2899 }
2900
2901 case node_element:
2902 {
2903 const char_t* name = node.name()[0] ? node.name() : default_name;
2904
2905 writer.write('<');
2906 writer.write(name);
2907
2908 node_output_attributes(writer, node);
2909
2910 if (flags & format_raw)
2911 {
2912 if (!node.first_child())
2913 writer.write(' ', '/', '>');
2914 else
2915 {
2916 writer.write('>');
2917
2918 for (xml_node n = node.first_child(); n; n = n.next_sibling())
2919 node_output(writer, n, indent, flags, depth + 1);
2920
2921 writer.write('<', '/');
2922 writer.write(name);
2923 writer.write('>');
2924 }
2925 }
2926 else if (!node.first_child())
2927 writer.write(' ', '/', '>', '\n');
2928 else if (node.first_child() == node.last_child() && (node.first_child().type() == node_pcdata || node.first_child().type() == node_cdata))
2929 {
2930 writer.write('>');
2931
2932 if (node.first_child().type() == node_pcdata)
2933 text_output_escaped(writer, node.first_child().value(), ctx_special_pcdata);
2934 else
2935 text_output_cdata(writer, node.first_child().value());
2936
2937 writer.write('<', '/');
2938 writer.write(name);
2939 writer.write('>', '\n');
2940 }
2941 else
2942 {
2943 writer.write('>', '\n');
2944
2945 for (xml_node n = node.first_child(); n; n = n.next_sibling())
2946 node_output(writer, n, indent, flags, depth + 1);
2947
2948 if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
2949 for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
2950
2951 writer.write('<', '/');
2952 writer.write(name);
2953 writer.write('>', '\n');
2954 }
2955
2956 break;
2957 }
2958
2959 case node_pcdata:
2960 text_output_escaped(writer, node.value(), ctx_special_pcdata);
2961 if ((flags & format_raw) == 0) writer.write('\n');
2962 break;
2963
2964 case node_cdata:
2965 text_output_cdata(writer, node.value());
2966 if ((flags & format_raw) == 0) writer.write('\n');
2967 break;
2968
2969 case node_comment:
2970 writer.write('<', '!', '-', '-');
2971 writer.write(node.value());
2972 writer.write('-', '-', '>');
2973 if ((flags & format_raw) == 0) writer.write('\n');
2974 break;
2975
2976 case node_pi:
2977 case node_declaration:
2978 writer.write('<', '?');
2979 writer.write(node.name()[0] ? node.name() : default_name);
2980
2981 if (node.type() == node_declaration)
2982 {
2983 node_output_attributes(writer, node);
2984 }
2985 else if (node.value()[0])
2986 {
2987 writer.write(' ');
2988 writer.write(node.value());
2989 }
2990
2991 writer.write('?', '>');
2992 if ((flags & format_raw) == 0) writer.write('\n');
2993 break;
2994
2995 case node_doctype:
2996 writer.write('<', '!', 'D', 'O', 'C');
2997 writer.write('T', 'Y', 'P', 'E');
2998
2999 if (node.value()[0])
3000 {
3001 writer.write(' ');
3002 writer.write(node.value());
3003 }
3004
3005 writer.write('>');
3006 if ((flags & format_raw) == 0) writer.write('\n');
3007 break;
3008
3009 default:
3010 assert(!"Invalid node type");
3011 }
3012 }
3013
3014 inline bool has_declaration(const xml_node& node)
3015 {
3016 for (xml_node child = node.first_child(); child; child = child.next_sibling())
3017 {
3018 xml_node_type type = child.type();
3019
3020 if (type == node_declaration) return true;
3021 if (type == node_element) return false;
3022 }
3023
3024 return false;
3025 }
3026
3027 inline bool allow_insert_child(xml_node_type parent, xml_node_type child)
3028 {
3029 if (parent != node_document && parent != node_element) return false;
3030 if (child == node_document || child == node_null) return false;
3031 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
3032
3033 return true;
3034 }
3035
3036 void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip)
3037 {
3038 assert(dest.type() == source.type());
3039
3040 switch (source.type())
3041 {
3042 case node_element:
3043 {
3044 dest.set_name(source.name());
3045
3046 for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
3047 dest.append_attribute(a.name()).set_value(a.value());
3048
3049 for (xml_node c = source.first_child(); c; c = c.next_sibling())
3050 {
3051 if (c == skip) continue;
3052
3053 xml_node cc = dest.append_child(c.type());
3054 assert(cc);
3055
3056 recursive_copy_skip(cc, c, skip);
3057 }
3058
3059 break;
3060 }
3061
3062 case node_pcdata:
3063 case node_cdata:
3064 case node_comment:
3065 case node_doctype:
3066 dest.set_value(source.value());
3067 break;
3068
3069 case node_pi:
3070 dest.set_name(source.name());
3071 dest.set_value(source.value());
3072 break;
3073
3074 case node_declaration:
3075 {
3076 dest.set_name(source.name());
3077
3078 for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
3079 dest.append_attribute(a.name()).set_value(a.value());
3080
3081 break;
3082 }
3083
3084 default:
3085 assert(!"Invalid node type");
3086 }
3087 }
3088
3089 // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
3090 xml_parse_status get_file_size(FILE* file, size_t& out_result)
3091 {
3092 #if defined(_MSC_VER) && _MSC_VER >= 1400
3093 // there are 64-bit versions of fseek/ftell, let's use them
3094 typedef __int64 length_type;
3095
3096 _fseeki64(file, 0, SEEK_END);
3097 length_type length = _ftelli64(file);
3098 _fseeki64(file, 0, SEEK_SET);
3099 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && !defined(__STRICT_ANSI__)
3100 // there are 64-bit versions of fseek/ftell, let's use them
3101 typedef off64_t length_type;
3102
3103 fseeko64(file, 0, SEEK_END);
3104 length_type length = ftello64(file);
3105 fseeko64(file, 0, SEEK_SET);
3106 #else
3107 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
3108 typedef long length_type;
3109
3110 fseek(file, 0, SEEK_END);
3111 length_type length = ftell(file);
3112 fseek(file, 0, SEEK_SET);
3113 #endif
3114
3115 // check for I/O errors
3116 if (length < 0) return status_io_error;
3117
3118 // check for overflow
3119 size_t result = static_cast<size_t>(length);
3120
3121 if (static_cast<length_type>(result) != length) return status_out_of_memory;
3122
3123 // finalize
3124 out_result = result;
3125
3126 return status_ok;
3127 }
3128
3129 xml_parse_result load_file_impl(xml_document& doc, FILE* file, unsigned int options, xml_encoding encoding)
3130 {
3131 if (!file) return make_parse_result(status_file_not_found);
3132
3133 // get file size (can result in I/O errors)
3134 size_t size = 0;
3135 xml_parse_status size_status = get_file_size(file, size);
3136
3137 if (size_status != status_ok)
3138 {
3139 fclose(file);
3140 return make_parse_result(size_status);
3141 }
3142
3143 // allocate buffer for the whole file
3144 char* contents = static_cast<char*>(global_allocate(size > 0 ? size : 1));
3145
3146 if (!contents)
3147 {
3148 fclose(file);
3149 return make_parse_result(status_out_of_memory);
3150 }
3151
3152 // read file in memory
3153 size_t read_size = fread(contents, 1, size, file);
3154 fclose(file);
3155
3156 if (read_size != size)
3157 {
3158 global_deallocate(contents);
3159 return make_parse_result(status_io_error);
3160 }
3161
3162 return doc.load_buffer_inplace_own(contents, size, options, encoding);
3163 }
3164
3165#ifndef PUGIXML_NO_STL
3166 template <typename T> xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding)
3167 {
3168 // get length of remaining data in stream
3169 typename std::basic_istream<T>::pos_type pos = stream.tellg();
3170 stream.seekg(0, std::ios::end);
3171 std::streamoff length = stream.tellg() - pos;
3172 stream.seekg(pos);
3173
3174 if (stream.fail() || pos < 0) return make_parse_result(status_io_error);
3175
3176 // guard against huge files
3177 size_t read_length = static_cast<size_t>(length);
3178
3179 if (static_cast<std::streamsize>(read_length) != length || length < 0) return make_parse_result(status_out_of_memory);
3180
3181 // read stream data into memory (guard against stream exceptions with buffer holder)
3182 buffer_holder buffer(global_allocate((read_length > 0 ? read_length : 1) * sizeof(T)), global_deallocate);
3183 if (!buffer.data) return make_parse_result(status_out_of_memory);
3184
3185 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
3186
3187 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
3188 if (stream.bad()) return make_parse_result(status_io_error);
3189
3190 // load data from buffer
3191 size_t actual_length = static_cast<size_t>(stream.gcount());
3192 assert(actual_length <= read_length);
3193
3194 return doc.load_buffer_inplace_own(buffer.release(), actual_length * sizeof(T), options, encoding);
3195 }
3196#endif
3197
3198#if defined(_MSC_VER) || defined(__BORLANDC__) || defined(__MINGW32__)
3199 FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
3200 {
3201 return _wfopen(path, mode);
3202 }
3203#else
3204 char* convert_path_heap(const wchar_t* str)
3205 {
3206 assert(str);
3207
3208 // first pass: get length in utf8 characters
3209 size_t length = wcslen(str);
3210 size_t size = as_utf8_begin(str, length);
3211
3212 // allocate resulting string
3213 char* result = static_cast<char*>(global_allocate(size + 1));
3214 if (!result) return 0;
3215
3216 // second pass: convert to utf8
3217 as_utf8_end(result, size, str, length);
3218
3219 return result;
3220 }
3221
3222 FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
3223 {
3224 // there is no standard function to open wide paths, so our best bet is to try utf8 path
3225 char* path_utf8 = convert_path_heap(path);
3226 if (!path_utf8) return 0;
3227
3228 // convert mode to ASCII (we mirror _wfopen interface)
3229 char mode_ascii[4] = {0};
3230 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
3231
3232 // try to open the utf8 path
3233 FILE* result = fopen(path_utf8, mode_ascii);
3234
3235 // free dummy buffer
3236 global_deallocate(path_utf8);
3237
3238 return result;
3239 }
3240#endif
3241}
3242
3243namespace pugi
3244{
// Stores the opaque file handle; write() casts it back to FILE* before use.
xml_writer_file::xml_writer_file(void* file): file(file)
{
}
3248
3249 void xml_writer_file::write(const void* data, size_t size)
3250 {
3251 fwrite(data, size, 1, static_cast<FILE*>(file));
3252 }
3253
3254#ifndef PUGIXML_NO_STL
// Binds the writer to a narrow-character stream; the wide stream pointer is left null.
xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
{
}
3258
// Binds the writer to a wide-character stream; the narrow stream pointer is left null.
xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
{
}
3262
3263 void xml_writer_stream::write(const void* data, size_t size)
3264 {
3265 if (narrow_stream)
3266 {
3267 assert(!wide_stream);
3268 narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
3269 }
3270 else
3271 {
3272 assert(wide_stream);
3273 assert(size % sizeof(wchar_t) == 0);
3274
3275 wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
3276 }
3277 }
3278#endif
3279
3280 xml_tree_walker::xml_tree_walker(): _depth(0)
3281 {
3282 }
3283
3284 xml_tree_walker::~xml_tree_walker()
3285 {
3286 }
3287
3288 int xml_tree_walker::depth() const
3289 {
3290 return _depth;
3291 }
3292
3293 bool xml_tree_walker::begin(xml_node&)
3294 {
3295 return true;
3296 }
3297
3298 bool xml_tree_walker::end(xml_node&)
3299 {
3300 return true;
3301 }
3302
3303 xml_attribute::xml_attribute(): _attr(0)
3304 {
3305 }
3306
3307 xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
3308 {
3309 }
3310
3311 xml_attribute::operator xml_attribute::unspecified_bool_type() const
3312 {
3313 return _attr ? &xml_attribute::_attr : 0;
3314 }
3315
3316 bool xml_attribute::operator!() const
3317 {
3318 return !_attr;
3319 }
3320
3321 bool xml_attribute::operator==(const xml_attribute& r) const
3322 {
3323 return (_attr == r._attr);
3324 }
3325
3326 bool xml_attribute::operator!=(const xml_attribute& r) const
3327 {
3328 return (_attr != r._attr);
3329 }
3330
3331 bool xml_attribute::operator<(const xml_attribute& r) const
3332 {
3333 return (_attr < r._attr);
3334 }
3335
3336 bool xml_attribute::operator>(const xml_attribute& r) const
3337 {
3338 return (_attr > r._attr);
3339 }
3340
3341 bool xml_attribute::operator<=(const xml_attribute& r) const
3342 {
3343 return (_attr <= r._attr);
3344 }
3345
3346 bool xml_attribute::operator>=(const xml_attribute& r) const
3347 {
3348 return (_attr >= r._attr);
3349 }
3350
3351 xml_attribute xml_attribute::next_attribute() const
3352 {
3353 return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
3354 }
3355
3356 xml_attribute xml_attribute::previous_attribute() const
3357 {
3358 return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
3359 }
3360
3361 int xml_attribute::as_int() const
3362 {
3363 if (!_attr || !_attr->value) return 0;
3364
3365 #ifdef PUGIXML_WCHAR_MODE
3366 return (int)wcstol(_attr->value, 0, 10);
3367 #else
3368 return (int)strtol(_attr->value, 0, 10);
3369 #endif
3370 }
3371
3372 unsigned int xml_attribute::as_uint() const
3373 {
3374 if (!_attr || !_attr->value) return 0;
3375
3376 #ifdef PUGIXML_WCHAR_MODE
3377 return (unsigned int)wcstoul(_attr->value, 0, 10);
3378 #else
3379 return (unsigned int)strtoul(_attr->value, 0, 10);
3380 #endif
3381 }
3382
3383 double xml_attribute::as_double() const
3384 {
3385 if (!_attr || !_attr->value) return 0;
3386
3387 #ifdef PUGIXML_WCHAR_MODE
3388 return wcstod(_attr->value, 0);
3389 #else
3390 return strtod(_attr->value, 0);
3391 #endif
3392 }
3393
3394 float xml_attribute::as_float() const
3395 {
3396 if (!_attr || !_attr->value) return 0;
3397
3398 #ifdef PUGIXML_WCHAR_MODE
3399 return (float)wcstod(_attr->value, 0);
3400 #else
3401 return (float)strtod(_attr->value, 0);
3402 #endif
3403 }
3404
3405 bool xml_attribute::as_bool() const
3406 {
3407 if (!_attr || !_attr->value) return false;
3408
3409 // only look at first char
3410 char_t first = *_attr->value;
3411
3412 // 1*, t* (true), T* (True), y* (yes), Y* (YES)
3413 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
3414 }
3415
3416 bool xml_attribute::empty() const
3417 {
3418 return !_attr;
3419 }
3420
3421 const char_t* xml_attribute::name() const
3422 {
3423 return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT("");
3424 }
3425
3426 const char_t* xml_attribute::value() const
3427 {
3428 return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT("");
3429 }
3430
3431 size_t xml_attribute::hash_value() const
3432 {
3433 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
3434 }
3435
3436 xml_attribute_struct* xml_attribute::internal_object() const
3437 {
3438 return _attr;
3439 }
3440
3441 xml_attribute& xml_attribute::operator=(const char_t* rhs)
3442 {
3443 set_value(rhs);
3444 return *this;
3445 }
3446
3447 xml_attribute& xml_attribute::operator=(int rhs)
3448 {
3449 set_value(rhs);
3450 return *this;
3451 }
3452
3453 xml_attribute& xml_attribute::operator=(unsigned int rhs)
3454 {
3455 set_value(rhs);
3456 return *this;
3457 }
3458
3459 xml_attribute& xml_attribute::operator=(double rhs)
3460 {
3461 set_value(rhs);
3462 return *this;
3463 }
3464
3465 xml_attribute& xml_attribute::operator=(bool rhs)
3466 {
3467 set_value(rhs);
3468 return *this;
3469 }
3470
3471 bool xml_attribute::set_name(const char_t* rhs)
3472 {
3473 if (!_attr) return false;
3474
3475 return strcpy_insitu(_attr->name, _attr->header, xml_memory_page_name_allocated_mask, rhs);
3476 }
3477
3478 bool xml_attribute::set_value(const char_t* rhs)
3479 {
3480 if (!_attr) return false;
3481
3482 return strcpy_insitu(_attr->value, _attr->header, xml_memory_page_value_allocated_mask, rhs);
3483 }
3484
3485 bool xml_attribute::set_value(int rhs)
3486 {
3487 char buf[128];
3488 sprintf(buf, "%d", rhs);
3489
3490 #ifdef PUGIXML_WCHAR_MODE
3491 char_t wbuf[128];
3492 widen_ascii(wbuf, buf);
3493
3494 return set_value(wbuf);
3495 #else
3496 return set_value(buf);
3497 #endif
3498 }
3499
3500 bool xml_attribute::set_value(unsigned int rhs)
3501 {
3502 char buf[128];
3503 sprintf(buf, "%u", rhs);
3504
3505 #ifdef PUGIXML_WCHAR_MODE
3506 char_t wbuf[128];
3507 widen_ascii(wbuf, buf);
3508
3509 return set_value(wbuf);
3510 #else
3511 return set_value(buf);
3512 #endif
3513 }
3514
3515 bool xml_attribute::set_value(double rhs)
3516 {
3517 char buf[128];
3518 sprintf(buf, "%g", rhs);
3519
3520 #ifdef PUGIXML_WCHAR_MODE
3521 char_t wbuf[128];
3522 widen_ascii(wbuf, buf);
3523
3524 return set_value(wbuf);
3525 #else
3526 return set_value(buf);
3527 #endif
3528 }
3529
3530 bool xml_attribute::set_value(bool rhs)
3531 {
3532 return set_value(rhs ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
3533 }
3534
3535#ifdef __BORLANDC__
3536 bool operator&&(const xml_attribute& lhs, bool rhs)
3537 {
3538 return (bool)lhs && rhs;
3539 }
3540
3541 bool operator||(const xml_attribute& lhs, bool rhs)
3542 {
3543 return (bool)lhs || rhs;
3544 }
3545#endif
3546
3547 xml_node::xml_node(): _root(0)
3548 {
3549 }
3550
3551 xml_node::xml_node(xml_node_struct* p): _root(p)
3552 {
3553 }
3554
3555 xml_node::operator xml_node::unspecified_bool_type() const
3556 {
3557 return _root ? &xml_node::_root : 0;
3558 }
3559
3560 bool xml_node::operator!() const
3561 {
3562 return !_root;
3563 }
3564
3565 xml_node::iterator xml_node::begin() const
3566 {
3567 return iterator(_root ? _root->first_child : 0, _root);
3568 }
3569
3570 xml_node::iterator xml_node::end() const
3571 {
3572 return iterator(0, _root);
3573 }
3574
3575 xml_node::attribute_iterator xml_node::attributes_begin() const
3576 {
3577 return attribute_iterator(_root ? _root->first_attribute : 0, _root);
3578 }
3579
3580 xml_node::attribute_iterator xml_node::attributes_end() const
3581 {
3582 return attribute_iterator(0, _root);
3583 }
3584
3585 bool xml_node::operator==(const xml_node& r) const
3586 {
3587 return (_root == r._root);
3588 }
3589
3590 bool xml_node::operator!=(const xml_node& r) const
3591 {
3592 return (_root != r._root);
3593 }
3594
3595 bool xml_node::operator<(const xml_node& r) const
3596 {
3597 return (_root < r._root);
3598 }
3599
3600 bool xml_node::operator>(const xml_node& r) const
3601 {
3602 return (_root > r._root);
3603 }
3604
3605 bool xml_node::operator<=(const xml_node& r) const
3606 {
3607 return (_root <= r._root);
3608 }
3609
3610 bool xml_node::operator>=(const xml_node& r) const
3611 {
3612 return (_root >= r._root);
3613 }
3614
3615 bool xml_node::empty() const
3616 {
3617 return !_root;
3618 }
3619
3620 const char_t* xml_node::name() const
3621 {
3622 return (_root && _root->name) ? _root->name : PUGIXML_TEXT("");
3623 }
3624
3625 xml_node_type xml_node::type() const
3626 {
3627 return _root ? static_cast<xml_node_type>((_root->header & xml_memory_page_type_mask) + 1) : node_null;
3628 }
3629
3630 const char_t* xml_node::value() const
3631 {
3632 return (_root && _root->value) ? _root->value : PUGIXML_TEXT("");
3633 }
3634
3635 xml_node xml_node::child(const char_t* name) const
3636 {
3637 if (!_root) return xml_node();
3638
3639 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
3640 if (i->name && strequal(name, i->name)) return xml_node(i);
3641
3642 return xml_node();
3643 }
3644
3645 xml_attribute xml_node::attribute(const char_t* name) const
3646 {
3647 if (!_root) return xml_attribute();
3648
3649 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
3650 if (i->name && strequal(name, i->name))
3651 return xml_attribute(i);
3652
3653 return xml_attribute();
3654 }
3655
3656 xml_node xml_node::next_sibling(const char_t* name) const
3657 {
3658 if (!_root) return xml_node();
3659
3660 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
3661 if (i->name && strequal(name, i->name)) return xml_node(i);
3662
3663 return xml_node();
3664 }
3665
3666 xml_node xml_node::next_sibling() const
3667 {
3668 if (!_root) return xml_node();
3669
3670 if (_root->next_sibling) return xml_node(_root->next_sibling);
3671 else return xml_node();
3672 }
3673
3674 xml_node xml_node::previous_sibling(const char_t* name) const
3675 {
3676 if (!_root) return xml_node();
3677
3678 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
3679 if (i->name && strequal(name, i->name)) return xml_node(i);
3680
3681 return xml_node();
3682 }
3683
3684 xml_node xml_node::previous_sibling() const
3685 {
3686 if (!_root) return xml_node();
3687
3688 if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
3689 else return xml_node();
3690 }
3691
3692 xml_node xml_node::parent() const
3693 {
3694 return _root ? xml_node(_root->parent) : xml_node();
3695 }
3696
3697 xml_node xml_node::root() const
3698 {
3699 if (!_root) return xml_node();
3700
3701 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(_root->header & xml_memory_page_pointer_mask);
3702
3703 return xml_node(static_cast<xml_document_struct*>(page->allocator));
3704 }
3705
3706 const char_t* xml_node::child_value() const
3707 {
3708 if (!_root) return PUGIXML_TEXT("");
3709
3710 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
3711 {
3712 xml_node_type type = static_cast<xml_node_type>((i->header & xml_memory_page_type_mask) + 1);
3713
3714 if (i->value && (type == node_pcdata || type == node_cdata))
3715 return i->value;
3716 }
3717
3718 return PUGIXML_TEXT("");
3719 }
3720
3721 const char_t* xml_node::child_value(const char_t* name) const
3722 {
3723 return child(name).child_value();
3724 }
3725
3726 xml_attribute xml_node::first_attribute() const
3727 {
3728 return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
3729 }
3730
3731 xml_attribute xml_node::last_attribute() const
3732 {
3733 return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
3734 }
3735
3736 xml_node xml_node::first_child() const
3737 {
3738 return _root ? xml_node(_root->first_child) : xml_node();
3739 }
3740
3741 xml_node xml_node::last_child() const
3742 {
3743 return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
3744 }
3745
3746 bool xml_node::set_name(const char_t* rhs)
3747 {
3748 switch (type())
3749 {
3750 case node_pi:
3751 case node_declaration:
3752 case node_element:
3753 return strcpy_insitu(_root->name, _root->header, xml_memory_page_name_allocated_mask, rhs);
3754
3755 default:
3756 return false;
3757 }
3758 }
3759
3760 bool xml_node::set_value(const char_t* rhs)
3761 {
3762 switch (type())
3763 {
3764 case node_pi:
3765 case node_cdata:
3766 case node_pcdata:
3767 case node_comment:
3768 case node_doctype:
3769 return strcpy_insitu(_root->value, _root->header, xml_memory_page_value_allocated_mask, rhs);
3770
3771 default:
3772 return false;
3773 }
3774 }
3775
3776 xml_attribute xml_node::append_attribute(const char_t* name)
3777 {
3778 if (type() != node_element && type() != node_declaration) return xml_attribute();
3779
3780 xml_attribute a(append_attribute_ll(_root, get_allocator(_root)));
3781 a.set_name(name);
3782
3783 return a;
3784 }
3785
3786 xml_attribute xml_node::prepend_attribute(const char_t* name)
3787 {
3788 if (type() != node_element && type() != node_declaration) return xml_attribute();
3789
3790 xml_attribute a(allocate_attribute(get_allocator(_root)));
3791 if (!a) return xml_attribute();
3792
3793 a.set_name(name);
3794
3795 xml_attribute_struct* head = _root->first_attribute;
3796
3797 if (head)
3798 {
3799 a._attr->prev_attribute_c = head->prev_attribute_c;
3800 head->prev_attribute_c = a._attr;
3801 }
3802 else
3803 a._attr->prev_attribute_c = a._attr;
3804
3805 a._attr->next_attribute = head;
3806 _root->first_attribute = a._attr;
3807
3808 return a;
3809 }
3810
3811 xml_attribute xml_node::insert_attribute_before(const char_t* name, const xml_attribute& attr)
3812 {
3813 if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
3814
3815 // check that attribute belongs to *this
3816 xml_attribute_struct* cur = attr._attr;
3817
3818 while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
3819
3820 if (cur != _root->first_attribute) return xml_attribute();
3821
3822 xml_attribute a(allocate_attribute(get_allocator(_root)));
3823 if (!a) return xml_attribute();
3824
3825 a.set_name(name);
3826
3827 if (attr._attr->prev_attribute_c->next_attribute)
3828 attr._attr->prev_attribute_c->next_attribute = a._attr;
3829 else
3830 _root->first_attribute = a._attr;
3831
3832 a._attr->prev_attribute_c = attr._attr->prev_attribute_c;
3833 a._attr->next_attribute = attr._attr;
3834 attr._attr->prev_attribute_c = a._attr;
3835
3836 return a;
3837 }
3838
3839 xml_attribute xml_node::insert_attribute_after(const char_t* name, const xml_attribute& attr)
3840 {
3841 if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
3842
3843 // check that attribute belongs to *this
3844 xml_attribute_struct* cur = attr._attr;
3845
3846 while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
3847
3848 if (cur != _root->first_attribute) return xml_attribute();
3849
3850 xml_attribute a(allocate_attribute(get_allocator(_root)));
3851 if (!a) return xml_attribute();
3852
3853 a.set_name(name);
3854
3855 if (attr._attr->next_attribute)
3856 attr._attr->next_attribute->prev_attribute_c = a._attr;
3857 else
3858 _root->first_attribute->prev_attribute_c = a._attr;
3859
3860 a._attr->next_attribute = attr._attr->next_attribute;
3861 a._attr->prev_attribute_c = attr._attr;
3862 attr._attr->next_attribute = a._attr;
3863
3864 return a;
3865 }
3866
3867 xml_attribute xml_node::append_copy(const xml_attribute& proto)
3868 {
3869 if (!proto) return xml_attribute();
3870
3871 xml_attribute result = append_attribute(proto.name());
3872 result.set_value(proto.value());
3873
3874 return result;
3875 }
3876
3877 xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
3878 {
3879 if (!proto) return xml_attribute();
3880
3881 xml_attribute result = prepend_attribute(proto.name());
3882 result.set_value(proto.value());
3883
3884 return result;
3885 }
3886
3887 xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
3888 {
3889 if (!proto) return xml_attribute();
3890
3891 xml_attribute result = insert_attribute_after(proto.name(), attr);
3892 result.set_value(proto.value());
3893
3894 return result;
3895 }
3896
3897 xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
3898 {
3899 if (!proto) return xml_attribute();
3900
3901 xml_attribute result = insert_attribute_before(proto.name(), attr);
3902 result.set_value(proto.value());
3903
3904 return result;
3905 }
3906
3907 xml_node xml_node::append_child(xml_node_type type)
3908 {
3909 if (!allow_insert_child(this->type(), type)) return xml_node();
3910
3911 xml_node n(append_node(_root, get_allocator(_root), type));
3912
3913 if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
3914
3915 return n;
3916 }
3917
3918 xml_node xml_node::prepend_child(xml_node_type type)
3919 {
3920 if (!allow_insert_child(this->type(), type)) return xml_node();
3921
3922 xml_node n(allocate_node(get_allocator(_root), type));
3923 if (!n) return xml_node();
3924
3925 n._root->parent = _root;
3926
3927 xml_node_struct* head = _root->first_child;
3928
3929 if (head)
3930 {
3931 n._root->prev_sibling_c = head->prev_sibling_c;
3932 head->prev_sibling_c = n._root;
3933 }
3934 else
3935 n._root->prev_sibling_c = n._root;
3936
3937 n._root->next_sibling = head;
3938 _root->first_child = n._root;
3939
3940 if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
3941
3942 return n;
3943 }
3944
3945 xml_node xml_node::insert_child_before(xml_node_type type, const xml_node& node)
3946 {
3947 if (!allow_insert_child(this->type(), type)) return xml_node();
3948 if (!node._root || node._root->parent != _root) return xml_node();
3949
3950 xml_node n(allocate_node(get_allocator(_root), type));
3951 if (!n) return xml_node();
3952
3953 n._root->parent = _root;
3954
3955 if (node._root->prev_sibling_c->next_sibling)
3956 node._root->prev_sibling_c->next_sibling = n._root;
3957 else
3958 _root->first_child = n._root;
3959
3960 n._root->prev_sibling_c = node._root->prev_sibling_c;
3961 n._root->next_sibling = node._root;
3962 node._root->prev_sibling_c = n._root;
3963
3964 if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
3965
3966 return n;
3967 }
3968
3969 xml_node xml_node::insert_child_after(xml_node_type type, const xml_node& node)
3970 {
3971 if (!allow_insert_child(this->type(), type)) return xml_node();
3972 if (!node._root || node._root->parent != _root) return xml_node();
3973
3974 xml_node n(allocate_node(get_allocator(_root), type));
3975 if (!n) return xml_node();
3976
3977 n._root->parent = _root;
3978
3979 if (node._root->next_sibling)
3980 node._root->next_sibling->prev_sibling_c = n._root;
3981 else
3982 _root->first_child->prev_sibling_c = n._root;
3983
3984 n._root->next_sibling = node._root->next_sibling;
3985 n._root->prev_sibling_c = node._root;
3986 node._root->next_sibling = n._root;
3987
3988 if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
3989
3990 return n;
3991 }
3992
3993 xml_node xml_node::append_child(const char_t* name)
3994 {
3995 xml_node result = append_child(node_element);
3996
3997 result.set_name(name);
3998
3999 return result;
4000 }
4001
4002 xml_node xml_node::prepend_child(const char_t* name)
4003 {
4004 xml_node result = prepend_child(node_element);
4005
4006 result.set_name(name);
4007
4008 return result;
4009 }
4010
4011 xml_node xml_node::insert_child_after(const char_t* name, const xml_node& node)
4012 {
4013 xml_node result = insert_child_after(node_element, node);
4014
4015 result.set_name(name);
4016
4017 return result;
4018 }
4019
4020 xml_node xml_node::insert_child_before(const char_t* name, const xml_node& node)
4021 {
4022 xml_node result = insert_child_before(node_element, node);
4023
4024 result.set_name(name);
4025
4026 return result;
4027 }
4028
4029 xml_node xml_node::append_copy(const xml_node& proto)
4030 {
4031 xml_node result = append_child(proto.type());
4032
4033 if (result) recursive_copy_skip(result, proto, result);
4034
4035 return result;
4036 }
4037
4038 xml_node xml_node::prepend_copy(const xml_node& proto)
4039 {
4040 xml_node result = prepend_child(proto.type());
4041
4042 if (result) recursive_copy_skip(result, proto, result);
4043
4044 return result;
4045 }
4046
4047 xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
4048 {
4049 xml_node result = insert_child_after(proto.type(), node);
4050
4051 if (result) recursive_copy_skip(result, proto, result);
4052
4053 return result;
4054 }
4055
4056 xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
4057 {
4058 xml_node result = insert_child_before(proto.type(), node);
4059
4060 if (result) recursive_copy_skip(result, proto, result);
4061
4062 return result;
4063 }
4064
4065 bool xml_node::remove_attribute(const char_t* name)
4066 {
4067 return remove_attribute(attribute(name));
4068 }
4069
4070 bool xml_node::remove_attribute(const xml_attribute& a)
4071 {
4072 if (!_root || !a._attr) return false;
4073
4074 // check that attribute belongs to *this
4075 xml_attribute_struct* attr = a._attr;
4076
4077 while (attr->prev_attribute_c->next_attribute) attr = attr->prev_attribute_c;
4078
4079 if (attr != _root->first_attribute) return false;
4080
4081 if (a._attr->next_attribute) a._attr->next_attribute->prev_attribute_c = a._attr->prev_attribute_c;
4082 else if (_root->first_attribute) _root->first_attribute->prev_attribute_c = a._attr->prev_attribute_c;
4083
4084 if (a._attr->prev_attribute_c->next_attribute) a._attr->prev_attribute_c->next_attribute = a._attr->next_attribute;
4085 else _root->first_attribute = a._attr->next_attribute;
4086
4087 destroy_attribute(a._attr, get_allocator(_root));
4088
4089 return true;
4090 }
4091
4092 bool xml_node::remove_child(const char_t* name)
4093 {
4094 return remove_child(child(name));
4095 }
4096
4097 bool xml_node::remove_child(const xml_node& n)
4098 {
4099 if (!_root || !n._root || n._root->parent != _root) return false;
4100
4101 if (n._root->next_sibling) n._root->next_sibling->prev_sibling_c = n._root->prev_sibling_c;
4102 else if (_root->first_child) _root->first_child->prev_sibling_c = n._root->prev_sibling_c;
4103
4104 if (n._root->prev_sibling_c->next_sibling) n._root->prev_sibling_c->next_sibling = n._root->next_sibling;
4105 else _root->first_child = n._root->next_sibling;
4106
4107 destroy_node(n._root, get_allocator(_root));
4108
4109 return true;
4110 }
4111
4112 xml_node xml_node::find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const
4113 {
4114 if (!_root) return xml_node();
4115
4116 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
4117 if (i->name && strequal(name, i->name))
4118 {
4119 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
4120 if (strequal(attr_name, a->name) && strequal(attr_value, a->value))
4121 return xml_node(i);
4122 }
4123
4124 return xml_node();
4125 }
4126
4127 xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
4128 {
4129 if (!_root) return xml_node();
4130
4131 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
4132 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
4133 if (strequal(attr_name, a->name) && strequal(attr_value, a->value))
4134 return xml_node(i);
4135
4136 return xml_node();
4137 }
4138
4139#ifndef PUGIXML_NO_STL
4140 string_t xml_node::path(char_t delimiter) const
4141 {
4142 string_t path;
4143
4144 xml_node cursor = *this; // Make a copy.
4145
4146 path = cursor.name();
4147
4148 while (cursor.parent())
4149 {
4150 cursor = cursor.parent();
4151
4152 string_t temp = cursor.name();
4153 temp += delimiter;
4154 temp += path;
4155 path.swap(temp);
4156 }
4157
4158 return path;
4159 }
4160#endif
4161
4162 xml_node xml_node::first_element_by_path(const char_t* path, char_t delimiter) const
4163 {
4164 xml_node found = *this; // Current search context.
4165
4166 if (!_root || !path || !path[0]) return found;
4167
4168 if (path[0] == delimiter)
4169 {
4170 // Absolute path; e.g. '/foo/bar'
4171 found = found.root();
4172 ++path;
4173 }
4174
4175 const char_t* path_segment = path;
4176
4177 while (*path_segment == delimiter) ++path_segment;
4178
4179 const char_t* path_segment_end = path_segment;
4180
4181 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
4182
4183 if (path_segment == path_segment_end) return found;
4184
4185 const char_t* next_segment = path_segment_end;
4186
4187 while (*next_segment == delimiter) ++next_segment;
4188
4189 if (*path_segment == '.' && path_segment + 1 == path_segment_end)
4190 return found.first_element_by_path(next_segment, delimiter);
4191 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
4192 return found.parent().first_element_by_path(next_segment, delimiter);
4193 else
4194 {
4195 for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
4196 {
4197 if (j->name && strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
4198 {
4199 xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
4200
4201 if (subsearch) return subsearch;
4202 }
4203 }
4204
4205 return xml_node();
4206 }
4207 }
4208
4209 bool xml_node::traverse(xml_tree_walker& walker)
4210 {
4211 walker._depth = -1;
4212
4213 xml_node arg_begin = *this;
4214 if (!walker.begin(arg_begin)) return false;
4215
4216 xml_node cur = first_child();
4217
4218 if (cur)
4219 {
4220 ++walker._depth;
4221
4222 do
4223 {
4224 xml_node arg_for_each = cur;
4225 if (!walker.for_each(arg_for_each))
4226 return false;
4227
4228 if (cur.first_child())
4229 {
4230 ++walker._depth;
4231 cur = cur.first_child();
4232 }
4233 else if (cur.next_sibling())
4234 cur = cur.next_sibling();
4235 else
4236 {
4237 // Borland C++ workaround
4238 while (!cur.next_sibling() && cur != *this && (bool)cur.parent())
4239 {
4240 --walker._depth;
4241 cur = cur.parent();
4242 }
4243
4244 if (cur != *this)
4245 cur = cur.next_sibling();
4246 }
4247 }
4248 while (cur && cur != *this);
4249 }
4250
4251 assert(walker._depth == -1);
4252
4253 xml_node arg_end = *this;
4254 return walker.end(arg_end);
4255 }
4256
4257 size_t xml_node::hash_value() const
4258 {
4259 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
4260 }
4261
4262 xml_node_struct* xml_node::internal_object() const
4263 {
4264 return _root;
4265 }
4266
4267 void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
4268 {
4269 if (!_root) return;
4270
4271 xml_buffered_writer buffered_writer(writer, encoding);
4272
4273 node_output(buffered_writer, *this, indent, flags, depth);
4274 }
4275
4276#ifndef PUGIXML_NO_STL
4277 void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
4278 {
4279 xml_writer_stream writer(stream);
4280
4281 print(writer, indent, flags, encoding, depth);
4282 }
4283
4284 void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
4285 {
4286 xml_writer_stream writer(stream);
4287
4288 print(writer, indent, flags, encoding_wchar, depth);
4289 }
4290#endif
4291
4292 ptrdiff_t xml_node::offset_debug() const
4293 {
4294 xml_node_struct* r = root()._root;
4295
4296 if (!r) return -1;
4297
4298 const char_t* buffer = static_cast<xml_document_struct*>(r)->buffer;
4299
4300 if (!buffer) return -1;
4301
4302 switch (type())
4303 {
4304 case node_document:
4305 return 0;
4306
4307 case node_element:
4308 case node_declaration:
4309 case node_pi:
4310 return (_root->header & xml_memory_page_name_allocated_mask) ? -1 : _root->name - buffer;
4311
4312 case node_pcdata:
4313 case node_cdata:
4314 case node_comment:
4315 case node_doctype:
4316 return (_root->header & xml_memory_page_value_allocated_mask) ? -1 : _root->value - buffer;
4317
4318 default:
4319 return -1;
4320 }
4321 }
4322
4323#ifdef __BORLANDC__
4324 bool operator&&(const xml_node& lhs, bool rhs)
4325 {
4326 return (bool)lhs && rhs;
4327 }
4328
4329 bool operator||(const xml_node& lhs, bool rhs)
4330 {
4331 return (bool)lhs || rhs;
4332 }
4333#endif
4334
4335 xml_node_iterator::xml_node_iterator()
4336 {
4337 }
4338
4339 xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
4340 {
4341 }
4342
4343 xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
4344 {
4345 }
4346
4347 bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
4348 {
4349 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
4350 }
4351
4352 bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
4353 {
4354 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
4355 }
4356
4357 xml_node& xml_node_iterator::operator*()
4358 {
4359 assert(_wrap._root);
4360 return _wrap;
4361 }
4362
4363 xml_node* xml_node_iterator::operator->()
4364 {
4365 assert(_wrap._root);
4366 return &_wrap;
4367 }
4368
4369 const xml_node_iterator& xml_node_iterator::operator++()
4370 {
4371 assert(_wrap._root);
4372 _wrap._root = _wrap._root->next_sibling;
4373 return *this;
4374 }
4375
4376 xml_node_iterator xml_node_iterator::operator++(int)
4377 {
4378 xml_node_iterator temp = *this;
4379 ++*this;
4380 return temp;
4381 }
4382
4383 const xml_node_iterator& xml_node_iterator::operator--()
4384 {
4385 _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
4386 return *this;
4387 }
4388
4389 xml_node_iterator xml_node_iterator::operator--(int)
4390 {
4391 xml_node_iterator temp = *this;
4392 --*this;
4393 return temp;
4394 }
4395
4396 xml_attribute_iterator::xml_attribute_iterator()
4397 {
4398 }
4399
4400 xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
4401 {
4402 }
4403
4404 xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
4405 {
4406 }
4407
4408 bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
4409 {
4410 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
4411 }
4412
4413 bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
4414 {
4415 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
4416 }
4417
4418 xml_attribute& xml_attribute_iterator::operator*()
4419 {
4420 assert(_wrap._attr);
4421 return _wrap;
4422 }
4423
4424 xml_attribute* xml_attribute_iterator::operator->()
4425 {
4426 assert(_wrap._attr);
4427 return &_wrap;
4428 }
4429
4430 const xml_attribute_iterator& xml_attribute_iterator::operator++()
4431 {
4432 assert(_wrap._attr);
4433 _wrap._attr = _wrap._attr->next_attribute;
4434 return *this;
4435 }
4436
4437 xml_attribute_iterator xml_attribute_iterator::operator++(int)
4438 {
4439 xml_attribute_iterator temp = *this;
4440 ++*this;
4441 return temp;
4442 }
4443
4444 const xml_attribute_iterator& xml_attribute_iterator::operator--()
4445 {
4446 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
4447 return *this;
4448 }
4449
4450 xml_attribute_iterator xml_attribute_iterator::operator--(int)
4451 {
4452 xml_attribute_iterator temp = *this;
4453 --*this;
4454 return temp;
4455 }
4456
4457 xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
4458 {
4459 }
4460
4461 xml_parse_result::operator bool() const
4462 {
4463 return status == status_ok;
4464 }
4465
4466 const char* xml_parse_result::description() const
4467 {
4468 switch (status)
4469 {
4470 case status_ok: return "No error";
4471
4472 case status_file_not_found: return "File was not found";
4473 case status_io_error: return "Error reading from file/stream";
4474 case status_out_of_memory: return "Could not allocate memory";
4475 case status_internal_error: return "Internal error occurred";
4476
4477 case status_unrecognized_tag: return "Could not determine tag type";
4478
4479 case status_bad_pi: return "Error parsing document declaration/processing instruction";
4480 case status_bad_comment: return "Error parsing comment";
4481 case status_bad_cdata: return "Error parsing CDATA section";
4482 case status_bad_doctype: return "Error parsing document type declaration";
4483 case status_bad_pcdata: return "Error parsing PCDATA section";
4484 case status_bad_start_element: return "Error parsing start element tag";
4485 case status_bad_attribute: return "Error parsing element attribute";
4486 case status_bad_end_element: return "Error parsing end element tag";
4487 case status_end_element_mismatch: return "Start-end tags mismatch";
4488
4489 default: return "Unknown error";
4490 }
4491 }
4492
4493 xml_document::xml_document(): _buffer(0)
4494 {
4495 create();
4496 }
4497
4498 xml_document::~xml_document()
4499 {
4500 destroy();
4501 }
4502
4503 void xml_document::reset()
4504 {
4505 destroy();
4506 create();
4507 }
4508
4509 void xml_document::reset(const xml_document& proto)
4510 {
4511 reset();
4512
4513 for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
4514 append_copy(cur);
4515 }
4516
4517 void xml_document::create()
4518 {
4519 // initialize sentinel page
4520 STATIC_ASSERT(offsetof(xml_memory_page, data) + sizeof(xml_document_struct) + xml_memory_page_alignment <= sizeof(_memory));
4521
4522 // align upwards to page boundary
4523 void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1));
4524
4525 // prepare page structure
4526 xml_memory_page* page = xml_memory_page::construct(page_memory);
4527
4528 page->busy_size = xml_memory_page_size;
4529
4530 // allocate new root
4531 _root = new (page->data) xml_document_struct(page);
4532 _root->prev_sibling_c = _root;
4533
4534 // setup sentinel page
4535 page->allocator = static_cast<xml_document_struct*>(_root);
4536 }
4537
4538 void xml_document::destroy()
4539 {
4540 // destroy static storage
4541 if (_buffer)
4542 {
4543 global_deallocate(_buffer);
4544 _buffer = 0;
4545 }
4546
4547 // destroy dynamic storage, leave sentinel page (it's in static memory)
4548 if (_root)
4549 {
4550 xml_memory_page* root_page = reinterpret_cast<xml_memory_page*>(_root->header & xml_memory_page_pointer_mask);
4551 assert(root_page && !root_page->prev && !root_page->memory);
4552
4553 // destroy all pages
4554 for (xml_memory_page* page = root_page->next; page; )
4555 {
4556 xml_memory_page* next = page->next;
4557
4558 xml_allocator::deallocate_page(page);
4559
4560 page = next;
4561 }
4562
4563 // cleanup root page
4564 root_page->allocator = 0;
4565 root_page->next = 0;
4566 root_page->busy_size = root_page->freed_size = 0;
4567
4568 _root = 0;
4569 }
4570 }
4571
4572#ifndef PUGIXML_NO_STL
4573 xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
4574 {
4575 reset();
4576
4577 return load_stream_impl(*this, stream, options, encoding);
4578 }
4579
4580 xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
4581 {
4582 reset();
4583
4584 return load_stream_impl(*this, stream, options, encoding_wchar);
4585 }
4586#endif
4587
4588 xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
4589 {
4590 // Force native encoding (skip autodetection)
4591 #ifdef PUGIXML_WCHAR_MODE
4592 xml_encoding encoding = encoding_wchar;
4593 #else
4594 xml_encoding encoding = encoding_utf8;
4595 #endif
4596
4597 return load_buffer(contents, strlength(contents) * sizeof(char_t), options, encoding);
4598 }
4599
4600 xml_parse_result xml_document::load_file(const char* path, unsigned int options, xml_encoding encoding)
4601 {
4602 reset();
4603
4604 FILE* file = fopen(path, "rb");
4605
4606 return load_file_impl(*this, file, options, encoding);
4607 }
4608
4609 xml_parse_result xml_document::load_file(const wchar_t* path, unsigned int options, xml_encoding encoding)
4610 {
4611 reset();
4612
4613 FILE* file = open_file_wide(path, L"rb");
4614
4615 return load_file_impl(*this, file, options, encoding);
4616 }
4617
4618 xml_parse_result xml_document::load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own)
4619 {
4620 reset();
4621
4622 // check input buffer
4623 assert(contents || size == 0);
4624
4625 // get actual encoding
4626 xml_encoding buffer_encoding = get_buffer_encoding(encoding, contents, size);
4627
4628 // get private buffer
4629 char_t* buffer = 0;
4630 size_t length = 0;
4631
4632 if (!convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return make_parse_result(status_out_of_memory);
4633
4634 // delete original buffer if we performed a conversion
4635 if (own && buffer != contents && contents) global_deallocate(contents);
4636
4637 // parse
4638 xml_parse_result res = xml_parser::parse(buffer, length, _root, options);
4639
4640 // remember encoding
4641 res.encoding = buffer_encoding;
4642
4643 // grab onto buffer if it's our buffer, user is responsible for deallocating contens himself
4644 if (own || buffer != contents) _buffer = buffer;
4645
4646 return res;
4647 }
4648
4649 xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
4650 {
4651 return load_buffer_impl(const_cast<void*>(contents), size, options, encoding, false, false);
4652 }
4653
4654 xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
4655 {
4656 return load_buffer_impl(contents, size, options, encoding, true, false);
4657 }
4658
4659 xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
4660 {
4661 return load_buffer_impl(contents, size, options, encoding, true, true);
4662 }
4663
4664 void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
4665 {
4666 if (flags & format_write_bom) write_bom(writer, get_write_encoding(encoding));
4667
4668 xml_buffered_writer buffered_writer(writer, encoding);
4669
4670 if (!(flags & format_no_declaration) && !has_declaration(*this))
4671 {
4672 buffered_writer.write(PUGIXML_TEXT("<?xml version=\"1.0\"?>"));
4673 if (!(flags & format_raw)) buffered_writer.write('\n');
4674 }
4675
4676 node_output(buffered_writer, *this, indent, flags, 0);
4677 }
4678
4679#ifndef PUGIXML_NO_STL
4680 void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
4681 {
4682 xml_writer_stream writer(stream);
4683
4684 save(writer, indent, flags, encoding);
4685 }
4686
4687 void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
4688 {
4689 xml_writer_stream writer(stream);
4690
4691 save(writer, indent, flags, encoding_wchar);
4692 }
4693#endif
4694
4695 bool xml_document::save_file(const char* path, const char_t* indent, unsigned int flags, xml_encoding encoding) const
4696 {
4697 FILE* file = fopen(path, "wb");
4698 if (!file) return false;
4699
4700 xml_writer_file writer(file);
4701 save(writer, indent, flags, encoding);
4702
4703 fclose(file);
4704
4705 return true;
4706 }
4707
4708 bool xml_document::save_file(const wchar_t* path, const char_t* indent, unsigned int flags, xml_encoding encoding) const
4709 {
4710 FILE* file = open_file_wide(path, L"wb");
4711 if (!file) return false;
4712
4713 xml_writer_file writer(file);
4714 save(writer, indent, flags, encoding);
4715
4716 fclose(file);
4717
4718 return true;
4719 }
4720
4721 xml_node xml_document::document_element() const
4722 {
4723 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
4724 if ((i->header & xml_memory_page_type_mask) + 1 == node_element)
4725 return xml_node(i);
4726
4727 return xml_node();
4728 }
4729
4730#ifndef PUGIXML_NO_STL
4731 std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
4732 {
4733 assert(str);
4734
4735 return as_utf8_impl(str, wcslen(str));
4736 }
4737
4738 std::string PUGIXML_FUNCTION as_utf8(const std::wstring& str)
4739 {
4740 return as_utf8_impl(str.c_str(), str.size());
4741 }
4742
4743 std::wstring PUGIXML_FUNCTION as_wide(const char* str)
4744 {
4745 assert(str);
4746
4747 return as_wide_impl(str, strlen(str));
4748 }
4749
4750 std::wstring PUGIXML_FUNCTION as_wide(const std::string& str)
4751 {
4752 return as_wide_impl(str.c_str(), str.size());
4753 }
4754#endif
4755
4756 void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
4757 {
4758 global_allocate = allocate;
4759 global_deallocate = deallocate;
4760 }
4761
4762 allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
4763 {
4764 return global_allocate;
4765 }
4766
4767 deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
4768 {
4769 return global_deallocate;
4770 }
4771}
4772
4773#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
4774namespace std
4775{
4776 // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
4777 std::bidirectional_iterator_tag _Iter_cat(const xml_node_iterator&)
4778 {
4779 return std::bidirectional_iterator_tag();
4780 }
4781
4782 std::bidirectional_iterator_tag _Iter_cat(const xml_attribute_iterator&)
4783 {
4784 return std::bidirectional_iterator_tag();
4785 }
4786}
4787#endif
4788
4789#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
4790namespace std
4791{
4792 // Workarounds for (non-standard) iterator category detection
4793 std::bidirectional_iterator_tag __iterator_category(const xml_node_iterator&)
4794 {
4795 return std::bidirectional_iterator_tag();
4796 }
4797
4798 std::bidirectional_iterator_tag __iterator_category(const xml_attribute_iterator&)
4799 {
4800 return std::bidirectional_iterator_tag();
4801 }
4802}
4803#endif
4804
4805#ifndef PUGIXML_NO_XPATH
4806
4807// STL replacements
4808namespace
4809{
4810 struct equal_to
4811 {
4812 template <typename T> bool operator()(const T& lhs, const T& rhs) const
4813 {
4814 return lhs == rhs;
4815 }
4816 };
4817
4818 struct not_equal_to
4819 {
4820 template <typename T> bool operator()(const T& lhs, const T& rhs) const
4821 {
4822 return lhs != rhs;
4823 }
4824 };
4825
4826 struct less
4827 {
4828 template <typename T> bool operator()(const T& lhs, const T& rhs) const
4829 {
4830 return lhs < rhs;
4831 }
4832 };
4833
4834 struct less_equal
4835 {
4836 template <typename T> bool operator()(const T& lhs, const T& rhs) const
4837 {
4838 return lhs <= rhs;
4839 }
4840 };
4841
4842 template <typename T> void swap(T& lhs, T& rhs)
4843 {
4844 T temp = lhs;
4845 lhs = rhs;
4846 rhs = temp;
4847 }
4848
4849 template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
4850 {
4851 I result = begin;
4852
4853 for (I it = begin + 1; it != end; ++it)
4854 if (pred(*it, *result))
4855 result = it;
4856
4857 return result;
4858 }
4859
4860 template <typename I> void reverse(I begin, I end)
4861 {
4862 while (begin + 1 < end) swap(*begin++, *--end);
4863 }
4864
4865 template <typename I> I unique(I begin, I end)
4866 {
4867 // fast skip head
4868 while (begin + 1 < end && *begin != *(begin + 1)) begin++;
4869
4870 if (begin == end) return begin;
4871
4872 // last written element
4873 I write = begin++;
4874
4875 // merge unique elements
4876 while (begin != end)
4877 {
4878 if (*begin != *write)
4879 *++write = *begin++;
4880 else
4881 begin++;
4882 }
4883
4884 // past-the-end (write points to live element)
4885 return write + 1;
4886 }
4887
4888 template <typename I> void copy_backwards(I begin, I end, I target)
4889 {
4890 while (begin != end) *--target = *--end;
4891 }
4892
4893 template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
4894 {
4895 assert(begin != end);
4896
4897 for (I it = begin + 1; it != end; ++it)
4898 {
4899 T val = *it;
4900
4901 if (pred(val, *begin))
4902 {
4903 // move to front
4904 copy_backwards(begin, it, it + 1);
4905 *begin = val;
4906 }
4907 else
4908 {
4909 I hole = it;
4910
4911 // move hole backwards
4912 while (pred(val, *(hole - 1)))
4913 {
4914 *hole = *(hole - 1);
4915 hole--;
4916 }
4917
4918 // fill hole with element
4919 *hole = val;
4920 }
4921 }
4922 }
4923
4924 // std variant for elements with ==
4925 template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
4926 {
4927 I eqbeg = middle, eqend = middle + 1;
4928
4929 // expand equal range
4930 while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg;
4931 while (eqend != end && *eqend == *eqbeg) ++eqend;
4932
4933 // process outer elements
4934 I ltend = eqbeg, gtbeg = eqend;
4935
4936 for (;;)
4937 {
4938 // find the element from the right side that belongs to the left one
4939 for (; gtbeg != end; ++gtbeg)
4940 if (!pred(*eqbeg, *gtbeg))
4941 {
4942 if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++);
4943 else break;
4944 }
4945
4946 // find the element from the left side that belongs to the right one
4947 for (; ltend != begin; --ltend)
4948 if (!pred(*(ltend - 1), *eqbeg))
4949 {
4950 if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg);
4951 else break;
4952 }
4953
4954 // scanned all elements
4955 if (gtbeg == end && ltend == begin)
4956 {
4957 *out_eqbeg = eqbeg;
4958 *out_eqend = eqend;
4959 return;
4960 }
4961
4962 // make room for elements by moving equal area
4963 if (gtbeg == end)
4964 {
4965 if (--ltend != --eqbeg) swap(*ltend, *eqbeg);
4966 swap(*eqbeg, *--eqend);
4967 }
4968 else if (ltend == begin)
4969 {
4970 if (eqend != gtbeg) swap(*eqbeg, *eqend);
4971 ++eqend;
4972 swap(*gtbeg++, *eqbeg++);
4973 }
4974 else swap(*gtbeg++, *--ltend);
4975 }
4976 }
4977
4978 template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
4979 {
4980 if (pred(*middle, *first)) swap(*middle, *first);
4981 if (pred(*last, *middle)) swap(*last, *middle);
4982 if (pred(*middle, *first)) swap(*middle, *first);
4983 }
4984
4985 template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
4986 {
4987 if (last - first <= 40)
4988 {
4989 // median of three for small chunks
4990 median3(first, middle, last, pred);
4991 }
4992 else
4993 {
4994 // median of nine
4995 size_t step = (last - first + 1) / 8;
4996
4997 median3(first, first + step, first + 2 * step, pred);
4998 median3(middle - step, middle, middle + step, pred);
4999 median3(last - 2 * step, last - step, last, pred);
5000 median3(first + step, middle, last - step, pred);
5001 }
5002 }
5003
5004 template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
5005 {
5006 // sort large chunks
5007 while (end - begin > 32)
5008 {
5009 // find median element
5010 I middle = begin + (end - begin) / 2;
5011 median(begin, middle, end - 1, pred);
5012
5013 // partition in three chunks (< = >)
5014 I eqbeg, eqend;
5015 partition(begin, middle, end, pred, &eqbeg, &eqend);
5016
5017 // loop on larger half
5018 if (eqbeg - begin > end - eqend)
5019 {
5020 sort(eqend, end, pred);
5021 end = eqbeg;
5022 }
5023 else
5024 {
5025 sort(begin, eqbeg, pred);
5026 begin = eqend;
5027 }
5028 }
5029
5030 // insertion sort small chunk
5031 if (begin != end) insertion_sort(begin, end, pred, &*begin);
5032 }
5033}
5034
5035// Allocator used for AST and evaluation stacks
5036namespace
5037{
5038 struct xpath_memory_block
5039 {
5040 xpath_memory_block* next;
5041
5042 char data[4096];
5043 };
5044
5045 class xpath_allocator
5046 {
5047 xpath_memory_block* _root;
5048 size_t _root_size;
5049
5050 public:
5051 #ifdef PUGIXML_NO_EXCEPTIONS
5052 jmp_buf* error_handler;
5053 #endif
5054
5055 xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size)
5056 {
5057 #ifdef PUGIXML_NO_EXCEPTIONS
5058 error_handler = 0;
5059 #endif
5060 }
5061
5062 void* allocate_nothrow(size_t size)
5063 {
5064 const size_t block_capacity = sizeof(_root->data);
5065
5066 // align size so that we're able to store pointers in subsequent blocks
5067 size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
5068
5069 if (_root_size + size <= block_capacity)
5070 {
5071 void* buf = _root->data + _root_size;
5072 _root_size += size;
5073 return buf;
5074 }
5075 else
5076 {
5077 size_t block_data_size = (size > block_capacity) ? size : block_capacity;
5078 size_t block_size = block_data_size + offsetof(xpath_memory_block, data);
5079
5080 xpath_memory_block* block = static_cast<xpath_memory_block*>(global_allocate(block_size));
5081 if (!block) return 0;
5082
5083 block->next = _root;
5084
5085 _root = block;
5086 _root_size = size;
5087
5088 return block->data;
5089 }
5090 }
5091
5092 void* allocate(size_t size)
5093 {
5094 void* result = allocate_nothrow(size);
5095
5096 if (!result)
5097 {
5098 #ifdef PUGIXML_NO_EXCEPTIONS
5099 assert(error_handler);
5100 longjmp(*error_handler, 1);
5101 #else
5102 throw std::bad_alloc();
5103 #endif
5104 }
5105
5106 return result;
5107 }
5108
5109 void* reallocate(void* ptr, size_t old_size, size_t new_size)
5110 {
5111 // align size so that we're able to store pointers in subsequent blocks
5112 old_size = (old_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
5113 new_size = (new_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
5114
5115 // we can only reallocate the last object
5116 assert(ptr == 0 || static_cast<char*>(ptr) + old_size == _root->data + _root_size);
5117
5118 // adjust root size so that we have not allocated the object at all
5119 bool only_object = (_root_size == old_size);
5120
5121 if (ptr) _root_size -= old_size;
5122
5123 // allocate a new version (this will obviously reuse the memory if possible)
5124 void* result = allocate(new_size);
5125 assert(result);
5126
5127 // we have a new block
5128 if (result != ptr && ptr)
5129 {
5130 // copy old data
5131 assert(new_size > old_size);
5132 memcpy(result, ptr, old_size);
5133
5134 // free the previous page if it had no other objects
5135 if (only_object)
5136 {
5137 assert(_root->data == result);
5138 assert(_root->next);
5139
5140 xpath_memory_block* next = _root->next->next;
5141
5142 if (next)
5143 {
5144 // deallocate the whole page, unless it was the first one
5145 global_deallocate(_root->next);
5146 _root->next = next;
5147 }
5148 }
5149 }
5150
5151 return result;
5152 }
5153
5154 void revert(const xpath_allocator& state)
5155 {
5156 // free all new pages
5157 xpath_memory_block* cur = _root;
5158
5159 while (cur != state._root)
5160 {
5161 xpath_memory_block* next = cur->next;
5162
5163 global_deallocate(cur);
5164
5165 cur = next;
5166 }
5167
5168 // restore state
5169 _root = state._root;
5170 _root_size = state._root_size;
5171 }
5172
5173 void release()
5174 {
5175 xpath_memory_block* cur = _root;
5176 assert(cur);
5177
5178 while (cur->next)
5179 {
5180 xpath_memory_block* next = cur->next;
5181
5182 global_deallocate(cur);
5183
5184 cur = next;
5185 }
5186 }
5187 };
5188
5189 struct xpath_allocator_capture
5190 {
5191 xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
5192 {
5193 }
5194
5195 ~xpath_allocator_capture()
5196 {
5197 _target->revert(_state);
5198 }
5199
5200 xpath_allocator* _target;
5201 xpath_allocator _state;
5202 };
5203
5204 struct xpath_stack
5205 {
5206 xpath_allocator* result;
5207 xpath_allocator* temp;
5208 };
5209
5210 struct xpath_stack_data
5211 {
5212 xpath_memory_block blocks[2];
5213 xpath_allocator result;
5214 xpath_allocator temp;
5215 xpath_stack stack;
5216
5217 #ifdef PUGIXML_NO_EXCEPTIONS
5218 jmp_buf error_handler;
5219 #endif
5220
5221 xpath_stack_data(): result(blocks + 0), temp(blocks + 1)
5222 {
5223 blocks[0].next = blocks[1].next = 0;
5224
5225 stack.result = &result;
5226 stack.temp = &temp;
5227
5228 #ifdef PUGIXML_NO_EXCEPTIONS
5229 result.error_handler = temp.error_handler = &error_handler;
5230 #endif
5231 }
5232
5233 ~xpath_stack_data()
5234 {
5235 result.release();
5236 temp.release();
5237 }
5238 };
5239}
5240
5241// String class
5242namespace
5243{
5244 class xpath_string
5245 {
5246 const char_t* _buffer;
5247 bool _uses_heap;
5248
5249 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
5250 {
5251 char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
5252 assert(result);
5253
5254 memcpy(result, string, length * sizeof(char_t));
5255 result[length] = 0;
5256
5257 return result;
5258 }
5259
5260 static char_t* duplicate_string(const char_t* string, xpath_allocator* alloc)
5261 {
5262 return duplicate_string(string, strlength(string), alloc);
5263 }
5264
5265 public:
5266 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false)
5267 {
5268 }
5269
5270 explicit xpath_string(const char_t* str, xpath_allocator* alloc)
5271 {
5272 bool empty = (*str == 0);
5273
5274 _buffer = empty ? PUGIXML_TEXT("") : duplicate_string(str, alloc);
5275 _uses_heap = !empty;
5276 }
5277
5278 explicit xpath_string(const char_t* str, bool use_heap): _buffer(str), _uses_heap(use_heap)
5279 {
5280 }
5281
5282 xpath_string(const char_t* begin, const char_t* end, xpath_allocator* alloc)
5283 {
5284 assert(begin <= end);
5285
5286 bool empty = (begin == end);
5287
5288 _buffer = empty ? PUGIXML_TEXT("") : duplicate_string(begin, static_cast<size_t>(end - begin), alloc);
5289 _uses_heap = !empty;
5290 }
5291
5292 void append(const xpath_string& o, xpath_allocator* alloc)
5293 {
5294 // skip empty sources
5295 if (!*o._buffer) return;
5296
5297 // fast append for constant empty target and constant source
5298 if (!*_buffer && !_uses_heap && !o._uses_heap)
5299 {
5300 _buffer = o._buffer;
5301 }
5302 else
5303 {
5304 // need to make heap copy
5305 size_t target_length = strlength(_buffer);
5306 size_t source_length = strlength(o._buffer);
5307 size_t length = target_length + source_length;
5308
5309 // allocate new buffer
5310 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (length + 1) * sizeof(char_t)));
5311 assert(result);
5312
5313 // append first string to the new buffer in case there was no reallocation
5314 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
5315
5316 // append second string to the new buffer
5317 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
5318 result[length] = 0;
5319
5320 // finalize
5321 _buffer = result;
5322 _uses_heap = true;
5323 }
5324 }
5325
5326 const char_t* c_str() const
5327 {
5328 return _buffer;
5329 }
5330
5331 size_t length() const
5332 {
5333 return strlength(_buffer);
5334 }
5335
5336 char_t* data(xpath_allocator* alloc)
5337 {
5338 // make private heap copy
5339 if (!_uses_heap)
5340 {
5341 _buffer = duplicate_string(_buffer, alloc);
5342 _uses_heap = true;
5343 }
5344
5345 return const_cast<char_t*>(_buffer);
5346 }
5347
5348 bool empty() const
5349 {
5350 return *_buffer == 0;
5351 }
5352
5353 bool operator==(const xpath_string& o) const
5354 {
5355 return strequal(_buffer, o._buffer);
5356 }
5357
5358 bool operator!=(const xpath_string& o) const
5359 {
5360 return !strequal(_buffer, o._buffer);
5361 }
5362
5363 bool uses_heap() const
5364 {
5365 return _uses_heap;
5366 }
5367 };
5368
5369 xpath_string xpath_string_const(const char_t* str)
5370 {
5371 return xpath_string(str, false);
5372 }
5373}
5374
5375namespace
5376{
5377 bool starts_with(const char_t* string, const char_t* pattern)
5378 {
5379 while (*pattern && *string == *pattern)
5380 {
5381 string++;
5382 pattern++;
5383 }
5384
5385 return *pattern == 0;
5386 }
5387
5388 const char_t* find_char(const char_t* s, char_t c)
5389 {
5390 #ifdef PUGIXML_WCHAR_MODE
5391 return wcschr(s, c);
5392 #else
5393 return strchr(s, c);
5394 #endif
5395 }
5396
5397 const char_t* find_substring(const char_t* s, const char_t* p)
5398 {
5399 #ifdef PUGIXML_WCHAR_MODE
5400 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
5401 return (*p == 0) ? s : wcsstr(s, p);
5402 #else
5403 return strstr(s, p);
5404 #endif
5405 }
5406
5407 // Converts symbol to lower case, if it is an ASCII one
5408 char_t tolower_ascii(char_t ch)
5409 {
5410 return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
5411 }
5412
5413 xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
5414 {
5415 if (na.attribute())
5416 return xpath_string_const(na.attribute().value());
5417 else
5418 {
5419 const xml_node& n = na.node();
5420
5421 switch (n.type())
5422 {
5423 case node_pcdata:
5424 case node_cdata:
5425 case node_comment:
5426 case node_pi:
5427 return xpath_string_const(n.value());
5428
5429 case node_document:
5430 case node_element:
5431 {
5432 xpath_string result;
5433
5434 xml_node cur = n.first_child();
5435
5436 while (cur && cur != n)
5437 {
5438 if (cur.type() == node_pcdata || cur.type() == node_cdata)
5439 result.append(xpath_string_const(cur.value()), alloc);
5440
5441 if (cur.first_child())
5442 cur = cur.first_child();
5443 else if (cur.next_sibling())
5444 cur = cur.next_sibling();
5445 else
5446 {
5447 while (!cur.next_sibling() && cur != n)
5448 cur = cur.parent();
5449
5450 if (cur != n) cur = cur.next_sibling();
5451 }
5452 }
5453
5454 return result;
5455 }
5456
5457 default:
5458 return xpath_string();
5459 }
5460 }
5461 }
5462
5463 unsigned int node_height(xml_node n)
5464 {
5465 unsigned int result = 0;
5466
5467 while (n)
5468 {
5469 ++result;
5470 n = n.parent();
5471 }
5472
5473 return result;
5474 }
5475
5476 bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh)
5477 {
5478 // normalize heights
5479 for (unsigned int i = rh; i < lh; i++) ln = ln.parent();
5480 for (unsigned int j = lh; j < rh; j++) rn = rn.parent();
5481
5482 // one node is the ancestor of the other
5483 if (ln == rn) return lh < rh;
5484
5485 // find common ancestor
5486 while (ln.parent() != rn.parent())
5487 {
5488 ln = ln.parent();
5489 rn = rn.parent();
5490 }
5491
5492 // there is no common ancestor (the shared parent is null), nodes are from different documents
5493 if (!ln.parent()) return ln < rn;
5494
5495 // determine sibling order
5496 for (; ln; ln = ln.next_sibling())
5497 if (ln == rn)
5498 return true;
5499
5500 return false;
5501 }
5502
5503 bool node_is_ancestor(xml_node parent, xml_node node)
5504 {
5505 while (node && node != parent) node = node.parent();
5506
5507 return parent && node == parent;
5508 }
5509
5510 const void* document_order(const xpath_node& xnode)
5511 {
5512 xml_node_struct* node = xnode.node().internal_object();
5513
5514 if (node)
5515 {
5516 if (node->name && (node->header & xml_memory_page_name_allocated_mask) == 0) return node->name;
5517 if (node->value && (node->header & xml_memory_page_value_allocated_mask) == 0) return node->value;
5518 return 0;
5519 }
5520
5521 xml_attribute_struct* attr = xnode.attribute().internal_object();
5522
5523 if (attr)
5524 {
5525 if ((attr->header & xml_memory_page_name_allocated_mask) == 0) return attr->name;
5526 if ((attr->header & xml_memory_page_value_allocated_mask) == 0) return attr->value;
5527 return 0;
5528 }
5529
5530 return 0;
5531 }
5532
5533 struct document_order_comparator
5534 {
5535 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
5536 {
5537 // optimized document order based check
5538 const void* lo = document_order(lhs);
5539 const void* ro = document_order(rhs);
5540
5541 if (lo && ro) return lo < ro;
5542
5543 // slow comparison
5544 xml_node ln = lhs.node(), rn = rhs.node();
5545
5546 // compare attributes
5547 if (lhs.attribute() && rhs.attribute())
5548 {
5549 // shared parent
5550 if (lhs.parent() == rhs.parent())
5551 {
5552 // determine sibling order
5553 for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
5554 if (a == rhs.attribute())
5555 return true;
5556
5557 return false;
5558 }
5559
5560 // compare attribute parents
5561 ln = lhs.parent();
5562 rn = rhs.parent();
5563 }
5564 else if (lhs.attribute())
5565 {
5566 // attributes go after the parent element
5567 if (lhs.parent() == rhs.node()) return false;
5568
5569 ln = lhs.parent();
5570 }
5571 else if (rhs.attribute())
5572 {
5573 // attributes go after the parent element
5574 if (rhs.parent() == lhs.node()) return true;
5575
5576 rn = rhs.parent();
5577 }
5578
5579 if (ln == rn) return false;
5580
5581 unsigned int lh = node_height(ln);
5582 unsigned int rh = node_height(rn);
5583
5584 return node_is_before(ln, lh, rn, rh);
5585 }
5586 };
5587
5588 struct duplicate_comparator
5589 {
5590 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
5591 {
5592 if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
5593 else return rhs.attribute() ? false : lhs.node() < rhs.node();
5594 }
5595 };
5596
5597 double gen_nan()
5598 {
5599 #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
5600 union { float f; int32_t i; } u[sizeof(float) == sizeof(int32_t) ? 1 : -1];
5601 u[0].i = 0x7fc00000;
5602 return u[0].f;
5603 #else
5604 // fallback
5605 const volatile double zero = 0.0;
5606 return zero / zero;
5607 #endif
5608 }
5609
5610 bool is_nan(double value)
5611 {
5612 #if defined(_MSC_VER) || defined(__BORLANDC__)
5613 return !!_isnan(value);
5614 #elif defined(fpclassify) && defined(FP_NAN)
5615 return fpclassify(value) == FP_NAN;
5616 #else
5617 // fallback
5618 const volatile double v = value;
5619 return v != v;
5620 #endif
5621 }
5622
5623 const char_t* convert_number_to_string_special(double value)
5624 {
5625 #if defined(_MSC_VER) || defined(__BORLANDC__)
5626 if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
5627 if (_isnan(value)) return PUGIXML_TEXT("NaN");
5628 return PUGIXML_TEXT("-Infinity") + (value > 0);
5629 #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
5630 switch (fpclassify(value))
5631 {
5632 case FP_NAN:
5633 return PUGIXML_TEXT("NaN");
5634
5635 case FP_INFINITE:
5636 return PUGIXML_TEXT("-Infinity") + (value > 0);
5637
5638 case FP_ZERO:
5639 return PUGIXML_TEXT("0");
5640
5641 default:
5642 return 0;
5643 }
5644 #else
5645 // fallback
5646 const volatile double v = value;
5647
5648 if (v == 0) return PUGIXML_TEXT("0");
5649 if (v != v) return PUGIXML_TEXT("NaN");
5650 if (v * 2 == v) return PUGIXML_TEXT("-Infinity") + (value > 0);
5651 return 0;
5652 #endif
5653 }
5654
5655 bool convert_number_to_boolean(double value)
5656 {
5657 return (value != 0 && !is_nan(value));
5658 }
5659
5660 void truncate_zeros(char* begin, char* end)
5661 {
5662 while (begin != end && end[-1] == '0') end--;
5663
5664 *end = 0;
5665 }
5666
5667 // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
5668#if defined(_MSC_VER) && _MSC_VER >= 1400
5669 void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
5670 {
5671 // get base values
5672 int sign, exponent;
5673 _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
5674
5675 // truncate redundant zeros
5676 truncate_zeros(buffer, buffer + strlen(buffer));
5677
5678 // fill results
5679 *out_mantissa = buffer;
5680 *out_exponent = exponent;
5681 }
5682#else
5683 void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
5684 {
5685 // get a scientific notation value with IEEE DBL_DIG decimals
5686 sprintf(buffer, "%.*e", DBL_DIG, value);
5687 assert(strlen(buffer) < buffer_size);
5688 (void)!buffer_size;
5689
5690 // get the exponent (possibly negative)
5691 char* exponent_string = strchr(buffer, 'e');
5692 assert(exponent_string);
5693
5694 int exponent = atoi(exponent_string + 1);
5695
5696 // extract mantissa string: skip sign
5697 char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
5698 assert(mantissa[0] != '0' && mantissa[1] == '.');
5699
5700 // divide mantissa by 10 to eliminate integer part
5701 mantissa[1] = mantissa[0];
5702 mantissa++;
5703 exponent++;
5704
5705 // remove extra mantissa digits and zero-terminate mantissa
5706 truncate_zeros(mantissa, exponent_string);
5707
5708 // fill results
5709 *out_mantissa = mantissa;
5710 *out_exponent = exponent;
5711 }
5712#endif
5713
5714 xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
5715 {
5716 // try special number conversion
5717 const char_t* special = convert_number_to_string_special(value);
5718 if (special) return xpath_string_const(special);
5719
5720 // get mantissa + exponent form
5721 char mantissa_buffer[64];
5722
5723 char* mantissa;
5724 int exponent;
5725 convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
5726
5727 // make the number!
5728 char_t result[512];
5729 char_t* s = result;
5730
5731 // sign
5732 if (value < 0) *s++ = '-';
5733
5734 // integer part
5735 if (exponent <= 0)
5736 {
5737 *s++ = '0';
5738 }
5739 else
5740 {
5741 while (exponent > 0)
5742 {
5743 assert(*mantissa == 0 || (unsigned)(*mantissa - '0') <= 9);
5744 *s++ = *mantissa ? *mantissa++ : '0';
5745 exponent--;
5746 }
5747 }
5748
5749 // fractional part
5750 if (*mantissa)
5751 {
5752 // decimal point
5753 *s++ = '.';
5754
5755 // extra zeroes from negative exponent
5756 while (exponent < 0)
5757 {
5758 *s++ = '0';
5759 exponent++;
5760 }
5761
5762 // extra mantissa digits
5763 while (*mantissa)
5764 {
5765 assert((unsigned)(*mantissa - '0') <= 9);
5766 *s++ = *mantissa++;
5767 }
5768 }
5769
5770 // zero-terminate
5771 assert(s < result + sizeof(result) / sizeof(result[0]));
5772 *s = 0;
5773
5774 return xpath_string(result, alloc);
5775 }
5776
5777 bool check_string_to_number_format(const char_t* string)
5778 {
5779 // parse leading whitespace
5780 while (IS_CHARTYPE(*string, ct_space)) ++string;
5781
5782 // parse sign
5783 if (*string == '-') ++string;
5784
5785 if (!*string) return false;
5786
5787 // if there is no integer part, there should be a decimal part with at least one digit
5788 if (!IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !IS_CHARTYPEX(string[1], ctx_digit))) return false;
5789
5790 // parse integer part
5791 while (IS_CHARTYPEX(*string, ctx_digit)) ++string;
5792
5793 // parse decimal part
5794 if (*string == '.')
5795 {
5796 ++string;
5797
5798 while (IS_CHARTYPEX(*string, ctx_digit)) ++string;
5799 }
5800
5801 // parse trailing whitespace
5802 while (IS_CHARTYPE(*string, ct_space)) ++string;
5803
5804 return *string == 0;
5805 }
5806
5807 double convert_string_to_number(const char_t* string)
5808 {
5809 // check string format
5810 if (!check_string_to_number_format(string)) return gen_nan();
5811
5812 // parse string
5813 #ifdef PUGIXML_WCHAR_MODE
5814 return wcstod(string, 0);
5815 #else
5816 return atof(string);
5817 #endif
5818 }
5819
5820 bool convert_string_to_number(const char_t* begin, const char_t* end, double* out_result)
5821 {
5822 char_t buffer[32];
5823
5824 size_t length = static_cast<size_t>(end - begin);
5825 char_t* scratch = buffer;
5826
5827 if (length >= sizeof(buffer) / sizeof(buffer[0]))
5828 {
5829 // need to make dummy on-heap copy
5830 scratch = static_cast<char_t*>(global_allocate((length + 1) * sizeof(char_t)));
5831 if (!scratch) return false;
5832 }
5833
5834 // copy string to zero-terminated buffer and perform conversion
5835 memcpy(scratch, begin, length * sizeof(char_t));
5836 scratch[length] = 0;
5837
5838 *out_result = convert_string_to_number(scratch);
5839
5840 // free dummy buffer
5841 if (scratch != buffer) global_deallocate(scratch);
5842
5843 return true;
5844 }
5845
// Rounds to the nearest integer by shifting by one half and taking the floor, so ties go towards positive infinity
double round_nearest(double value)
{
    const double shifted = value + 0.5;

    return floor(shifted);
}
5850
// Same rounding as floor(value + 0.5), except that inputs in [-0.5, -0] produce -0 instead of +0
double round_nearest_nzero(double value)
{
    // ceil keeps the sign of zero: it gives -0 for [-0.5, -0] and +0 for +0
    if (value >= -0.5 && value <= 0) return ceil(value);

    return floor(value + 0.5);
}
5857
5858 const char_t* qualified_name(const xpath_node& node)
5859 {
5860 return node.attribute() ? node.attribute().name() : node.node().name();
5861 }
5862
5863 const char_t* local_name(const xpath_node& node)
5864 {
5865 const char_t* name = qualified_name(node);
5866 const char_t* p = find_char(name, ':');
5867
5868 return p ? p + 1 : name;
5869 }
5870
5871 struct namespace_uri_predicate
5872 {
5873 const char_t* prefix;
5874 size_t prefix_length;
5875
5876 namespace_uri_predicate(const char_t* name)
5877 {
5878 const char_t* pos = find_char(name, ':');
5879
5880 prefix = pos ? name : 0;
5881 prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
5882 }
5883
5884 bool operator()(const xml_attribute& a) const
5885 {
5886 const char_t* name = a.name();
5887
5888 if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
5889
5890 return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
5891 }
5892 };
5893
5894 const char_t* namespace_uri(const xml_node& node)
5895 {
5896 namespace_uri_predicate pred = node.name();
5897
5898 xml_node p = node;
5899
5900 while (p)
5901 {
5902 xml_attribute a = p.find_attribute(pred);
5903
5904 if (a) return a.value();
5905
5906 p = p.parent();
5907 }
5908
5909 return PUGIXML_TEXT("");
5910 }
5911
5912 const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent)
5913 {
5914 namespace_uri_predicate pred = attr.name();
5915
5916 // Default namespace does not apply to attributes
5917 if (!pred.prefix) return PUGIXML_TEXT("");
5918
5919 xml_node p = parent;
5920
5921 while (p)
5922 {
5923 xml_attribute a = p.find_attribute(pred);
5924
5925 if (a) return a.value();
5926
5927 p = p.parent();
5928 }
5929
5930 return PUGIXML_TEXT("");
5931 }
5932
5933 const char_t* namespace_uri(const xpath_node& node)
5934 {
5935 return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
5936 }
5937
5938 void normalize_space(char_t* buffer)
5939 {
5940 char_t* write = buffer;
5941
5942 for (char_t* it = buffer; *it; )
5943 {
5944 char_t ch = *it++;
5945
5946 if (IS_CHARTYPE(ch, ct_space))
5947 {
5948 // replace whitespace sequence with single space
5949 while (IS_CHARTYPE(*it, ct_space)) it++;
5950
5951 // avoid leading spaces
5952 if (write != buffer) *write++ = ' ';
5953 }
5954 else *write++ = ch;
5955 }
5956
5957 // remove trailing space
5958 if (write != buffer && IS_CHARTYPE(write[-1], ct_space)) write--;
5959
5960 // zero-terminate
5961 *write = 0;
5962 }
5963
5964 void translate(char_t* buffer, const char_t* from, const char_t* to)
5965 {
5966 size_t to_length = strlength(to);
5967
5968 char_t* write = buffer;
5969
5970 while (*buffer)
5971 {
5972 DMC_VOLATILE char_t ch = *buffer++;
5973
5974 const char_t* pos = find_char(from, ch);
5975
5976 if (!pos)
5977 *write++ = ch; // do not process
5978 else if (static_cast<size_t>(pos - from) < to_length)
5979 *write++ = to[pos - from]; // replace
5980 }
5981
5982 // zero-terminate
5983 *write = 0;
5984 }
5985
// Boolean XPath variable. Instances are created by new_xpath_variable, which allocates extra room
// after the object so that the full variable name can be stored starting at name[0].
struct xpath_variable_boolean: xpath_variable
{
    // the value starts out as false
    xpath_variable_boolean(): value(false)
    {
    }

    bool value;
    // start of the variable name; must remain the last member because the rest of the name follows it in memory
    char_t name[1];
};
5995
// Numeric XPath variable. Instances are created by new_xpath_variable, which allocates extra room
// after the object so that the full variable name can be stored starting at name[0].
struct xpath_variable_number: xpath_variable
{
    // the value starts out as 0
    xpath_variable_number(): value(0)
    {
    }

    double value;
    // start of the variable name; must remain the last member because the rest of the name follows it in memory
    char_t name[1];
};
6005
// String XPath variable. Instances are created by new_xpath_variable, which allocates extra room
// after the object so that the full variable name can be stored starting at name[0].
struct xpath_variable_string: xpath_variable
{
    // the value starts out as a null pointer (no string stored)
    xpath_variable_string(): value(0)
    {
    }

    // the stored string is released with global_deallocate, so whoever assigns 'value'
    // is expected to use the matching allocation function (NOTE(review): confirm at the setter)
    ~xpath_variable_string()
    {
        if (value) global_deallocate(value);
    }

    char_t* value;
    // start of the variable name; must remain the last member because the rest of the name follows it in memory
    char_t name[1];
};
6020
// Node set XPath variable. Instances are created by new_xpath_variable, which allocates extra room
// after the object so that the full variable name can be stored starting at name[0].
struct xpath_variable_node_set: xpath_variable
{
    // default-constructed by the implicit constructor
    xpath_node_set value;
    // start of the variable name; must remain the last member because the rest of the name follows it in memory
    char_t name[1];
};
6026
// Shared default-constructed node set constant.
// NOTE(review): presumably handed out where a node set reference is required but no real value exists - confirm at the use sites
const xpath_node_set dummy_node_set;
6028
6029 unsigned int hash_string(const char_t* str)
6030 {
6031 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
6032 unsigned int result = 0;
6033
6034 while (*str)
6035 {
6036 result += static_cast<unsigned int>(*str++);
6037 result += result << 10;
6038 result ^= result >> 6;
6039 }
6040
6041 result += result << 3;
6042 result ^= result >> 11;
6043 result += result << 15;
6044
6045 return result;
6046 }
6047
6048 template <typename T> T* new_xpath_variable(const char_t* name)
6049 {
6050 size_t length = strlength(name);
6051 if (length == 0) return 0; // empty variable names are invalid
6052
6053 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
6054 void* memory = global_allocate(sizeof(T) + length * sizeof(char_t));
6055 if (!memory) return 0;
6056
6057 T* result = new (memory) T();
6058
6059 memcpy(result->name, name, (length + 1) * sizeof(char_t));
6060
6061 return result;
6062 }
6063
6064 xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
6065 {
6066 switch (type)
6067 {
6068 case xpath_type_node_set:
6069 return new_xpath_variable<xpath_variable_node_set>(name);
6070
6071 case xpath_type_number:
6072 return new_xpath_variable<xpath_variable_number>(name);
6073
6074 case xpath_type_string:
6075 return new_xpath_variable<xpath_variable_string>(name);
6076
6077 case xpath_type_boolean:
6078 return new_xpath_variable<xpath_variable_boolean>(name);
6079
6080 default:
6081 return 0;
6082 }
6083 }
6084
// Destroys a variable object of concrete type T and releases its memory.
// The object is expected to live in memory obtained with global_allocate (as done by new_xpath_variable),
// hence the explicit destructor call followed by global_deallocate instead of a delete expression.
template <typename T> void delete_xpath_variable(T* var)
{
    var->~T();
    global_deallocate(var);
}
6090
6091 void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
6092 {
6093 switch (type)
6094 {
6095 case xpath_type_node_set:
6096 delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
6097 break;
6098
6099 case xpath_type_number:
6100 delete_xpath_variable(static_cast<xpath_variable_number*>(var));
6101 break;
6102
6103 case xpath_type_string:
6104 delete_xpath_variable(static_cast<xpath_variable_string*>(var));
6105 break;
6106
6107 case xpath_type_boolean:
6108 delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
6109 break;
6110
6111 default:
6112 assert(!"Invalid variable type");
6113 }
6114 }
6115
6116 xpath_variable* get_variable(xpath_variable_set* set, const char_t* begin, const char_t* end)
6117 {
6118 char_t buffer[32];
6119
6120 size_t length = static_cast<size_t>(end - begin);
6121 char_t* scratch = buffer;
6122
6123 if (length >= sizeof(buffer) / sizeof(buffer[0]))
6124 {
6125 // need to make dummy on-heap copy
6126 scratch = static_cast<char_t*>(global_allocate((length + 1) * sizeof(char_t)));
6127 if (!scratch) return 0;
6128 }
6129
6130 // copy string to zero-terminated buffer and perform lookup
6131 memcpy(scratch, begin, length * sizeof(char_t));
6132 scratch[length] = 0;
6133
6134 xpath_variable* result = set->get(scratch);
6135
6136 // free dummy buffer
6137 if (scratch != buffer) global_deallocate(scratch);
6138
6139 return result;
6140 }
6141}
6142
6143// Internal node set class
6144namespace
6145{
6146 xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
6147 {
6148 xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
6149
6150 if (type == xpath_node_set::type_unsorted)
6151 {
6152 sort(begin, end, document_order_comparator());
6153
6154 type = xpath_node_set::type_sorted;
6155 }
6156
6157 if (type != order) reverse(begin, end);
6158
6159 return order;
6160 }
6161
6162 xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
6163 {
6164 if (begin == end) return xpath_node();
6165
6166 switch (type)
6167 {
6168 case xpath_node_set::type_sorted:
6169 return *begin;
6170
6171 case xpath_node_set::type_sorted_reverse:
6172 return *(end - 1);
6173
6174 case xpath_node_set::type_unsorted:
6175 return *min_element(begin, end, document_order_comparator());
6176
6177 default:
6178 assert(!"Invalid node set type");
6179 return xpath_node();
6180 }
6181 }
6182 class xpath_node_set_raw
6183 {
6184 xpath_node_set::type_t _type;
6185
6186 xpath_node* _begin;
6187 xpath_node* _end;
6188 xpath_node* _eos;
6189
6190 public:
6191 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
6192 {
6193 }
6194
6195 xpath_node* begin() const
6196 {
6197 return _begin;
6198 }
6199
6200 xpath_node* end() const
6201 {
6202 return _end;
6203 }
6204
6205 bool empty() const
6206 {
6207 return _begin == _end;
6208 }
6209
6210 size_t size() const
6211 {
6212 return static_cast<size_t>(_end - _begin);
6213 }
6214
6215 xpath_node first() const
6216 {
6217 return xpath_first(_begin, _end, _type);
6218 }
6219
6220 void push_back(const xpath_node& node, xpath_allocator* alloc)
6221 {
6222 if (_end == _eos)
6223 {
6224 size_t capacity = static_cast<size_t>(_eos - _begin);
6225
6226 // get new capacity (1.5x rule)
6227 size_t new_capacity = capacity + capacity / 2 + 1;
6228
6229 // reallocate the old array or allocate a new one
6230 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
6231 assert(data);
6232
6233 // finalize
6234 _begin = data;
6235 _end = data + capacity;
6236 _eos = data + new_capacity;
6237 }
6238
6239 *_end++ = node;
6240 }
6241
6242 void append(const xpath_node* begin, const xpath_node* end, xpath_allocator* alloc)
6243 {
6244 size_t size = static_cast<size_t>(_end - _begin);
6245 size_t capacity = static_cast<size_t>(_eos - _begin);
6246 size_t count = static_cast<size_t>(end - begin);
6247
6248 if (size + count > capacity)
6249 {
6250 // reallocate the old array or allocate a new one
6251 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size + count) * sizeof(xpath_node)));
6252 assert(data);
6253
6254 // finalize
6255 _begin = data;
6256 _end = data + size;
6257 _eos = data + size + count;
6258 }
6259
6260 memcpy(_end, begin, count * sizeof(xpath_node));
6261 _end += count;
6262 }
6263
6264 void sort_do()
6265 {
6266 _type = xpath_sort(_begin, _end, _type, false);
6267 }
6268
6269 void truncate(xpath_node* pos)
6270 {
6271 assert(_begin <= pos && pos <= _end);
6272
6273 _end = pos;
6274 }
6275
6276 void remove_duplicates()
6277 {
6278 if (_type == xpath_node_set::type_unsorted)
6279 sort(_begin, _end, duplicate_comparator());
6280
6281 _end = unique(_begin, _end);
6282 }
6283
6284 xpath_node_set::type_t type() const
6285 {
6286 return _type;
6287 }
6288
6289 void set_type(xpath_node_set::type_t type)
6290 {
6291 _type = type;
6292 }
6293 };
6294}
6295
6296namespace
6297{
// Evaluation context handed to expression evaluation: the context node together with its
// position and the size of the set it belongs to (apply_predicate passes 1-based positions)
struct xpath_context
{
    // context node
    xpath_node n;
    // context position and context size
    size_t position, size;

    xpath_context(const xpath_node& n, size_t position, size_t size): n(n), position(position), size(size)
    {
    }
};
6307
// Kinds of lexemes produced by xpath_lexer
enum lexeme_t
{
    lex_none = 0, // input that does not form a valid lexeme
    lex_equal, // =
    lex_not_equal, // !=
    lex_less, // <
    lex_greater, // >
    lex_less_or_equal, // <=
    lex_greater_or_equal, // >=
    lex_plus, // +
    lex_minus, // -
    lex_multiply, // *
    lex_union, // |
    lex_var_ref, // $name or $prefix:name (contents hold the name without '$')
    lex_open_brace, // (
    lex_close_brace, // )
    lex_quoted_string, // "..." or '...' (contents hold the text without the quotes)
    lex_number, // digits with optional fraction, or '.' followed by digits
    lex_slash, // /
    lex_double_slash, // //
    lex_open_square_brace, // [
    lex_close_square_brace, // ]
    lex_string, // name, prefix:name or prefix:*
    lex_comma, // ,
    lex_axis_attribute, // @
    lex_dot, // .
    lex_double_dot, // ..
    lex_double_colon, // ::
    lex_eof // end of input
};
6338
6339 struct xpath_lexer_string
6340 {
6341 const char_t* begin;
6342 const char_t* end;
6343
6344 xpath_lexer_string(): begin(0), end(0)
6345 {
6346 }
6347
6348 bool operator==(const char_t* other) const
6349 {
6350 size_t length = static_cast<size_t>(end - begin);
6351
6352 return strequalrange(other, begin, length);
6353 }
6354 };
6355
// Tokenizer for XPath queries. The lexer always holds one current lexeme; next() scans the following one.
// Input that does not form a valid lexeme is reported as lex_none.
class xpath_lexer
{
    // position right after the current lexeme, i.e. where the next scan starts
    const char_t* _cur;
    // position where the current lexeme starts (after any skipped whitespace)
    const char_t* _cur_lexeme_pos;
    // text of the current lexeme; only updated for variable references, numbers, names and quoted strings
    xpath_lexer_string _cur_lexeme_contents;

    // kind of the current lexeme
    lexeme_t _cur_lexeme;

public:
    // Starts lexing at the beginning of a zero-terminated query and scans the first lexeme
    explicit xpath_lexer(const char_t* query): _cur(query)
    {
        next();
    }

    // Returns the position right after the current lexeme
    const char_t* state() const
    {
        return _cur;
    }

    // Scans the next lexeme, updating the current lexeme kind, its start position and (where applicable) its contents
    void next()
    {
        const char_t* cur = _cur;

        // skip whitespace in front of the lexeme
        while (IS_CHARTYPE(*cur, ct_space)) ++cur;

        // save lexeme position for error reporting
        _cur_lexeme_pos = cur;

        switch (*cur)
        {
        case 0:
            // the terminator is not consumed, so every further call reports end of input again
            _cur_lexeme = lex_eof;
            break;

        case '>':
            if (*(cur+1) == '=')
            {
                cur += 2;
                _cur_lexeme = lex_greater_or_equal;
            }
            else
            {
                cur += 1;
                _cur_lexeme = lex_greater;
            }
            break;

        case '<':
            if (*(cur+1) == '=')
            {
                cur += 2;
                _cur_lexeme = lex_less_or_equal;
            }
            else
            {
                cur += 1;
                _cur_lexeme = lex_less;
            }
            break;

        case '!':
            if (*(cur+1) == '=')
            {
                cur += 2;
                _cur_lexeme = lex_not_equal;
            }
            else
            {
                // a lone '!' is not a lexeme; the position is left unchanged
                _cur_lexeme = lex_none;
            }
            break;

        case '=':
            cur += 1;
            _cur_lexeme = lex_equal;

            break;

        case '+':
            cur += 1;
            _cur_lexeme = lex_plus;

            break;

        case '-':
            cur += 1;
            _cur_lexeme = lex_minus;

            break;

        case '*':
            cur += 1;
            _cur_lexeme = lex_multiply;

            break;

        case '|':
            cur += 1;
            _cur_lexeme = lex_union;

            break;

        case '$':
            cur += 1;

            // variable reference: '$' followed by a name, optionally of the form prefix:name
            if (IS_CHARTYPEX(*cur, ctx_start_symbol))
            {
                // contents start after the '$'
                _cur_lexeme_contents.begin = cur;

                while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++;

                if (cur[0] == ':' && IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
                {
                    cur++; // :

                    while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
                }

                _cur_lexeme_contents.end = cur;

                _cur_lexeme = lex_var_ref;
            }
            else
            {
                // '$' without a name after it
                _cur_lexeme = lex_none;
            }

            break;

        case '(':
            cur += 1;
            _cur_lexeme = lex_open_brace;

            break;

        case ')':
            cur += 1;
            _cur_lexeme = lex_close_brace;

            break;

        case '[':
            cur += 1;
            _cur_lexeme = lex_open_square_brace;

            break;

        case ']':
            cur += 1;
            _cur_lexeme = lex_close_square_brace;

            break;

        case ',':
            cur += 1;
            _cur_lexeme = lex_comma;

            break;

        case '/':
            if (*(cur+1) == '/')
            {
                cur += 2;
                _cur_lexeme = lex_double_slash;
            }
            else
            {
                cur += 1;
                _cur_lexeme = lex_slash;
            }
            break;

        case '.':
            if (*(cur+1) == '.')
            {
                cur += 2;
                _cur_lexeme = lex_double_dot;
            }
            else if (IS_CHARTYPEX(*(cur+1), ctx_digit))
            {
                // number without an integer part, e.g. ".5"; the contents include the '.'
                _cur_lexeme_contents.begin = cur; // .

                ++cur;

                while (IS_CHARTYPEX(*cur, ctx_digit)) cur++;

                _cur_lexeme_contents.end = cur;

                _cur_lexeme = lex_number;
            }
            else
            {
                cur += 1;
                _cur_lexeme = lex_dot;
            }
            break;

        case '@':
            cur += 1;
            _cur_lexeme = lex_axis_attribute;

            break;

        case '"':
        case '\'':
        {
            // quoted string: runs up to the next occurrence of the opening quote character; there are no escapes
            char_t terminator = *cur;

            ++cur;

            // contents exclude both quotes
            _cur_lexeme_contents.begin = cur;
            while (*cur && *cur != terminator) cur++;
            _cur_lexeme_contents.end = cur;

            if (!*cur)
                // the query ended before the closing quote
                _cur_lexeme = lex_none;
            else
            {
                // skip the closing quote
                cur += 1;
                _cur_lexeme = lex_quoted_string;
            }

            break;
        }

        case ':':
            if (*(cur+1) == ':')
            {
                cur += 2;
                _cur_lexeme = lex_double_colon;
            }
            else
            {
                // a lone ':' is not a lexeme; the position is left unchanged
                _cur_lexeme = lex_none;
            }
            break;

        default:
            if (IS_CHARTYPEX(*cur, ctx_digit))
            {
                // number: digits, optionally followed by '.' and more digits (the fraction digits may be absent)
                _cur_lexeme_contents.begin = cur;

                while (IS_CHARTYPEX(*cur, ctx_digit)) cur++;

                if (*cur == '.')
                {
                    cur++;

                    while (IS_CHARTYPEX(*cur, ctx_digit)) cur++;
                }

                _cur_lexeme_contents.end = cur;

                _cur_lexeme = lex_number;
            }
            else if (IS_CHARTYPEX(*cur, ctx_start_symbol))
            {
                // name: either a plain name, "prefix:*" or "prefix:name"
                _cur_lexeme_contents.begin = cur;

                while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++;

                // a ':' that is followed by neither '*' nor a name character is left for the next lexeme
                if (cur[0] == ':')
                {
                    if (cur[1] == '*') // namespace test ncname:*
                    {
                        cur += 2; // :*
                    }
                    else if (IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
                    {
                        cur++; // :

                        while (IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
                    }
                }

                _cur_lexeme_contents.end = cur;

                _cur_lexeme = lex_string;
            }
            else
            {
                // unrecognized character
                _cur_lexeme = lex_none;
            }
        }

        // commit the new position
        _cur = cur;
    }

    // Returns the kind of the current lexeme
    lexeme_t current() const
    {
        return _cur_lexeme;
    }

    // Returns the position where the current lexeme starts
    const char_t* current_pos() const
    {
        return _cur_lexeme_pos;
    }

    // Returns the text of the current lexeme; only valid for the lexeme kinds that carry contents
    const xpath_lexer_string& contents() const
    {
        assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);

        return _cur_lexeme_contents;
    }
};
6661
// Kinds of XPath expression tree nodes; "left", "right" and "third" in the comments refer to the node's operands
enum ast_type_t
{
    ast_op_or, // left or right
    ast_op_and, // left and right
    ast_op_equal, // left = right
    ast_op_not_equal, // left != right
    ast_op_less, // left < right
    ast_op_greater, // left > right
    ast_op_less_or_equal, // left <= right
    ast_op_greater_or_equal, // left >= right
    ast_op_add, // left + right
    ast_op_subtract, // left - right
    ast_op_multiply, // left * right
    ast_op_divide, // left div right
    ast_op_mod, // left mod right
    ast_op_negate, // - left (unary minus; NOTE(review): operand assumed to be stored in left - confirm in the evaluator)
    ast_op_union, // left | right
    ast_predicate, // apply predicate to set; next points to next predicate
    ast_filter, // select * from left where right
    ast_filter_posinv, // select * from left where right; proximity position invariant
    ast_string_constant, // string constant
    ast_number_constant, // number constant
    ast_variable, // variable
    ast_func_last, // last()
    ast_func_position, // position()
    ast_func_count, // count(left)
    ast_func_id, // id(left)
    ast_func_local_name_0, // local-name()
    ast_func_local_name_1, // local-name(left)
    ast_func_namespace_uri_0, // namespace-uri()
    ast_func_namespace_uri_1, // namespace-uri(left)
    ast_func_name_0, // name()
    ast_func_name_1, // name(left)
    ast_func_string_0, // string()
    ast_func_string_1, // string(left)
    ast_func_concat, // concat(left, right, siblings)
    ast_func_starts_with, // starts-with(left, right)
    ast_func_contains, // contains(left, right)
    ast_func_substring_before, // substring-before(left, right)
    ast_func_substring_after, // substring-after(left, right)
    ast_func_substring_2, // substring(left, right)
    ast_func_substring_3, // substring(left, right, third)
    ast_func_string_length_0, // string-length()
    ast_func_string_length_1, // string-length(left)
    ast_func_normalize_space_0, // normalize-space()
    ast_func_normalize_space_1, // normalize-space(left)
    ast_func_translate, // translate(left, right, third)
    ast_func_boolean, // boolean(left)
    ast_func_not, // not(left)
    ast_func_true, // true()
    ast_func_false, // false()
    ast_func_lang, // lang(left)
    ast_func_number_0, // number()
    ast_func_number_1, // number(left)
    ast_func_sum, // sum(left)
    ast_func_floor, // floor(left)
    ast_func_ceiling, // ceiling(left)
    ast_func_round, // round(left)
    ast_step, // process set left with step
    ast_step_root // select root node
};
6723
// Axes of an XPath location step (the direction in which a step selects nodes relative to the context node)
enum axis_t
{
    axis_ancestor,
    axis_ancestor_or_self,
    axis_attribute,
    axis_child,
    axis_descendant,
    axis_descendant_or_self,
    axis_following,
    axis_following_sibling,
    axis_namespace,
    axis_parent,
    axis_preceding,
    axis_preceding_sibling,
    axis_self
};
6740
// Kinds of node tests applied by a location step (see the step_push overloads for the exact matching rules)
enum nodetest_t
{
    nodetest_none, // NOTE(review): presumably "no node test" - not handled by step_push, confirm at use sites
    nodetest_name, // name equals the stored node test string (elements only, for nodes)
    nodetest_type_node, // any node
    nodetest_type_comment, // comment nodes
    nodetest_type_pi, // processing instruction nodes
    nodetest_type_text, // text nodes (pcdata or cdata)
    nodetest_pi, // processing instruction nodes whose name equals the stored node test string
    nodetest_all, // any element (any attribute on the attribute overload)
    nodetest_all_in_namespace // name starts with the stored node test string
};
6753
// Wraps an axis value in a distinct type so that it can be passed as a tag argument;
// step_fill reads the value back as T::axis
template <axis_t N> struct axis_to_type
{
    static const axis_t axis;
};

// out-of-class definition of the static member; its value is the template argument
template <axis_t N> const axis_t axis_to_type<N>::axis = N;
6760
6761 class xpath_ast_node
6762 {
6763 private:
6764 // node type
6765 char _type;
6766 char _rettype;
6767
6768 // for ast_step / ast_predicate
6769 char _axis;
6770 char _test;
6771
6772 // tree node structure
6773 xpath_ast_node* _left;
6774 xpath_ast_node* _right;
6775 xpath_ast_node* _next;
6776
6777 union
6778 {
6779 // value for ast_string_constant
6780 const char_t* string;
6781 // value for ast_number_constant
6782 double number;
6783 // variable for ast_variable
6784 xpath_variable* variable;
6785 // node test for ast_step (node name/namespace/node type/pi target)
6786 const char_t* nodetest;
6787 } _data;
6788
6789 xpath_ast_node(const xpath_ast_node&);
6790 xpath_ast_node& operator=(const xpath_ast_node&);
6791
// Evaluates an equality-style comparison (comp is applied to pairs of booleans, numbers or strings)
// between two expressions, choosing the comparison type from the static result types of both sides:
// - neither side is a node set: compare as booleans if either side is boolean, otherwise as numbers
//   if either side is a number, otherwise as strings
// - both sides are node sets: true if comp holds for the string values of any pair of nodes
// - exactly one side is a node set: the other side is compared against the node set as a whole
//   (boolean) or against each node's string value (number/string); true if any comparison holds
// NOTE(review): xpath_allocator_capture presumably releases temporaries allocated within its scope - confirm
template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
{
    xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();

    if (lt != xpath_type_node_set && rt != xpath_type_node_set)
    {
        // two scalar values: boolean takes precedence over number, number over string
        if (lt == xpath_type_boolean || rt == xpath_type_boolean)
            return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
        else if (lt == xpath_type_number || rt == xpath_type_number)
            return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
        else if (lt == xpath_type_string || rt == xpath_type_string)
        {
            xpath_allocator_capture cr(stack.result);

            xpath_string ls = lhs->eval_string(c, stack);
            xpath_string rs = rhs->eval_string(c, stack);

            return comp(ls, rs);
        }
    }
    else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
    {
        xpath_allocator_capture cr(stack.result);

        xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
        xpath_node_set_raw rs = rhs->eval_node_set(c, stack);

        // try every pair of nodes until one satisfies the comparison
        for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
            for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
            {
                xpath_allocator_capture cri(stack.result);

                if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
                    return true;
            }

        return false;
    }
    else
    {
        // make sure the node set ends up on the right-hand side; this also swaps the order of
        // the arguments passed to comp below
        if (lt == xpath_type_node_set)
        {
            swap(lhs, rhs);
            swap(lt, rt);
        }

        if (lt == xpath_type_boolean)
            return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
        else if (lt == xpath_type_number)
        {
            xpath_allocator_capture cr(stack.result);

            double l = lhs->eval_number(c, stack);
            xpath_node_set_raw rs = rhs->eval_node_set(c, stack);

            // compare the number against each node's string value converted to a number
            for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
            {
                xpath_allocator_capture cri(stack.result);

                if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
                    return true;
            }

            return false;
        }
        else if (lt == xpath_type_string)
        {
            xpath_allocator_capture cr(stack.result);

            xpath_string l = lhs->eval_string(c, stack);
            xpath_node_set_raw rs = rhs->eval_node_set(c, stack);

            // compare the string against each node's string value
            for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
            {
                xpath_allocator_capture cri(stack.result);

                if (comp(l, string_value(*ri, stack.result)))
                    return true;
            }

            return false;
        }
    }

    // reached only if a result type is none of boolean/number/string/node set
    assert(!"Wrong types");
    return false;
}
6879
// Evaluates a relational comparison between two expressions; comp is always applied to numbers, in lhs/rhs order:
// - neither side is a node set: both sides are evaluated as numbers
// - both sides are node sets: true if comp holds for the numeric values of any pair of nodes
// - exactly one side is a node set: the other side is evaluated as a number and compared against
//   the numeric value of each node; true if any comparison holds
// The numeric value of a node is its string value converted with convert_string_to_number.
// NOTE(review): xpath_allocator_capture presumably releases temporaries allocated within its scope - confirm
template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
{
    xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();

    if (lt != xpath_type_node_set && rt != xpath_type_node_set)
        return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
    else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
    {
        xpath_allocator_capture cr(stack.result);

        xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
        xpath_node_set_raw rs = rhs->eval_node_set(c, stack);

        for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
        {
            xpath_allocator_capture cri(stack.result);

            // the left value is converted once per left node, not once per pair
            double l = convert_string_to_number(string_value(*li, stack.result).c_str());

            for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
            {
                xpath_allocator_capture crii(stack.result);

                if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
                    return true;
            }
        }

        return false;
    }
    else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
    {
        xpath_allocator_capture cr(stack.result);

        double l = lhs->eval_number(c, stack);
        xpath_node_set_raw rs = rhs->eval_node_set(c, stack);

        // number on the left, each node of the set on the right
        for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
        {
            xpath_allocator_capture cri(stack.result);

            if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
                return true;
        }

        return false;
    }
    else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
    {
        xpath_allocator_capture cr(stack.result);

        xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
        double r = rhs->eval_number(c, stack);

        // each node of the set on the left, number on the right (operand order matters for relational operators)
        for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
        {
            xpath_allocator_capture cri(stack.result);

            if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
                return true;
        }

        return false;
    }
    else
    {
        // the four cases above cover every combination, so this is unreachable
        assert(!"Wrong types");
        return false;
    }
}
6950
6951 void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
6952 {
6953 assert(ns.size() >= first);
6954
6955 size_t i = 1;
6956 size_t size = ns.size() - first;
6957
6958 xpath_node* last = ns.begin() + first;
6959
6960 // remove_if... or well, sort of
6961 for (xpath_node* it = last; it != ns.end(); ++it, ++i)
6962 {
6963 xpath_context c(*it, i, size);
6964
6965 if (expr->rettype() == xpath_type_number)
6966 {
6967 if (expr->eval_number(c, stack) == i)
6968 *last++ = *it;
6969 }
6970 else if (expr->eval_boolean(c, stack))
6971 *last++ = *it;
6972 }
6973
6974 ns.truncate(last);
6975 }
6976
6977 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack)
6978 {
6979 if (ns.size() == first) return;
6980
6981 for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
6982 {
6983 apply_predicate(ns, first, pred->_left, stack);
6984 }
6985 }
6986
6987 void step_push(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& parent, xpath_allocator* alloc)
6988 {
6989 if (!a) return;
6990
6991 const char_t* name = a.name();
6992
6993 // There are no attribute nodes corresponding to attributes that declare namespaces
6994 // That is, "xmlns:..." or "xmlns"
6995 if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return;
6996
6997 switch (_test)
6998 {
6999 case nodetest_name:
7000 if (strequal(name, _data.nodetest)) ns.push_back(xpath_node(a, parent), alloc);
7001 break;
7002
7003 case nodetest_type_node:
7004 case nodetest_all:
7005 ns.push_back(xpath_node(a, parent), alloc);
7006 break;
7007
7008 case nodetest_all_in_namespace:
7009 if (starts_with(name, _data.nodetest))
7010 ns.push_back(xpath_node(a, parent), alloc);
7011 break;
7012
7013 default:
7014 ;
7015 }
7016 }
7017
7018 void step_push(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc)
7019 {
7020 if (!n) return;
7021
7022 switch (_test)
7023 {
7024 case nodetest_name:
7025 if (n.type() == node_element && strequal(n.name(), _data.nodetest)) ns.push_back(n, alloc);
7026 break;
7027
7028 case nodetest_type_node:
7029 ns.push_back(n, alloc);
7030 break;
7031
7032 case nodetest_type_comment:
7033 if (n.type() == node_comment)
7034 ns.push_back(n, alloc);
7035 break;
7036
7037 case nodetest_type_text:
7038 if (n.type() == node_pcdata || n.type() == node_cdata)
7039 ns.push_back(n, alloc);
7040 break;
7041
7042 case nodetest_type_pi:
7043 if (n.type() == node_pi)
7044 ns.push_back(n, alloc);
7045 break;
7046
7047 case nodetest_pi:
7048 if (n.type() == node_pi && strequal(n.name(), _data.nodetest))
7049 ns.push_back(n, alloc);
7050 break;
7051
7052 case nodetest_all:
7053 if (n.type() == node_element)
7054 ns.push_back(n, alloc);
7055 break;
7056
7057 case nodetest_all_in_namespace:
7058 if (n.type() == node_element && starts_with(n.name(), _data.nodetest))
7059 ns.push_back(n, alloc);
7060 break;
7061
7062 default:
7063 assert(!"Unknown axis");
7064 }
7065 }
7066
7067 template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc, T)
7068 {
7069 const axis_t axis = T::axis;
7070
7071 switch (axis)
7072 {
7073 case axis_attribute:
7074 {
7075 for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute())
7076 step_push(ns, a, n, alloc);
7077
7078 break;
7079 }
7080
7081 case axis_child:
7082 {
7083 for (xml_node c = n.first_child(); c; c = c.next_sibling())
7084 step_push(ns, c, alloc);
7085
7086 break;
7087 }
7088
7089 case axis_descendant:
7090 case axis_descendant_or_self:
7091 {
7092 if (axis == axis_descendant_or_self)
7093 step_push(ns, n, alloc);
7094
7095 xml_node cur = n.first_child();
7096
7097 while (cur && cur != n)
7098 {
7099 step_push(ns, cur, alloc);
7100
7101 if (cur.first_child())
7102 cur = cur.first_child();
7103 else if (cur.next_sibling())
7104 cur = cur.next_sibling();
7105 else
7106 {
7107 while (!cur.next_sibling() && cur != n)
7108 cur = cur.parent();
7109
7110 if (cur != n) cur = cur.next_sibling();
7111 }
7112 }
7113
7114 break;
7115 }
7116
7117 case axis_following_sibling:
7118 {
7119 for (xml_node c = n.next_sibling(); c; c = c.next_sibling())
7120 step_push(ns, c, alloc);
7121
7122 break;
7123 }
7124
7125 case axis_preceding_sibling:
7126 {
7127 for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling())
7128 step_push(ns, c, alloc);
7129
7130 break;
7131 }
7132
7133 case axis_following:
7134 {
7135 xml_node cur = n;
7136
7137 // exit from this node so that we don't include descendants
7138 while (cur && !cur.next_sibling()) cur = cur.parent();
7139 cur = cur.next_sibling();
7140
7141 for (;;)
7142 {
7143 step_push(ns, cur, alloc);
7144
7145 if (cur.first_child())
7146 cur = cur.first_child();
7147 else if (cur.next_sibling())
7148 cur = cur.next_sibling();
7149 else
7150 {
7151 while (cur && !cur.next_sibling()) cur = cur.parent();
7152 cur = cur.next_sibling();
7153
7154 if (!cur) break;
7155 }
7156 }
7157
7158 break;
7159 }
7160
7161 case axis_preceding:
7162 {
7163 xml_node cur = n;
7164
7165 while (cur && !cur.previous_sibling()) cur = cur.parent();
7166 cur = cur.previous_sibling();
7167
7168 for (;;)
7169 {
7170 if (cur.last_child())
7171 cur = cur.last_child();
7172 else
7173 {
7174 // leaf node, can't be ancestor
7175 step_push(ns, cur, alloc);
7176
7177 if (cur.previous_sibling())
7178 cur = cur.previous_sibling();
7179 else
7180 {
7181 do
7182 {
7183 cur = cur.parent();
7184 if (!cur) break;
7185
7186 if (!node_is_ancestor(cur, n)) step_push(ns, cur, alloc);
7187 }
7188 while (!cur.previous_sibling());
7189
7190 cur = cur.previous_sibling();
7191
7192 if (!cur) break;
7193 }
7194 }
7195 }
7196
7197 break;
7198 }
7199
7200 case axis_ancestor:
7201 case axis_ancestor_or_self:
7202 {
7203 if (axis == axis_ancestor_or_self)
7204 step_push(ns, n, alloc);
7205
7206 xml_node cur = n.parent();
7207
7208 while (cur)
7209 {
7210 step_push(ns, cur, alloc);
7211
7212 cur = cur.parent();
7213 }
7214
7215 break;
7216 }
7217
7218 case axis_self:
7219 {
7220 step_push(ns, n, alloc);
7221
7222 break;
7223 }
7224
7225 case axis_parent:
7226 {
7227 if (n.parent()) step_push(ns, n.parent(), alloc);
7228
7229 break;
7230 }
7231
7232 default:
7233 assert(!"Unimplemented axis");
7234 }
7235 }
7236
7237 template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& p, xpath_allocator* alloc, T v)
7238 {
7239 const axis_t axis = T::axis;
7240
7241 switch (axis)
7242 {
7243 case axis_ancestor:
7244 case axis_ancestor_or_self:
7245 {
7246 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
7247 step_push(ns, a, p, alloc);
7248
7249 xml_node cur = p;
7250
7251 while (cur)
7252 {
7253 step_push(ns, cur, alloc);
7254
7255 cur = cur.parent();
7256 }
7257
7258 break;
7259 }
7260
7261 case axis_descendant_or_self:
7262 case axis_self:
7263 {
7264 if (_test == nodetest_type_node) // reject attributes based on principal node type test
7265 step_push(ns, a, p, alloc);
7266
7267 break;
7268 }
7269
7270 case axis_following:
7271 {
7272 xml_node cur = p;
7273
7274 for (;;)
7275 {
7276 if (cur.first_child())
7277 cur = cur.first_child();
7278 else if (cur.next_sibling())
7279 cur = cur.next_sibling();
7280 else
7281 {
7282 while (cur && !cur.next_sibling()) cur = cur.parent();
7283 cur = cur.next_sibling();
7284
7285 if (!cur) break;
7286 }
7287
7288 step_push(ns, cur, alloc);
7289 }
7290
7291 break;
7292 }
7293
7294 case axis_parent:
7295 {
7296 step_push(ns, p, alloc);
7297
7298 break;
7299 }
7300
7301 case axis_preceding:
7302 {
7303 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
7304 step_fill(ns, p, alloc, v);
7305 break;
7306 }
7307
7308 default:
7309 assert(!"Unimplemented axis");
7310 }
7311 }
7312
7313 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, T v)
7314 {
7315 const axis_t axis = T::axis;
7316 bool attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
7317
7318 xpath_node_set_raw ns;
7319 ns.set_type((axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling) ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted);
7320
7321 if (_left)
7322 {
7323 xpath_node_set_raw s = _left->eval_node_set(c, stack);
7324
7325 // self axis preserves the original order
7326 if (axis == axis_self) ns.set_type(s.type());
7327
7328 for (const xpath_node* it = s.begin(); it != s.end(); ++it)
7329 {
7330 size_t size = ns.size();
7331
7332 // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
7333 if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
7334
7335 if (it->node())
7336 step_fill(ns, it->node(), stack.result, v);
7337 else if (attributes)
7338 step_fill(ns, it->attribute(), it->parent(), stack.result, v);
7339
7340 apply_predicates(ns, size, stack);
7341 }
7342 }
7343 else
7344 {
7345 if (c.n.node())
7346 step_fill(ns, c.n.node(), stack.result, v);
7347 else if (attributes)
7348 step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, v);
7349
7350 apply_predicates(ns, 0, stack);
7351 }
7352
7353 // child, attribute and self axes always generate unique set of nodes
7354 // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
7355 if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
7356 ns.remove_duplicates();
7357
7358 return ns;
7359 }
7360
7361 public:
7362 xpath_ast_node(ast_type_t type, xpath_value_type rettype, const char_t* value):
7363 _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(0), _right(0), _next(0)
7364 {
7365 assert(type == ast_string_constant);
7366 _data.string = value;
7367 }
7368
7369 xpath_ast_node(ast_type_t type, xpath_value_type rettype, double value):
7370 _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(0), _right(0), _next(0)
7371 {
7372 assert(type == ast_number_constant);
7373 _data.number = value;
7374 }
7375
7376 xpath_ast_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value):
7377 _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(0), _right(0), _next(0)
7378 {
7379 assert(type == ast_variable);
7380 _data.variable = value;
7381 }
7382
7383 xpath_ast_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
7384 _type((char)type), _rettype((char)rettype), _axis(0), _test(0), _left(left), _right(right), _next(0)
7385 {
7386 }
7387
7388 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
7389 _type((char)type), _rettype(xpath_type_node_set), _axis((char)axis), _test((char)test), _left(left), _right(0), _next(0)
7390 {
7391 _data.nodetest = contents;
7392 }
7393
7394 void set_next(xpath_ast_node* value)
7395 {
7396 _next = value;
7397 }
7398
7399 void set_right(xpath_ast_node* value)
7400 {
7401 _right = value;
7402 }
7403
7404 bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
7405 {
7406 switch (_type)
7407 {
7408 case ast_op_or:
7409 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
7410
7411 case ast_op_and:
7412 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
7413
7414 case ast_op_equal:
7415 return compare_eq(_left, _right, c, stack, equal_to());
7416
7417 case ast_op_not_equal:
7418 return compare_eq(_left, _right, c, stack, not_equal_to());
7419
7420 case ast_op_less:
7421 return compare_rel(_left, _right, c, stack, less());
7422
7423 case ast_op_greater:
7424 return compare_rel(_right, _left, c, stack, less());
7425
7426 case ast_op_less_or_equal:
7427 return compare_rel(_left, _right, c, stack, less_equal());
7428
7429 case ast_op_greater_or_equal:
7430 return compare_rel(_right, _left, c, stack, less_equal());
7431
7432 case ast_func_starts_with:
7433 {
7434 xpath_allocator_capture cr(stack.result);
7435
7436 xpath_string lr = _left->eval_string(c, stack);
7437 xpath_string rr = _right->eval_string(c, stack);
7438
7439 return starts_with(lr.c_str(), rr.c_str());
7440 }
7441
7442 case ast_func_contains:
7443 {
7444 xpath_allocator_capture cr(stack.result);
7445
7446 xpath_string lr = _left->eval_string(c, stack);
7447 xpath_string rr = _right->eval_string(c, stack);
7448
7449 return find_substring(lr.c_str(), rr.c_str()) != 0;
7450 }
7451
7452 case ast_func_boolean:
7453 return _left->eval_boolean(c, stack);
7454
7455 case ast_func_not:
7456 return !_left->eval_boolean(c, stack);
7457
7458 case ast_func_true:
7459 return true;
7460
7461 case ast_func_false:
7462 return false;
7463
7464 case ast_func_lang:
7465 {
7466 if (c.n.attribute()) return false;
7467
7468 xpath_allocator_capture cr(stack.result);
7469
7470 xpath_string lang = _left->eval_string(c, stack);
7471
7472 for (xml_node n = c.n.node(); n; n = n.parent())
7473 {
7474 xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
7475
7476 if (a)
7477 {
7478 const char_t* value = a.value();
7479
7480 // strnicmp / strncasecmp is not portable
7481 for (const char_t* lit = lang.c_str(); *lit; ++lit)
7482 {
7483 if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
7484 ++value;
7485 }
7486
7487 return *value == 0 || *value == '-';
7488 }
7489 }
7490
7491 return false;
7492 }
7493
7494 case ast_variable:
7495 {
7496 assert(_rettype == _data.variable->type());
7497
7498 if (_rettype == xpath_type_boolean)
7499 return _data.variable->get_boolean();
7500
7501 // fallthrough to type conversion
7502 }
7503
7504 default:
7505 {
7506 switch (_rettype)
7507 {
7508 case xpath_type_number:
7509 return convert_number_to_boolean(eval_number(c, stack));
7510
7511 case xpath_type_string:
7512 {
7513 xpath_allocator_capture cr(stack.result);
7514
7515 return !eval_string(c, stack).empty();
7516 }
7517
7518 case xpath_type_node_set:
7519 {
7520 xpath_allocator_capture cr(stack.result);
7521
7522 return !eval_node_set(c, stack).empty();
7523 }
7524
7525 default:
7526 assert(!"Wrong expression for return type boolean");
7527 return false;
7528 }
7529 }
7530 }
7531 }
7532
7533 double eval_number(const xpath_context& c, const xpath_stack& stack)
7534 {
7535 switch (_type)
7536 {
7537 case ast_op_add:
7538 return _left->eval_number(c, stack) + _right->eval_number(c, stack);
7539
7540 case ast_op_subtract:
7541 return _left->eval_number(c, stack) - _right->eval_number(c, stack);
7542
7543 case ast_op_multiply:
7544 return _left->eval_number(c, stack) * _right->eval_number(c, stack);
7545
7546 case ast_op_divide:
7547 return _left->eval_number(c, stack) / _right->eval_number(c, stack);
7548
7549 case ast_op_mod:
7550 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
7551
7552 case ast_op_negate:
7553 return -_left->eval_number(c, stack);
7554
7555 case ast_number_constant:
7556 return _data.number;
7557
7558 case ast_func_last:
7559 return (double)c.size;
7560
7561 case ast_func_position:
7562 return (double)c.position;
7563
7564 case ast_func_count:
7565 {
7566 xpath_allocator_capture cr(stack.result);
7567
7568 return (double)_left->eval_node_set(c, stack).size();
7569 }
7570
7571 case ast_func_string_length_0:
7572 {
7573 xpath_allocator_capture cr(stack.result);
7574
7575 return (double)string_value(c.n, stack.result).length();
7576 }
7577
7578 case ast_func_string_length_1:
7579 {
7580 xpath_allocator_capture cr(stack.result);
7581
7582 return (double)_left->eval_string(c, stack).length();
7583 }
7584
7585 case ast_func_number_0:
7586 {
7587 xpath_allocator_capture cr(stack.result);
7588
7589 return convert_string_to_number(string_value(c.n, stack.result).c_str());
7590 }
7591
7592 case ast_func_number_1:
7593 return _left->eval_number(c, stack);
7594
7595 case ast_func_sum:
7596 {
7597 xpath_allocator_capture cr(stack.result);
7598
7599 double r = 0;
7600
7601 xpath_node_set_raw ns = _left->eval_node_set(c, stack);
7602
7603 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
7604 {
7605 xpath_allocator_capture cri(stack.result);
7606
7607 r += convert_string_to_number(string_value(*it, stack.result).c_str());
7608 }
7609
7610 return r;
7611 }
7612
7613 case ast_func_floor:
7614 {
7615 double r = _left->eval_number(c, stack);
7616
7617 return r == r ? floor(r) : r;
7618 }
7619
7620 case ast_func_ceiling:
7621 {
7622 double r = _left->eval_number(c, stack);
7623
7624 return r == r ? ceil(r) : r;
7625 }
7626
7627 case ast_func_round:
7628 return round_nearest_nzero(_left->eval_number(c, stack));
7629
7630 case ast_variable:
7631 {
7632 assert(_rettype == _data.variable->type());
7633
7634 if (_rettype == xpath_type_number)
7635 return _data.variable->get_number();
7636
7637 // fallthrough to type conversion
7638 }
7639
7640 default:
7641 {
7642 switch (_rettype)
7643 {
7644 case xpath_type_boolean:
7645 return eval_boolean(c, stack) ? 1 : 0;
7646
7647 case xpath_type_string:
7648 {
7649 xpath_allocator_capture cr(stack.result);
7650
7651 return convert_string_to_number(eval_string(c, stack).c_str());
7652 }
7653
7654 case xpath_type_node_set:
7655 {
7656 xpath_allocator_capture cr(stack.result);
7657
7658 return convert_string_to_number(eval_string(c, stack).c_str());
7659 }
7660
7661 default:
7662 assert(!"Wrong expression for return type number");
7663 return 0;
7664 }
7665
7666 }
7667 }
7668 }
7669
7670 xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
7671 {
7672 assert(_type == ast_func_concat);
7673
7674 xpath_allocator_capture ct(stack.temp);
7675
7676 // count the string number
7677 size_t count = 1;
7678 for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
7679
7680 // gather all strings
7681 xpath_string static_buffer[4];
7682 xpath_string* buffer = static_buffer;
7683
7684 // allocate on-heap for large concats
7685 if (count > sizeof(static_buffer) / sizeof(static_buffer[0]))
7686 {
7687 buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
7688 assert(buffer);
7689 }
7690
7691 // evaluate all strings to temporary stack
7692 xpath_stack swapped_stack = {stack.temp, stack.result};
7693
7694 buffer[0] = _left->eval_string(c, swapped_stack);
7695
7696 size_t pos = 1;
7697 for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
7698 assert(pos == count);
7699
7700 // get total length
7701 size_t length = 0;
7702 for (size_t i = 0; i < count; ++i) length += buffer[i].length();
7703
7704 // create final string
7705 char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
7706 assert(result);
7707
7708 char_t* ri = result;
7709
7710 for (size_t j = 0; j < count; ++j)
7711 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
7712 *ri++ = *bi;
7713
7714 *ri = 0;
7715
7716 return xpath_string(result, true);
7717 }
7718
7719 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
7720 {
7721 switch (_type)
7722 {
7723 case ast_string_constant:
7724 return xpath_string_const(_data.string);
7725
7726 case ast_func_local_name_0:
7727 {
7728 xpath_node na = c.n;
7729
7730 return xpath_string_const(local_name(na));
7731 }
7732
7733 case ast_func_local_name_1:
7734 {
7735 xpath_allocator_capture cr(stack.result);
7736
7737 xpath_node_set_raw ns = _left->eval_node_set(c, stack);
7738 xpath_node na = ns.first();
7739
7740 return xpath_string_const(local_name(na));
7741 }
7742
7743 case ast_func_name_0:
7744 {
7745 xpath_node na = c.n;
7746
7747 return xpath_string_const(qualified_name(na));
7748 }
7749
7750 case ast_func_name_1:
7751 {
7752 xpath_allocator_capture cr(stack.result);
7753
7754 xpath_node_set_raw ns = _left->eval_node_set(c, stack);
7755 xpath_node na = ns.first();
7756
7757 return xpath_string_const(qualified_name(na));
7758 }
7759
7760 case ast_func_namespace_uri_0:
7761 {
7762 xpath_node na = c.n;
7763
7764 return xpath_string_const(namespace_uri(na));
7765 }
7766
7767 case ast_func_namespace_uri_1:
7768 {
7769 xpath_allocator_capture cr(stack.result);
7770
7771 xpath_node_set_raw ns = _left->eval_node_set(c, stack);
7772 xpath_node na = ns.first();
7773
7774 return xpath_string_const(namespace_uri(na));
7775 }
7776
7777 case ast_func_string_0:
7778 return string_value(c.n, stack.result);
7779
7780 case ast_func_string_1:
7781 return _left->eval_string(c, stack);
7782
7783 case ast_func_concat:
7784 return eval_string_concat(c, stack);
7785
7786 case ast_func_substring_before:
7787 {
7788 xpath_allocator_capture cr(stack.temp);
7789
7790 xpath_stack swapped_stack = {stack.temp, stack.result};
7791
7792 xpath_string s = _left->eval_string(c, swapped_stack);
7793 xpath_string p = _right->eval_string(c, swapped_stack);
7794
7795 const char_t* pos = find_substring(s.c_str(), p.c_str());
7796
7797 return pos ? xpath_string(s.c_str(), pos, stack.result) : xpath_string();
7798 }
7799
7800 case ast_func_substring_after:
7801 {
7802 xpath_allocator_capture cr(stack.temp);
7803
7804 xpath_stack swapped_stack = {stack.temp, stack.result};
7805
7806 xpath_string s = _left->eval_string(c, swapped_stack);
7807 xpath_string p = _right->eval_string(c, swapped_stack);
7808
7809 const char_t* pos = find_substring(s.c_str(), p.c_str());
7810 if (!pos) return xpath_string();
7811
7812 const char_t* result = pos + p.length();
7813
7814 return s.uses_heap() ? xpath_string(result, stack.result) : xpath_string_const(result);
7815 }
7816
7817 case ast_func_substring_2:
7818 {
7819 xpath_allocator_capture cr(stack.temp);
7820
7821 xpath_stack swapped_stack = {stack.temp, stack.result};
7822
7823 xpath_string s = _left->eval_string(c, swapped_stack);
7824 size_t s_length = s.length();
7825
7826 double first = round_nearest(_right->eval_number(c, stack));
7827
7828 if (is_nan(first)) return xpath_string(); // NaN
7829 else if (first >= s_length + 1) return xpath_string();
7830
7831 size_t pos = first < 1 ? 1 : (size_t)first;
7832 assert(1 <= pos && pos <= s_length + 1);
7833
7834 const char_t* rbegin = s.c_str() + (pos - 1);
7835
7836 return s.uses_heap() ? xpath_string(rbegin, stack.result) : xpath_string_const(rbegin);
7837 }
7838
7839 case ast_func_substring_3:
7840 {
7841 xpath_allocator_capture cr(stack.temp);
7842
7843 xpath_stack swapped_stack = {stack.temp, stack.result};
7844
7845 xpath_string s = _left->eval_string(c, swapped_stack);
7846 size_t s_length = s.length();
7847
7848 double first = round_nearest(_right->eval_number(c, stack));
7849 double last = first + round_nearest(_right->_next->eval_number(c, stack));
7850
7851 if (is_nan(first) || is_nan(last)) return xpath_string();
7852 else if (first >= s_length + 1) return xpath_string();
7853 else if (first >= last) return xpath_string();
7854 else if (last < 1) return xpath_string();
7855
7856 size_t pos = first < 1 ? 1 : (size_t)first;
7857 size_t end = last >= s_length + 1 ? s_length + 1 : (size_t)last;
7858
7859 assert(1 <= pos && pos <= end && end <= s_length + 1);
7860 const char_t* rbegin = s.c_str() + (pos - 1);
7861 const char_t* rend = s.c_str() + (end - 1);
7862
7863 return (end == s_length + 1 && !s.uses_heap()) ? xpath_string_const(rbegin) : xpath_string(rbegin, rend, stack.result);
7864 }
7865
7866 case ast_func_normalize_space_0:
7867 {
7868 xpath_string s = string_value(c.n, stack.result);
7869
7870 normalize_space(s.data(stack.result));
7871
7872 return s;
7873 }
7874
7875 case ast_func_normalize_space_1:
7876 {
7877 xpath_string s = _left->eval_string(c, stack);
7878
7879 normalize_space(s.data(stack.result));
7880
7881 return s;
7882 }
7883
7884 case ast_func_translate:
7885 {
7886 xpath_allocator_capture cr(stack.temp);
7887
7888 xpath_stack swapped_stack = {stack.temp, stack.result};
7889
7890 xpath_string s = _left->eval_string(c, stack);
7891 xpath_string from = _right->eval_string(c, swapped_stack);
7892 xpath_string to = _right->_next->eval_string(c, swapped_stack);
7893
7894 translate(s.data(stack.result), from.c_str(), to.c_str());
7895
7896 return s;
7897 }
7898
7899 case ast_variable:
7900 {
7901 assert(_rettype == _data.variable->type());
7902
7903 if (_rettype == xpath_type_string)
7904 return xpath_string_const(_data.variable->get_string());
7905
7906 // fallthrough to type conversion
7907 }
7908
7909 default:
7910 {
7911 switch (_rettype)
7912 {
7913 case xpath_type_boolean:
7914 return xpath_string_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
7915
7916 case xpath_type_number:
7917 return convert_number_to_string(eval_number(c, stack), stack.result);
7918
7919 case xpath_type_node_set:
7920 {
7921 xpath_allocator_capture cr(stack.temp);
7922
7923 xpath_stack swapped_stack = {stack.temp, stack.result};
7924
7925 xpath_node_set_raw ns = eval_node_set(c, swapped_stack);
7926 return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
7927 }
7928
7929 default:
7930 assert(!"Wrong expression for return type string");
7931 return xpath_string();
7932 }
7933 }
7934 }
7935 }
7936
7937 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack)
7938 {
7939 switch (_type)
7940 {
7941 case ast_op_union:
7942 {
7943 xpath_allocator_capture cr(stack.temp);
7944
7945 xpath_stack swapped_stack = {stack.temp, stack.result};
7946
7947 xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack);
7948 xpath_node_set_raw rs = _right->eval_node_set(c, stack);
7949
7950 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
7951 rs.set_type(xpath_node_set::type_unsorted);
7952
7953 rs.append(ls.begin(), ls.end(), stack.result);
7954 rs.remove_duplicates();
7955
7956 return rs;
7957 }
7958
7959 case ast_filter:
7960 case ast_filter_posinv:
7961 {
7962 xpath_node_set_raw set = _left->eval_node_set(c, stack);
7963
7964 // either expression is a number or it contains position() call; sort by document order
7965 if (_type == ast_filter) set.sort_do();
7966
7967 apply_predicate(set, 0, _right, stack);
7968
7969 return set;
7970 }
7971
7972 case ast_func_id:
7973 return xpath_node_set_raw();
7974
7975 case ast_step:
7976 {
7977 switch (_axis)
7978 {
7979 case axis_ancestor:
7980 return step_do(c, stack, axis_to_type<axis_ancestor>());
7981
7982 case axis_ancestor_or_self:
7983 return step_do(c, stack, axis_to_type<axis_ancestor_or_self>());
7984
7985 case axis_attribute:
7986 return step_do(c, stack, axis_to_type<axis_attribute>());
7987
7988 case axis_child:
7989 return step_do(c, stack, axis_to_type<axis_child>());
7990
7991 case axis_descendant:
7992 return step_do(c, stack, axis_to_type<axis_descendant>());
7993
7994 case axis_descendant_or_self:
7995 return step_do(c, stack, axis_to_type<axis_descendant_or_self>());
7996
7997 case axis_following:
7998 return step_do(c, stack, axis_to_type<axis_following>());
7999
8000 case axis_following_sibling:
8001 return step_do(c, stack, axis_to_type<axis_following_sibling>());
8002
8003 case axis_namespace:
8004 // namespaced axis is not supported
8005 return xpath_node_set_raw();
8006
8007 case axis_parent:
8008 return step_do(c, stack, axis_to_type<axis_parent>());
8009
8010 case axis_preceding:
8011 return step_do(c, stack, axis_to_type<axis_preceding>());
8012
8013 case axis_preceding_sibling:
8014 return step_do(c, stack, axis_to_type<axis_preceding_sibling>());
8015
8016 case axis_self:
8017 return step_do(c, stack, axis_to_type<axis_self>());
8018 }
8019 }
8020
8021 case ast_step_root:
8022 {
8023 assert(!_right); // root step can't have any predicates
8024
8025 xpath_node_set_raw ns;
8026
8027 ns.set_type(xpath_node_set::type_sorted);
8028
8029 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
8030 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
8031
8032 return ns;
8033 }
8034
8035 case ast_variable:
8036 {
8037 assert(_rettype == _data.variable->type());
8038
8039 if (_rettype == xpath_type_node_set)
8040 {
8041 const xpath_node_set& s = _data.variable->get_node_set();
8042
8043 xpath_node_set_raw ns;
8044
8045 ns.set_type(s.type());
8046 ns.append(s.begin(), s.end(), stack.result);
8047
8048 return ns;
8049 }
8050
8051 // fallthrough to type conversion
8052 }
8053
8054 default:
8055 assert(!"Wrong expression for return type node set");
8056 return xpath_node_set_raw();
8057 }
8058 }
8059
8060 bool is_posinv()
8061 {
8062 switch (_type)
8063 {
8064 case ast_func_position:
8065 return false;
8066
8067 case ast_string_constant:
8068 case ast_number_constant:
8069 case ast_variable:
8070 return true;
8071
8072 case ast_step:
8073 case ast_step_root:
8074 return true;
8075
8076 case ast_predicate:
8077 case ast_filter:
8078 case ast_filter_posinv:
8079 return true;
8080
8081 default:
8082 if (_left && !_left->is_posinv()) return false;
8083
8084 for (xpath_ast_node* n = _right; n; n = n->_next)
8085 if (!n->is_posinv()) return false;
8086
8087 return true;
8088 }
8089 }
8090
8091 xpath_value_type rettype() const
8092 {
8093 return static_cast<xpath_value_type>(_rettype);
8094 }
8095 };
8096
8097 struct xpath_parser
8098 {
8099 xpath_allocator* _alloc;
8100 xpath_lexer _lexer;
8101
8102 const char_t* _query;
8103 xpath_variable_set* _variables;
8104
8105 xpath_parse_result* _result;
8106
8107 #ifdef PUGIXML_NO_EXCEPTIONS
8108 jmp_buf _error_handler;
8109 #endif
8110
8111 void throw_error(const char* message)
8112 {
8113 _result->error = message;
8114 _result->offset = _lexer.current_pos() - _query;
8115
8116 #ifdef PUGIXML_NO_EXCEPTIONS
8117 longjmp(_error_handler, 1);
8118 #else
8119 throw xpath_exception(*_result);
8120 #endif
8121 }
8122
// Reports an allocation failure: through throw_error when exceptions are disabled,
// otherwise by throwing std::bad_alloc.
void throw_error_oom()
{
#ifndef PUGIXML_NO_EXCEPTIONS
    throw std::bad_alloc();
#else
    throw_error("Out of memory");
#endif
}
8131
8132 void* alloc_node()
8133 {
8134 void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node));
8135
8136 if (!result) throw_error_oom();
8137
8138 return result;
8139 }
8140
8141 const char_t* alloc_string(const xpath_lexer_string& value)
8142 {
8143 if (value.begin)
8144 {
8145 size_t length = static_cast<size_t>(value.end - value.begin);
8146
8147 char_t* c = static_cast<char_t*>(_alloc->allocate_nothrow((length + 1) * sizeof(char_t)));
8148 if (!c) throw_error_oom();
8149
8150 memcpy(c, value.begin, length * sizeof(char_t));
8151 c[length] = 0;
8152
8153 return c;
8154 }
8155 else return 0;
8156 }
8157
8158 xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2])
8159 {
8160 assert(argc <= 1);
8161
8162 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
8163
8164 return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]);
8165 }
8166
8167 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
8168 {
8169 switch (name.begin[0])
8170 {
8171 case 'b':
8172 if (name == PUGIXML_TEXT("boolean") && argc == 1)
8173 return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
8174
8175 break;
8176
8177 case 'c':
8178 if (name == PUGIXML_TEXT("count") && argc == 1)
8179 {
8180 if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
8181 return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
8182 }
8183 else if (name == PUGIXML_TEXT("contains") && argc == 2)
8184 return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_string, args[0], args[1]);
8185 else if (name == PUGIXML_TEXT("concat") && argc >= 2)
8186 return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
8187 else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
8188 return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
8189
8190 break;
8191
8192 case 'f':
8193 if (name == PUGIXML_TEXT("false") && argc == 0)
8194 return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
8195 else if (name == PUGIXML_TEXT("floor") && argc == 1)
8196 return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
8197
8198 break;
8199
8200 case 'i':
8201 if (name == PUGIXML_TEXT("id") && argc == 1)
8202 return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
8203
8204 break;
8205
8206 case 'l':
8207 if (name == PUGIXML_TEXT("last") && argc == 0)
8208 return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
8209 else if (name == PUGIXML_TEXT("lang") && argc == 1)
8210 return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
8211 else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
8212 return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
8213
8214 break;
8215
8216 case 'n':
8217 if (name == PUGIXML_TEXT("name") && argc <= 1)
8218 return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
8219 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
8220 return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
8221 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
8222 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
8223 else if (name == PUGIXML_TEXT("not") && argc == 1)
8224 return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
8225 else if (name == PUGIXML_TEXT("number") && argc <= 1)
8226 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
8227
8228 break;
8229
8230 case 'p':
8231 if (name == PUGIXML_TEXT("position") && argc == 0)
8232 return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
8233
8234 break;
8235
8236 case 'r':
8237 if (name == PUGIXML_TEXT("round") && argc == 1)
8238 return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
8239
8240 break;
8241
8242 case 's':
8243 if (name == PUGIXML_TEXT("string") && argc <= 1)
8244 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
8245 else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
8246 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_string, args[0]);
8247 else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
8248 return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
8249 else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
8250 return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
8251 else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
8252 return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
8253 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
8254 return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
8255 else if (name == PUGIXML_TEXT("sum") && argc == 1)
8256 {
8257 if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
8258 return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
8259 }
8260
8261 break;
8262
8263 case 't':
8264 if (name == PUGIXML_TEXT("translate") && argc == 3)
8265 return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
8266 else if (name == PUGIXML_TEXT("true") && argc == 0)
8267 return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
8268
8269 break;
8270 }
8271
8272 throw_error("Unrecognized function or wrong parameter count");
8273
8274 return 0;
8275 }
8276
8277 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
8278 {
8279 specified = true;
8280
8281 switch (name.begin[0])
8282 {
8283 case 'a':
8284 if (name == PUGIXML_TEXT("ancestor"))
8285 return axis_ancestor;
8286 else if (name == PUGIXML_TEXT("ancestor-or-self"))
8287 return axis_ancestor_or_self;
8288 else if (name == PUGIXML_TEXT("attribute"))
8289 return axis_attribute;
8290
8291 break;
8292
8293 case 'c':
8294 if (name == PUGIXML_TEXT("child"))
8295 return axis_child;
8296
8297 break;
8298
8299 case 'd':
8300 if (name == PUGIXML_TEXT("descendant"))
8301 return axis_descendant;
8302 else if (name == PUGIXML_TEXT("descendant-or-self"))
8303 return axis_descendant_or_self;
8304
8305 break;
8306
8307 case 'f':
8308 if (name == PUGIXML_TEXT("following"))
8309 return axis_following;
8310 else if (name == PUGIXML_TEXT("following-sibling"))
8311 return axis_following_sibling;
8312
8313 break;
8314
8315 case 'n':
8316 if (name == PUGIXML_TEXT("namespace"))
8317 return axis_namespace;
8318
8319 break;
8320
8321 case 'p':
8322 if (name == PUGIXML_TEXT("parent"))
8323 return axis_parent;
8324 else if (name == PUGIXML_TEXT("preceding"))
8325 return axis_preceding;
8326 else if (name == PUGIXML_TEXT("preceding-sibling"))
8327 return axis_preceding_sibling;
8328
8329 break;
8330
8331 case 's':
8332 if (name == PUGIXML_TEXT("self"))
8333 return axis_self;
8334
8335 break;
8336 }
8337
8338 specified = false;
8339 return axis_child;
8340 }
8341
8342 nodetest_t parse_node_test_type(const xpath_lexer_string& name)
8343 {
8344 switch (name.begin[0])
8345 {
8346 case 'c':
8347 if (name == PUGIXML_TEXT("comment"))
8348 return nodetest_type_comment;
8349
8350 break;
8351
8352 case 'n':
8353 if (name == PUGIXML_TEXT("node"))
8354 return nodetest_type_node;
8355
8356 break;
8357
8358 case 'p':
8359 if (name == PUGIXML_TEXT("processing-instruction"))
8360 return nodetest_type_pi;
8361
8362 break;
8363
8364 case 't':
8365 if (name == PUGIXML_TEXT("text"))
8366 return nodetest_type_text;
8367
8368 break;
8369 }
8370
8371 return nodetest_none;
8372 }
8373
8374 // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
8375 xpath_ast_node* parse_primary_expression()
8376 {
8377 switch (_lexer.current())
8378 {
8379 case lex_var_ref:
8380 {
8381 xpath_lexer_string name = _lexer.contents();
8382
8383 if (!_variables)
8384 throw_error("Unknown variable: variable set is not provided");
8385
8386 xpath_variable* var = get_variable(_variables, name.begin, name.end);
8387
8388 if (!var)
8389 throw_error("Unknown variable: variable set does not contain the given name");
8390
8391 _lexer.next();
8392
8393 return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var);
8394 }
8395
8396 case lex_open_brace:
8397 {
8398 _lexer.next();
8399
8400 xpath_ast_node* n = parse_expression();
8401
8402 if (_lexer.current() != lex_close_brace)
8403 throw_error("Unmatched braces");
8404
8405 _lexer.next();
8406
8407 return n;
8408 }
8409
8410 case lex_quoted_string:
8411 {
8412 const char_t* value = alloc_string(_lexer.contents());
8413
8414 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);
8415 _lexer.next();
8416
8417 return n;
8418 }
8419
8420 case lex_number:
8421 {
8422 double value = 0;
8423
8424 if (!convert_string_to_number(_lexer.contents().begin, _lexer.contents().end, &value))
8425 throw_error_oom();
8426
8427 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
8428 _lexer.next();
8429
8430 return n;
8431 }
8432
8433 case lex_string:
8434 {
8435 xpath_ast_node* args[2] = {0};
8436 size_t argc = 0;
8437
8438 xpath_lexer_string function = _lexer.contents();
8439 _lexer.next();
8440
8441 xpath_ast_node* last_arg = 0;
8442
8443 if (_lexer.current() != lex_open_brace)
8444 throw_error("Unrecognized function call");
8445 _lexer.next();
8446
8447 if (_lexer.current() != lex_close_brace)
8448 args[argc++] = parse_expression();
8449
8450 while (_lexer.current() != lex_close_brace)
8451 {
8452 if (_lexer.current() != lex_comma)
8453 throw_error("No comma between function arguments");
8454 _lexer.next();
8455
8456 xpath_ast_node* n = parse_expression();
8457
8458 if (argc < 2) args[argc] = n;
8459 else last_arg->set_next(n);
8460
8461 argc++;
8462 last_arg = n;
8463 }
8464
8465 _lexer.next();
8466
8467 return parse_function(function, argc, args);
8468 }
8469
8470 default:
8471 throw_error("Unrecognizable primary expression");
8472
8473 return 0;
8474 }
8475 }
8476
8477 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
8478 // Predicate ::= '[' PredicateExpr ']'
8479 // PredicateExpr ::= Expr
8480 xpath_ast_node* parse_filter_expression()
8481 {
8482 xpath_ast_node* n = parse_primary_expression();
8483
8484 while (_lexer.current() == lex_open_square_brace)
8485 {
8486 _lexer.next();
8487
8488 xpath_ast_node* expr = parse_expression();
8489
8490 if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set");
8491
8492 bool posinv = expr->rettype() != xpath_type_number && expr->is_posinv();
8493
8494 n = new (alloc_node()) xpath_ast_node(posinv ? ast_filter_posinv : ast_filter, xpath_type_node_set, n, expr);
8495
8496 if (_lexer.current() != lex_close_square_brace)
8497 throw_error("Unmatched square brace");
8498
8499 _lexer.next();
8500 }
8501
8502 return n;
8503 }
8504
8505 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
8506 // AxisSpecifier ::= AxisName '::' | '@'?
8507 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
8508 // NameTest ::= '*' | NCName ':' '*' | QName
8509 // AbbreviatedStep ::= '.' | '..'
8510 xpath_ast_node* parse_step(xpath_ast_node* set)
8511 {
8512 if (set && set->rettype() != xpath_type_node_set)
8513 throw_error("Step has to be applied to node set");
8514
8515 bool axis_specified = false;
8516 axis_t axis = axis_child; // implied child axis
8517
8518 if (_lexer.current() == lex_axis_attribute)
8519 {
8520 axis = axis_attribute;
8521 axis_specified = true;
8522
8523 _lexer.next();
8524 }
8525 else if (_lexer.current() == lex_dot)
8526 {
8527 _lexer.next();
8528
8529 return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
8530 }
8531 else if (_lexer.current() == lex_double_dot)
8532 {
8533 _lexer.next();
8534
8535 return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
8536 }
8537
8538 nodetest_t nt_type = nodetest_none;
8539 xpath_lexer_string nt_name;
8540
8541 if (_lexer.current() == lex_string)
8542 {
8543 // node name test
8544 nt_name = _lexer.contents();
8545 _lexer.next();
8546
8547 // was it an axis name?
8548 if (_lexer.current() == lex_double_colon)
8549 {
8550 // parse axis name
8551 if (axis_specified) throw_error("Two axis specifiers in one step");
8552
8553 axis = parse_axis_name(nt_name, axis_specified);
8554
8555 if (!axis_specified) throw_error("Unknown axis");
8556
8557 // read actual node test
8558 _lexer.next();
8559
8560 if (_lexer.current() == lex_multiply)
8561 {
8562 nt_type = nodetest_all;
8563 nt_name = xpath_lexer_string();
8564 _lexer.next();
8565 }
8566 else if (_lexer.current() == lex_string)
8567 {
8568 nt_name = _lexer.contents();
8569 _lexer.next();
8570 }
8571 else throw_error("Unrecognized node test");
8572 }
8573
8574 if (nt_type == nodetest_none)
8575 {
8576 // node type test or processing-instruction
8577 if (_lexer.current() == lex_open_brace)
8578 {
8579 _lexer.next();
8580
8581 if (_lexer.current() == lex_close_brace)
8582 {
8583 _lexer.next();
8584
8585 nt_type = parse_node_test_type(nt_name);
8586
8587 if (nt_type == nodetest_none) throw_error("Unrecognized node type");
8588
8589 nt_name = xpath_lexer_string();
8590 }
8591 else if (nt_name == PUGIXML_TEXT("processing-instruction"))
8592 {
8593 if (_lexer.current() != lex_quoted_string)
8594 throw_error("Only literals are allowed as arguments to processing-instruction()");
8595
8596 nt_type = nodetest_pi;
8597 nt_name = _lexer.contents();
8598 _lexer.next();
8599
8600 if (_lexer.current() != lex_close_brace)
8601 throw_error("Unmatched brace near processing-instruction()");
8602 _lexer.next();
8603 }
8604 else
8605 throw_error("Unmatched brace near node type test");
8606
8607 }
8608 // QName or NCName:*
8609 else
8610 {
8611 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
8612 {
8613 nt_name.end--; // erase *
8614
8615 nt_type = nodetest_all_in_namespace;
8616 }
8617 else nt_type = nodetest_name;
8618 }
8619 }
8620 }
8621 else if (_lexer.current() == lex_multiply)
8622 {
8623 nt_type = nodetest_all;
8624 _lexer.next();
8625 }
8626 else throw_error("Unrecognized node test");
8627
8628 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));
8629
8630 xpath_ast_node* last = 0;
8631
8632 while (_lexer.current() == lex_open_square_brace)
8633 {
8634 _lexer.next();
8635
8636 xpath_ast_node* expr = parse_expression();
8637
8638 xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr);
8639
8640 if (_lexer.current() != lex_close_square_brace)
8641 throw_error("Unmatched square brace");
8642 _lexer.next();
8643
8644 if (last) last->set_next(pred);
8645 else n->set_right(pred);
8646
8647 last = pred;
8648 }
8649
8650 return n;
8651 }
8652
8653 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
8654 xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
8655 {
8656 xpath_ast_node* n = parse_step(set);
8657
8658 while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
8659 {
8660 lexeme_t l = _lexer.current();
8661 _lexer.next();
8662
8663 if (l == lex_double_slash)
8664 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
8665
8666 n = parse_step(n);
8667 }
8668
8669 return n;
8670 }
8671
8672 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
8673 // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
8674 xpath_ast_node* parse_location_path()
8675 {
8676 if (_lexer.current() == lex_slash)
8677 {
8678 _lexer.next();
8679
8680 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
8681
8682 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
8683 lexeme_t l = _lexer.current();
8684
8685 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
8686 return parse_relative_location_path(n);
8687 else
8688 return n;
8689 }
8690 else if (_lexer.current() == lex_double_slash)
8691 {
8692 _lexer.next();
8693
8694 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
8695 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
8696
8697 return parse_relative_location_path(n);
8698 }
8699
8700 // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
8701 return parse_relative_location_path(0);
8702 }
8703
8704 // PathExpr ::= LocationPath
8705 // | FilterExpr
8706 // | FilterExpr '/' RelativeLocationPath
8707 // | FilterExpr '//' RelativeLocationPath
8708 xpath_ast_node* parse_path_expression()
8709 {
8710 // Clarification.
8711 // PathExpr begins with either LocationPath or FilterExpr.
8712 // FilterExpr begins with PrimaryExpr
8713 // PrimaryExpr begins with '$' in case of it being a variable reference,
8714 // '(' in case of it being an expression, string literal, number constant or
8715 // function call.
8716
8717 if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
8718 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
8719 _lexer.current() == lex_string)
8720 {
8721 if (_lexer.current() == lex_string)
8722 {
8723 // This is either a function call, or not - if not, we shall proceed with location path
8724 const char_t* state = _lexer.state();
8725
8726 while (IS_CHARTYPE(*state, ct_space)) ++state;
8727
8728 if (*state != '(') return parse_location_path();
8729
8730 // This looks like a function call; however this still can be a node-test. Check it.
8731 if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();
8732 }
8733
8734 xpath_ast_node* n = parse_filter_expression();
8735
8736 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
8737 {
8738 lexeme_t l = _lexer.current();
8739 _lexer.next();
8740
8741 if (l == lex_double_slash)
8742 {
8743 if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
8744
8745 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
8746 }
8747
8748 // select from location path
8749 return parse_relative_location_path(n);
8750 }
8751
8752 return n;
8753 }
8754 else return parse_location_path();
8755 }
8756
8757 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
8758 xpath_ast_node* parse_union_expression()
8759 {
8760 xpath_ast_node* n = parse_path_expression();
8761
8762 while (_lexer.current() == lex_union)
8763 {
8764 _lexer.next();
8765
8766 xpath_ast_node* expr = parse_union_expression();
8767
8768 if (n->rettype() != xpath_type_node_set || expr->rettype() != xpath_type_node_set)
8769 throw_error("Union operator has to be applied to node sets");
8770
8771 n = new (alloc_node()) xpath_ast_node(ast_op_union, xpath_type_node_set, n, expr);
8772 }
8773
8774 return n;
8775 }
8776
8777 // UnaryExpr ::= UnionExpr | '-' UnaryExpr
8778 xpath_ast_node* parse_unary_expression()
8779 {
8780 if (_lexer.current() == lex_minus)
8781 {
8782 _lexer.next();
8783
8784 xpath_ast_node* expr = parse_unary_expression();
8785
8786 return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
8787 }
8788 else return parse_union_expression();
8789 }
8790
8791 // MultiplicativeExpr ::= UnaryExpr
8792 // | MultiplicativeExpr '*' UnaryExpr
8793 // | MultiplicativeExpr 'div' UnaryExpr
8794 // | MultiplicativeExpr 'mod' UnaryExpr
8795 xpath_ast_node* parse_multiplicative_expression()
8796 {
8797 xpath_ast_node* n = parse_unary_expression();
8798
8799 while (_lexer.current() == lex_multiply || (_lexer.current() == lex_string &&
8800 (_lexer.contents() == PUGIXML_TEXT("mod") || _lexer.contents() == PUGIXML_TEXT("div"))))
8801 {
8802 ast_type_t op = _lexer.current() == lex_multiply ? ast_op_multiply :
8803 _lexer.contents().begin[0] == 'd' ? ast_op_divide : ast_op_mod;
8804 _lexer.next();
8805
8806 xpath_ast_node* expr = parse_unary_expression();
8807
8808 n = new (alloc_node()) xpath_ast_node(op, xpath_type_number, n, expr);
8809 }
8810
8811 return n;
8812 }
8813
8814 // AdditiveExpr ::= MultiplicativeExpr
8815 // | AdditiveExpr '+' MultiplicativeExpr
8816 // | AdditiveExpr '-' MultiplicativeExpr
8817 xpath_ast_node* parse_additive_expression()
8818 {
8819 xpath_ast_node* n = parse_multiplicative_expression();
8820
8821 while (_lexer.current() == lex_plus || _lexer.current() == lex_minus)
8822 {
8823 lexeme_t l = _lexer.current();
8824
8825 _lexer.next();
8826
8827 xpath_ast_node* expr = parse_multiplicative_expression();
8828
8829 n = new (alloc_node()) xpath_ast_node(l == lex_plus ? ast_op_add : ast_op_subtract, xpath_type_number, n, expr);
8830 }
8831
8832 return n;
8833 }
8834
8835 // RelationalExpr ::= AdditiveExpr
8836 // | RelationalExpr '<' AdditiveExpr
8837 // | RelationalExpr '>' AdditiveExpr
8838 // | RelationalExpr '<=' AdditiveExpr
8839 // | RelationalExpr '>=' AdditiveExpr
8840 xpath_ast_node* parse_relational_expression()
8841 {
8842 xpath_ast_node* n = parse_additive_expression();
8843
8844 while (_lexer.current() == lex_less || _lexer.current() == lex_less_or_equal ||
8845 _lexer.current() == lex_greater || _lexer.current() == lex_greater_or_equal)
8846 {
8847 lexeme_t l = _lexer.current();
8848 _lexer.next();
8849
8850 xpath_ast_node* expr = parse_additive_expression();
8851
8852 n = new (alloc_node()) xpath_ast_node(l == lex_less ? ast_op_less : l == lex_greater ? ast_op_greater :
8853 l == lex_less_or_equal ? ast_op_less_or_equal : ast_op_greater_or_equal, xpath_type_boolean, n, expr);
8854 }
8855
8856 return n;
8857 }
8858
8859 // EqualityExpr ::= RelationalExpr
8860 // | EqualityExpr '=' RelationalExpr
8861 // | EqualityExpr '!=' RelationalExpr
8862 xpath_ast_node* parse_equality_expression()
8863 {
8864 xpath_ast_node* n = parse_relational_expression();
8865
8866 while (_lexer.current() == lex_equal || _lexer.current() == lex_not_equal)
8867 {
8868 lexeme_t l = _lexer.current();
8869
8870 _lexer.next();
8871
8872 xpath_ast_node* expr = parse_relational_expression();
8873
8874 n = new (alloc_node()) xpath_ast_node(l == lex_equal ? ast_op_equal : ast_op_not_equal, xpath_type_boolean, n, expr);
8875 }
8876
8877 return n;
8878 }
8879
8880 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
8881 xpath_ast_node* parse_and_expression()
8882 {
8883 xpath_ast_node* n = parse_equality_expression();
8884
8885 while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("and"))
8886 {
8887 _lexer.next();
8888
8889 xpath_ast_node* expr = parse_equality_expression();
8890
8891 n = new (alloc_node()) xpath_ast_node(ast_op_and, xpath_type_boolean, n, expr);
8892 }
8893
8894 return n;
8895 }
8896
8897 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
8898 xpath_ast_node* parse_or_expression()
8899 {
8900 xpath_ast_node* n = parse_and_expression();
8901
8902 while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("or"))
8903 {
8904 _lexer.next();
8905
8906 xpath_ast_node* expr = parse_and_expression();
8907
8908 n = new (alloc_node()) xpath_ast_node(ast_op_or, xpath_type_boolean, n, expr);
8909 }
8910
8911 return n;
8912 }
8913
8914 // Expr ::= OrExpr
8915 xpath_ast_node* parse_expression()
8916 {
8917 return parse_or_expression();
8918 }
8919
8920 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)
8921 {
8922 }
8923
8924 xpath_ast_node* parse()
8925 {
8926 xpath_ast_node* result = parse_expression();
8927
8928 if (_lexer.current() != lex_eof)
8929 {
8930 // there are still unparsed tokens left, error
8931 throw_error("Incorrect query");
8932 }
8933
8934 return result;
8935 }
8936
8937 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
8938 {
8939 xpath_parser parser(query, variables, alloc, result);
8940
8941 #ifdef PUGIXML_NO_EXCEPTIONS
8942 int error = setjmp(parser._error_handler);
8943
8944 return (error == 0) ? parser.parse() : 0;
8945 #else
8946 return parser.parse();
8947 #endif
8948 }
8949 };
8950
8951 struct xpath_query_impl
8952 {
8953 static xpath_query_impl* create()
8954 {
8955 void* memory = global_allocate(sizeof(xpath_query_impl));
8956
8957 return new (memory) xpath_query_impl();
8958 }
8959
8960 static void destroy(void* ptr)
8961 {
8962 if (!ptr) return;
8963
8964 // free all allocated pages
8965 static_cast<xpath_query_impl*>(ptr)->alloc.release();
8966
8967 // free allocator memory (with the first page)
8968 global_deallocate(ptr);
8969 }
8970
8971 xpath_query_impl(): root(0), alloc(&block)
8972 {
8973 block.next = 0;
8974 }
8975
8976 xpath_ast_node* root;
8977 xpath_allocator alloc;
8978 xpath_memory_block block;
8979 };
8980
8981 xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd)
8982 {
8983 if (!impl) return xpath_string();
8984
8985 #ifdef PUGIXML_NO_EXCEPTIONS
8986 if (setjmp(sd.error_handler)) return xpath_string();
8987 #endif
8988
8989 xpath_context c(n, 1, 1);
8990
8991 return impl->root->eval_string(c, sd.stack);
8992 }
8993}
8994
8995namespace pugi
8996{
8997#ifndef PUGIXML_NO_EXCEPTIONS
8998 xpath_exception::xpath_exception(const xpath_parse_result& result): _result(result)
8999 {
9000 assert(result.error);
9001 }
9002
9003 const char* xpath_exception::what() const throw()
9004 {
9005 return _result.error;
9006 }
9007
9008 const xpath_parse_result& xpath_exception::result() const
9009 {
9010 return _result;
9011 }
9012#endif
9013
9014 xpath_node::xpath_node()
9015 {
9016 }
9017
9018 xpath_node::xpath_node(const xml_node& node): _node(node)
9019 {
9020 }
9021
9022 xpath_node::xpath_node(const xml_attribute& attribute, const xml_node& parent): _node(attribute ? parent : xml_node()), _attribute(attribute)
9023 {
9024 }
9025
9026 xml_node xpath_node::node() const
9027 {
9028 return _attribute ? xml_node() : _node;
9029 }
9030
9031 xml_attribute xpath_node::attribute() const
9032 {
9033 return _attribute;
9034 }
9035
9036 xml_node xpath_node::parent() const
9037 {
9038 return _attribute ? _node : _node.parent();
9039 }
9040
9041 xpath_node::operator xpath_node::unspecified_bool_type() const
9042 {
9043 return (_node || _attribute) ? &xpath_node::_node : 0;
9044 }
9045
9046 bool xpath_node::operator!() const
9047 {
9048 return !(_node || _attribute);
9049 }
9050
9051 bool xpath_node::operator==(const xpath_node& n) const
9052 {
9053 return _node == n._node && _attribute == n._attribute;
9054 }
9055
9056 bool xpath_node::operator!=(const xpath_node& n) const
9057 {
9058 return _node != n._node || _attribute != n._attribute;
9059 }
9060
9061#ifdef __BORLANDC__
9062 bool operator&&(const xpath_node& lhs, bool rhs)
9063 {
9064 return (bool)lhs && rhs;
9065 }
9066
9067 bool operator||(const xpath_node& lhs, bool rhs)
9068 {
9069 return (bool)lhs || rhs;
9070 }
9071#endif
9072
9073 void xpath_node_set::_assign(const_iterator begin, const_iterator end)
9074 {
9075 assert(begin <= end);
9076
9077 size_t size = static_cast<size_t>(end - begin);
9078
9079 if (size <= 1)
9080 {
9081 // deallocate old buffer
9082 if (_begin != &_storage) global_deallocate(_begin);
9083
9084 // use internal buffer
9085 if (begin != end) _storage = *begin;
9086
9087 _begin = &_storage;
9088 _end = &_storage + size;
9089 }
9090 else
9091 {
9092 // make heap copy
9093 xpath_node* storage = static_cast<xpath_node*>(global_allocate(size * sizeof(xpath_node)));
9094
9095 if (!storage)
9096 {
9097 #ifdef PUGIXML_NO_EXCEPTIONS
9098 return;
9099 #else
9100 throw std::bad_alloc();
9101 #endif
9102 }
9103
9104 memcpy(storage, begin, size * sizeof(xpath_node));
9105
9106 // deallocate old buffer
9107 if (_begin != &_storage) global_deallocate(_begin);
9108
9109 // finalize
9110 _begin = storage;
9111 _end = storage + size;
9112 }
9113 }
9114
9115 xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
9116 {
9117 }
9118
9119 xpath_node_set::xpath_node_set(const_iterator begin, const_iterator end, type_t type): _type(type), _begin(&_storage), _end(&_storage)
9120 {
9121 _assign(begin, end);
9122 }
9123
9124 xpath_node_set::~xpath_node_set()
9125 {
9126 if (_begin != &_storage) global_deallocate(_begin);
9127 }
9128
9129 xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(ns._type), _begin(&_storage), _end(&_storage)
9130 {
9131 _assign(ns._begin, ns._end);
9132 }
9133
9134 xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
9135 {
9136 if (this == &ns) return *this;
9137
9138 _type = ns._type;
9139 _assign(ns._begin, ns._end);
9140
9141 return *this;
9142 }
9143
9144 xpath_node_set::type_t xpath_node_set::type() const
9145 {
9146 return _type;
9147 }
9148
9149 size_t xpath_node_set::size() const
9150 {
9151 return _end - _begin;
9152 }
9153
9154 bool xpath_node_set::empty() const
9155 {
9156 return _begin == _end;
9157 }
9158
9159 const xpath_node& xpath_node_set::operator[](size_t index) const
9160 {
9161 assert(index < size());
9162 return _begin[index];
9163 }
9164
9165 xpath_node_set::const_iterator xpath_node_set::begin() const
9166 {
9167 return _begin;
9168 }
9169
9170 xpath_node_set::const_iterator xpath_node_set::end() const
9171 {
9172 return _end;
9173 }
9174
9175 void xpath_node_set::sort(bool reverse)
9176 {
9177 _type = xpath_sort(_begin, _end, _type, reverse);
9178 }
9179
9180 xpath_node xpath_node_set::first() const
9181 {
9182 return xpath_first(_begin, _end, _type);
9183 }
9184
9185 xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
9186 {
9187 }
9188
9189 xpath_parse_result::operator bool() const
9190 {
9191 return error == 0;
9192 }
9193 const char* xpath_parse_result::description() const
9194 {
9195 return error ? error : "No error";
9196 }
9197
9198 xpath_variable::xpath_variable()
9199 {
9200 }
9201
9202 const char_t* xpath_variable::name() const
9203 {
9204 switch (_type)
9205 {
9206 case xpath_type_node_set:
9207 return static_cast<const xpath_variable_node_set*>(this)->name;
9208
9209 case xpath_type_number:
9210 return static_cast<const xpath_variable_number*>(this)->name;
9211
9212 case xpath_type_string:
9213 return static_cast<const xpath_variable_string*>(this)->name;
9214
9215 case xpath_type_boolean:
9216 return static_cast<const xpath_variable_boolean*>(this)->name;
9217
9218 default:
9219 assert(!"Invalid variable type");
9220 return 0;
9221 }
9222 }
9223
9224 xpath_value_type xpath_variable::type() const
9225 {
9226 return _type;
9227 }
9228
9229 bool xpath_variable::get_boolean() const
9230 {
9231 return (_type == xpath_type_boolean) ? static_cast<const xpath_variable_boolean*>(this)->value : false;
9232 }
9233
9234 double xpath_variable::get_number() const
9235 {
9236 return (_type == xpath_type_number) ? static_cast<const xpath_variable_number*>(this)->value : gen_nan();
9237 }
9238
9239 const char_t* xpath_variable::get_string() const
9240 {
9241 const char_t* value = (_type == xpath_type_string) ? static_cast<const xpath_variable_string*>(this)->value : 0;
9242 return value ? value : PUGIXML_TEXT("");
9243 }
9244
9245 const xpath_node_set& xpath_variable::get_node_set() const
9246 {
9247 return (_type == xpath_type_node_set) ? static_cast<const xpath_variable_node_set*>(this)->value : dummy_node_set;
9248 }
9249
9250 bool xpath_variable::set(bool value)
9251 {
9252 if (_type != xpath_type_boolean) return false;
9253
9254 static_cast<xpath_variable_boolean*>(this)->value = value;
9255 return true;
9256 }
9257
9258 bool xpath_variable::set(double value)
9259 {
9260 if (_type != xpath_type_number) return false;
9261
9262 static_cast<xpath_variable_number*>(this)->value = value;
9263 return true;
9264 }
9265
9266 bool xpath_variable::set(const char_t* value)
9267 {
9268 if (_type != xpath_type_string) return false;
9269
9270 xpath_variable_string* var = static_cast<xpath_variable_string*>(this);
9271
9272 // duplicate string
9273 size_t size = (strlength(value) + 1) * sizeof(char_t);
9274
9275 char_t* copy = static_cast<char_t*>(global_allocate(size));
9276 if (!copy) return false;
9277
9278 memcpy(copy, value, size);
9279
9280 // replace old string
9281 if (var->value) global_deallocate(var->value);
9282 var->value = copy;
9283
9284 return true;
9285 }
9286
9287 bool xpath_variable::set(const xpath_node_set& value)
9288 {
9289 if (_type != xpath_type_node_set) return false;
9290
9291 static_cast<xpath_variable_node_set*>(this)->value = value;
9292 return true;
9293 }
9294
9295 xpath_variable_set::xpath_variable_set()
9296 {
9297 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) _data[i] = 0;
9298 }
9299
9300 xpath_variable_set::~xpath_variable_set()
9301 {
9302 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
9303 {
9304 xpath_variable* var = _data[i];
9305
9306 while (var)
9307 {
9308 xpath_variable* next = var->_next;
9309
9310 delete_xpath_variable(var->_type, var);
9311
9312 var = next;
9313 }
9314 }
9315 }
9316
9317 xpath_variable* xpath_variable_set::find(const char_t* name) const
9318 {
9319 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
9320 size_t hash = hash_string(name) % hash_size;
9321
9322 // look for existing variable
9323 for (xpath_variable* var = _data[hash]; var; var = var->_next)
9324 if (strequal(var->name(), name))
9325 return var;
9326
9327 return 0;
9328 }
9329
9330 xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
9331 {
9332 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
9333 size_t hash = hash_string(name) % hash_size;
9334
9335 // look for existing variable
9336 for (xpath_variable* var = _data[hash]; var; var = var->_next)
9337 if (strequal(var->name(), name))
9338 return var->type() == type ? var : 0;
9339
9340 // add new variable
9341 xpath_variable* result = new_xpath_variable(type, name);
9342
9343 if (result)
9344 {
9345 result->_type = type;
9346 result->_next = _data[hash];
9347
9348 _data[hash] = result;
9349 }
9350
9351 return result;
9352 }
9353
9354 bool xpath_variable_set::set(const char_t* name, bool value)
9355 {
9356 xpath_variable* var = add(name, xpath_type_boolean);
9357 return var ? var->set(value) : false;
9358 }
9359
9360 bool xpath_variable_set::set(const char_t* name, double value)
9361 {
9362 xpath_variable* var = add(name, xpath_type_number);
9363 return var ? var->set(value) : false;
9364 }
9365
9366 bool xpath_variable_set::set(const char_t* name, const char_t* value)
9367 {
9368 xpath_variable* var = add(name, xpath_type_string);
9369 return var ? var->set(value) : false;
9370 }
9371
9372 bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
9373 {
9374 xpath_variable* var = add(name, xpath_type_node_set);
9375 return var ? var->set(value) : false;
9376 }
9377
9378 xpath_variable* xpath_variable_set::get(const char_t* name)
9379 {
9380 return find(name);
9381 }
9382
9383 const xpath_variable* xpath_variable_set::get(const char_t* name) const
9384 {
9385 return find(name);
9386 }
9387
9388 xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
9389 {
9390 xpath_query_impl* impl = xpath_query_impl::create();
9391
9392 if (!impl)
9393 {
9394 #ifdef PUGIXML_NO_EXCEPTIONS
9395 _result.error = "Out of memory";
9396 #else
9397 throw std::bad_alloc();
9398 #endif
9399 }
9400 else
9401 {
9402 buffer_holder impl_holder(impl, xpath_query_impl::destroy);
9403
9404 impl->root = xpath_parser::parse(query, variables, &impl->alloc, &_result);
9405
9406 if (impl->root)
9407 {
9408 _impl = static_cast<xpath_query_impl*>(impl_holder.release());
9409 _result.error = 0;
9410 }
9411 }
9412 }
9413
9414 xpath_query::~xpath_query()
9415 {
9416 xpath_query_impl::destroy(_impl);
9417 }
9418
9419 xpath_value_type xpath_query::return_type() const
9420 {
9421 if (!_impl) return xpath_type_none;
9422
9423 return static_cast<xpath_query_impl*>(_impl)->root->rettype();
9424 }
9425
9426 bool xpath_query::evaluate_boolean(const xpath_node& n) const
9427 {
9428 if (!_impl) return false;
9429
9430 xpath_context c(n, 1, 1);
9431 xpath_stack_data sd;
9432
9433 #ifdef PUGIXML_NO_EXCEPTIONS
9434 if (setjmp(sd.error_handler)) return false;
9435 #endif
9436
9437 return static_cast<xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
9438 }
9439
9440 double xpath_query::evaluate_number(const xpath_node& n) const
9441 {
9442 if (!_impl) return gen_nan();
9443
9444 xpath_context c(n, 1, 1);
9445 xpath_stack_data sd;
9446
9447 #ifdef PUGIXML_NO_EXCEPTIONS
9448 if (setjmp(sd.error_handler)) return gen_nan();
9449 #endif
9450
9451 return static_cast<xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
9452 }
9453
9454#ifndef PUGIXML_NO_STL
9455 string_t xpath_query::evaluate_string(const xpath_node& n) const
9456 {
9457 xpath_stack_data sd;
9458
9459 return evaluate_string_impl(static_cast<xpath_query_impl*>(_impl), n, sd).c_str();
9460 }
9461#endif
9462
9463 size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
9464 {
9465 xpath_stack_data sd;
9466
9467 xpath_string r = evaluate_string_impl(static_cast<xpath_query_impl*>(_impl), n, sd);
9468
9469 size_t full_size = r.length() + 1;
9470
9471 if (capacity > 0)
9472 {
9473 size_t size = (full_size < capacity) ? full_size : capacity;
9474 assert(size > 0);
9475
9476 memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
9477 buffer[size - 1] = 0;
9478 }
9479
9480 return full_size;
9481 }
9482
9483 xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
9484 {
9485 if (!_impl) return xpath_node_set();
9486
9487 xpath_ast_node* root = static_cast<xpath_query_impl*>(_impl)->root;
9488
9489 if (root->rettype() != xpath_type_node_set)
9490 {
9491 #ifdef PUGIXML_NO_EXCEPTIONS
9492 return xpath_node_set();
9493 #else
9494 xpath_parse_result result;
9495 result.error = "Expression does not evaluate to node set";
9496
9497 throw xpath_exception(result);
9498 #endif
9499 }
9500
9501 xpath_context c(n, 1, 1);
9502 xpath_stack_data sd;
9503
9504 #ifdef PUGIXML_NO_EXCEPTIONS
9505 if (setjmp(sd.error_handler)) return xpath_node_set();
9506 #endif
9507
9508 xpath_node_set_raw r = root->eval_node_set(c, sd.stack);
9509
9510 return xpath_node_set(r.begin(), r.end(), r.type());
9511 }
9512
9513 const xpath_parse_result& xpath_query::result() const
9514 {
9515 return _result;
9516 }
9517
9518 xpath_query::operator xpath_query::unspecified_bool_type() const
9519 {
9520 return _impl ? &xpath_query::_impl : 0;
9521 }
9522
9523 bool xpath_query::operator!() const
9524 {
9525 return !_impl;
9526 }
9527
9528 xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
9529 {
9530 xpath_query q(query, variables);
9531 return select_single_node(q);
9532 }
9533
9534 xpath_node xml_node::select_single_node(const xpath_query& query) const
9535 {
9536 xpath_node_set s = query.evaluate_node_set(*this);
9537 return s.empty() ? xpath_node() : s.first();
9538 }
9539
9540 xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
9541 {
9542 xpath_query q(query, variables);
9543 return select_nodes(q);
9544 }
9545
9546 xpath_node_set xml_node::select_nodes(const xpath_query& query) const
9547 {
9548 return query.evaluate_node_set(*this);
9549 }
9550}
9551
9552#endif
9553
9554/**
9555 * Copyright (c) 2006-2010 Arseny Kapoulkine
9556 *
9557 * Permission is hereby granted, free of charge, to any person
9558 * obtaining a copy of this software and associated documentation
9559 * files (the "Software"), to deal in the Software without
9560 * restriction, including without limitation the rights to use,
9561 * copy, modify, merge, publish, distribute, sublicense, and/or sell
9562 * copies of the Software, and to permit persons to whom the
9563 * Software is furnished to do so, subject to the following
9564 * conditions:
9565 *
9566 * The above copyright notice and this permission notice shall be
9567 * included in all copies or substantial portions of the Software.
9568 *
9569 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
9570 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
9571 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
9572 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
9573 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
9574 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
9575 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
9576 * OTHER DEALINGS IN THE SOFTWARE.
9577 */
Note: See TracBrowser for help on using the repository browser.