rapidxml.rapidxml source code

1 /*
2  * Hunt - A xml library for D programming language.
3  *
4  * Copyright (C) 2006, 2009 Marcin Kalicinski (For C++ Version 1.13)
5  * Copyright (C) 2018-2019 HuntLabs ( For D Language Version)
6  *
7  * Website: https://www.huntlabs.net
8  *
9  * Licensed under the Apache-2.0 License.
10  *
11  */
12 
13 module rapidxml.rapidxml;
14 
15 import std.stdio;
16 import std.exception;
17 
18 import rapidxml.skip;
19 
/**
 * Kinds of nodes that can appear in the document tree.
 *
 * `node_document` is the first member and is therefore the default
 * (`.init`) value of every `node_type` field that is never assigned.
 */
enum node_type
{
    node_document,      //!< A document node. Name and value are empty.
    node_element,       //!< An element node. Name contains element name. Value contains text of first data node.
    node_data,          //!< A data node. Name is empty. Value contains data text.
    node_cdata,         //!< A CDATA node. Name is empty. Value contains data text.
    node_comment,       //!< A comment node. Name is empty. Value contains comment text.
    node_declaration,   //!< A declaration node. Name and value are empty. Declaration parameters (version, encoding and standalone) are in node attributes.
    node_doctype,       //!< A DOCTYPE node. Name is empty. Value contains DOCTYPE text.
    node_pi,            //!< A PI node. Name contains target. Value contains instructions.
    node_literal        //!< Value is unencoded text (used for inserting pre-rendered XML).
}
32 
/**
 * State shared by nodes and attributes: a name, a value and a link to
 * the owning node. Both `xml_attribute` and `xml_node` derive from it.
 */
class xml_base
{
    // Name of the node or attribute.
    string m_name;
    // Value of the node or attribute.
    string m_value;
    // Owning node; null while the object is not attached to a tree.
    xml_node m_parent;

}
40 
/**
 * A single attribute of an element. Attributes of one node form a doubly
 * linked list through `m_prev_attribute` / `m_next_attribute`.
 */
class xml_attribute :  xml_base
{
    xml_attribute m_prev_attribute;
    xml_attribute m_next_attribute;
    // Cached namespace URI, filled in lazily by xmlns().
    string    m_xmlns;
    string  m_local_name;

    /**
     * Returns the document this attribute ultimately belongs to, or null
     * when the attribute is detached or the top-most ancestor is not a
     * document node.
     */
    xml_document document() 
    {
        xml_node top = m_parent;
        if (top is null)
            return null;

        while (top.m_parent)
            top = top.m_parent;

        if (top.m_type != node_type.node_document)
            return null;
        return cast(xml_document)(top);
    }

    /**
     * Resolves (and caches) the namespace URI of this attribute.
     *
     * An attribute whose name has no ':' gets the sentinel value
     * "nullstring". Otherwise the part before the first ':' is looked up
     * through the parent node; if there is no parent the cached value is
     * left untouched.
     */
    string xmlns() 
    {
        if (m_xmlns)
            return m_xmlns;

        // Position of the first ':' in the name, or m_name.length if none.
        size_t colon = 0;
        while (colon < m_name.length && m_name[colon] != ':')
            ++colon;

        if (colon == m_name.length)
        {
            m_xmlns = "nullstring";
            return m_xmlns;
        }

        if (xml_node owner = m_parent)
        {
            // Work on a private mutable copy of the name, as the lookup
            // takes a mutable slice.
            char[] scratch = m_name.dup;
            char[] resolved = cast(char[])m_xmlns;
            owner.xmlns_lookup(resolved, scratch[0 .. colon]);
            m_xmlns = cast(string)resolved.dup;
        }
        return m_xmlns;
    }
}
84 
85 class xml_node:  xml_base
86 {
87 
88     string m_prefix;
89     string m_xmlns;
90     node_type m_type;
91     xml_node m_first_node;
92     xml_node m_last_node;
93     xml_attribute m_first_attribute;
94     xml_attribute m_last_attribute;
95     xml_node m_prev_sibling;
96     xml_node m_next_sibling;
97     string m_contents;
98 
99     string xmlns()
100     {
101         if(m_xmlns.length > 0)
102             return m_xmlns;
103         char[] xmlns;
104         xmlns_lookup(xmlns , cast(char[])m_prefix);
105         m_xmlns = cast(string)xmlns.dup;
106         return m_xmlns;
107     }
108 
109     xml_document document() 
110     {
111             xml_node node = cast(xml_node)(this);
112             while (node.m_parent)
113                 node = node.m_parent;
114             return node.m_type == node_type.node_document ? cast(xml_document)(node) : null;
115 
116     }
117 
118     void xmlns_lookup(ref char []xmlns,  char[]  prefix) 
119     {
120         char[] freeme;
121         char[] attrname;
122         int prefix_size = cast(int)prefix.length;
123         if (prefix) {
124             // Check if the prefix begins "xml".
125             if (prefix_size >= 3
126                 && prefix[0] == ('x')
127                 && prefix[1] == ('m')
128                 && prefix[2] == ('l')) {
129                 if (prefix_size == 3) {
130                     xmlns = cast(char[]) "http://www.w3.org/XML/1998/namespace";
131                     return;
132                 } else if (prefix_size == 5
133                             && prefix[3] == ('n')
134                             && prefix[4] == ('s')) {
135                     xmlns = cast(char[]) "http://www.w3.org/2000/xmlns/";
136                     return;
137                 }
138             }
139 
140             attrname.length = prefix_size + 6;
141             freeme = attrname;
142             char[] p1= cast(char[])"xmlns";
143             for(int i = 0 ;i < p1.length ; i++)
144                 attrname[i] = p1[i];
145 
146             char [] p = prefix;
147             attrname[p1.length] = ':';
148             int index = cast(int)p1.length + 1;
149             while (p.length > 0) {
150                 attrname[index++] = p[0];
151                 p = p[1 .. $];
152                 if ((freeme.length - attrname[index .. $].length ) >= (prefix_size + 6)) break;
153             }
154             attrname = freeme;
155         } else {
156             attrname.length = 5;
157             freeme = attrname ;
158             char[]  p1=cast(char[])"xmlns";
159             for(int i = 0 ;i < p1.length ; i++)
160                 attrname[i] = p1[i];
161             attrname = freeme;
162         }
163 
164         for ( xml_node node = this;
165                 node;
166                 node = node.m_parent) {
167             xml_attribute attr = node.first_attribute(cast(string)attrname);
168             if (attr !is null ) {
169                 xmlns = cast(char[])attr.m_value.dup;
170                 //  if (xmlns) {
171                 //      xmlns_size = attr->value_size();
172                 //  }
173                 break;
174             }
175         }
176         if (xmlns.length == 0) {
177             if (prefix.length == 0) {
178                 xmlns = cast(char[])"nullstring".dup;
179                 // xmlns_size = 0;
180             }
181         }
182 
183     }
184 
185     xml_node first_node(string name = null , string xmlns = null , bool case_sensitive = true)
186     {
187         if(xmlns.length == 0 && name.length > 0)
188         {
189             xmlns = this.xmlns();
190         }
191 
192         for(xml_node child = m_first_node ; child ; child = child.m_next_sibling)
193         {
194             if((!name || child.m_name == name) && (!xmlns || child.xmlns() == xmlns))
195             {                
196                 return child;
197             }
198         }
199 
200         return null;
201     }
202 
203     xml_node last_node(string name = null , string xmlns = null , bool case_sensitive = true)
204     {
205         for(xml_node child = m_last_node ; child ; child = child.m_prev_sibling)
206         {
207             if((!name || child.m_name == name) && (!xmlns || child.xmlns() == xmlns))
208                 return child;
209         }
210 
211         return null;
212     }
213 
214     void prepend_node(xml_node child)
215     {
216         if(first_node())
217         {
218             child.m_next_sibling = m_first_node;
219             m_first_node.m_prev_sibling = child;
220         }
221         else
222         {
223             child.m_next_sibling = null;
224             m_last_node = child;
225         }
226 
227         m_first_node = child;
228         child.m_parent = this;
229         child.m_prev_sibling = null;
230     }
231 
232     void append_node(xml_node child)
233     {
234         if(first_node())
235         {
236             child.m_prev_sibling = m_last_node;
237             m_last_node.m_next_sibling = child;
238         }
239         else
240         {
241             child.m_prev_sibling = null;
242             m_first_node = child;
243         }
244 
245         m_last_node = child;
246         child.m_parent = this;
247         child.m_next_sibling = null;
248     }
249 
250     void insert_node(xml_node where , xml_node child)
251     {
252         if(where == m_first_node)
253             prepend_node(child);
254         else if(where is null)
255             append_node(child);
256         else
257         {
258             child.m_prev_sibling = where.m_prev_sibling;
259             child.m_next_sibling = where;
260             where.m_prev_sibling.m_next_sibling = child;
261             where.m_prev_sibling = child;
262             child.m_parent = this;
263         }
264     }
265 
266     void remove_first_node()
267     {
268         xml_node child = m_first_node;
269         m_first_node = child.m_next_sibling;
270         if(child.m_next_sibling)
271             child.m_next_sibling.m_prev_sibling = null;
272         else
273             m_last_node = null;
274         child.m_parent = null;
275     }
276 
277     void remove_last_node()
278     {
279         xml_node child = m_last_node;
280         if(child.m_prev_sibling)
281         {
282             m_last_node = child.m_prev_sibling;
283             child.m_prev_sibling.m_next_sibling = null;
284         }
285         else
286         {
287             m_first_node = null;
288         }
289 
290         child.m_parent = null;
291     }
292 
293     void remove_node(xml_node where)
294     {
295         if(where == m_first_node)
296             remove_first_node();
297         else if(where == m_last_node)
298             remove_last_node();
299         else
300         {
301             where.m_prev_sibling.m_next_sibling = where.m_next_sibling;
302             where.m_next_sibling.m_prev_sibling = where.m_prev_sibling;
303             where.m_parent = null;
304         }
305     }
306 
307     void remove_all_nodes()
308     {
309         for( xml_node node = first_node(); node; node = node.m_next_sibling)
310             node.m_parent = null;
311 
312         m_first_node = null;
313     }
314 
315     xml_attribute first_attribute(string name = null , bool case_sensitive = true)
316     {
317         if(name)
318         {
319             for(xml_attribute attribute = m_first_attribute ; attribute ; attribute = attribute.m_next_attribute)
320             {
321 
322                 if(attribute.m_name == name)
323                 {    
324                     return attribute;
325                 }
326             }
327 
328             return null;
329         }
330         else
331         {
332             return m_first_attribute;
333         }
334     }
335 
336     xml_attribute last_attribute(string name = null , bool case_sensitive = true)
337     {
338         if(name)
339         {
340             for(xml_attribute attribute = m_last_attribute ; attribute ; attribute = attribute.m_prev_attribute)
341             {
342                 if(attribute.m_name == name)
343                     return attribute;
344             }
345 
346             return null;
347         }
348         else
349         {
350             return m_last_attribute;
351         }
352     }
353 
354     void prepend_attribute(xml_attribute attribute)
355     {
356         if(first_attribute())
357         {
358             attribute.m_next_attribute = m_first_attribute;
359             m_first_attribute.m_prev_attribute = attribute;
360         }
361         else
362         {
363             attribute.m_next_attribute = null;
364             m_last_attribute = attribute;
365         }
366         m_first_attribute = attribute;
367         attribute.m_parent = this;
368         attribute.m_prev_attribute = null;
369     }
370 
371     void append_attribute(xml_attribute attribute)
372     {
373         if(first_attribute())
374         {
375             attribute.m_prev_attribute = m_last_attribute;
376             m_last_attribute.m_next_attribute = attribute;
377         }
378         else
379         {
380             attribute.m_prev_attribute = null;
381             m_first_attribute = attribute;
382         }
383 
384         m_last_attribute = attribute;
385         attribute.m_parent = this;
386         attribute.m_next_attribute = null;
387     }
388 
389     void insert_attribute(xml_attribute where , xml_attribute attribute)
390     {
391         if(where == m_first_attribute)
392             prepend_attribute(attribute);
393         else if(where is null)
394             append_attribute(attribute);
395         else
396         {
397             attribute.m_prev_attribute = where.m_prev_attribute;
398             attribute.m_next_attribute = where;
399             where.m_prev_attribute.m_next_attribute = attribute;
400             where.m_prev_attribute = attribute;
401             attribute.m_parent = this;
402         }
403     }
404 
405     void remove_first_attribute()
406     {
407         xml_attribute attribute = m_first_attribute;
408         if(attribute.m_next_attribute)
409         {
410             attribute.m_next_attribute.m_prev_attribute = null;
411         }
412         else
413         {
414             m_last_attribute = null;
415         }
416 
417         attribute.m_parent = null;
418         m_first_attribute = attribute.m_next_attribute;
419     }
420 
421     void remove_last_attribute()
422     {
423         xml_attribute attribute = m_last_attribute;
424         if(attribute.m_prev_attribute)
425         {
426             attribute.m_prev_attribute.m_next_attribute = null;
427             m_last_attribute = attribute.m_prev_attribute;
428         }
429         else
430             m_first_attribute = null;
431 
432         attribute.m_parent = null;
433     }
434 
435     void remove_attribute(xml_attribute where)
436     {
437         if(where == m_first_attribute)
438             remove_first_attribute();
439         else if(where == m_last_attribute)
440             remove_last_attribute();
441         else
442         {
443             where.m_prev_attribute.m_next_attribute = where.m_next_attribute;
444             where.m_next_attribute.m_prev_attribute = where.m_prev_attribute;
445             where.m_parent = null;
446         }
447     }
448 
449     void remove_all_attributes()
450     {
451         for(xml_attribute attribute = first_attribute() ; attribute ; attribute = attribute.m_next_attribute)
452         {
453             attribute.m_parent = null;
454         }
455         m_first_attribute = null;
456     }
457 
458     bool validate()
459     {
460         if(this.xmlns() == null)
461         {    
462             writeln("Element XMLNS unbound");
463             return false;
464         }
465         for(xml_node child = first_node(); child ; child = child.m_next_sibling)
466         {
467             if(!child.validate())
468                 return false;
469         }
470         for(xml_attribute attribute = first_attribute() ; attribute ; attribute = attribute.m_next_attribute)
471         {
472             if(attribute.xmlns() == null)
473             {    
474                 writeln("Attribute XMLNS unbound");
475                 return false;
476             }
477             for(xml_attribute otherattr = first_attribute() ; otherattr != attribute; otherattr = otherattr.m_next_attribute)
478             {    
479                 if(attribute.m_name == otherattr.m_name)
480                 {    
481                     writeln("Attribute doubled");
482                     return false;
483                 }
484                 if(attribute.xmlns() == otherattr.xmlns() && attribute.m_local_name == otherattr.m_local_name)
485                 {
486                     writeln("Attribute XMLNS doubled");
487                     return false;
488                 }
489             }
490 
491         }
492         return true;
493     }
494 }
495 
496 class xml_document : xml_node
497 {
498     string parse(int Flags = 0)(string stext , xml_document parent = null)
499     {
500         this.remove_all_nodes();
501         this.remove_all_attributes();
502         this.m_parent = parent ? parent.m_first_node : null;
503         char[] text = cast(char[])stext.dup;
504 
505         parse_bom(text);
506 
507         size_t index = 0;
508         size_t length = text.length;
509         while(1)
510         {
511             skip!(whitespace_pred)(text); 
512             if(index >= text.length)
513                 break;
514             if(text[index] =='<')
515             {
516                 ++index;
517                 text = text[index .. $];
518                 xml_node  node = parse_node!(Flags)(text);
519                 if(node)
520                 {
521                     this.append_node(node);
522                     if(Flags & (parse_open_only | parse_parse_one))
523                     {
524                         if(node.m_type == node_type.node_comment)
525                             break;
526                     }
527                 }
528             }
529             else
530                 throw new parse_error("expected <", text);
531         }
532 
533         if(!first_node())
534             throw new parse_error("no root element", text[index .. $ ]);
535 
536         return string.init;
537     }
538 
539     xml_node parse_node(int Flags)(ref char[] text)
540     {
541         switch(text[0])
542         {
543             default:
544                 return parse_element!Flags(text);
545 
546             case '?':
547                 text = text[1 .. $ ];
548                 if(
549                     ((text[0] == 'x' ) || (text[0] == 'X')) &&
550                 ((text[0] == 'm' ) || (text[0] == 'M')) &&
551                 ((text[0] == 'l' ) || (text[0] == 'L')) &&
552                 whitespace_pred.test(text[3]))
553                 {
554                     text = text[4 .. $];
555                     return parse_xml_declaration!Flags(text);
556                 }
557                 else
558                 {
559                     return parse_pi!Flags(text);
560                 }
561 
562             case '!':
563                 switch(text[1])
564                 {
565                 case '-':
566                     if(text[2] == '-')
567                     {
568                         text = text[3 .. $ ];
569                         return parse_comment!Flags(text);
570                     } 
571                     break;
572                 case ('['):
573                     if (text[2] == ('C') && text[3] == ('D') && text[4] == ('A') &&
574                         text[5] == ('T') && text[6] == ('A') && text[7] == ('['))
575                     {
576                         // '<![CDATA[' - cdata
577                         text = text[8 .. $ ];     // Skip '![CDATA['
578                         return parse_cdata!Flags(text);
579                     }
580                     break;
581 
582                 // <!D
583                 case ('D'):
584                     if (text[2] == ('O') && text[3] == ('C') && text[4] == ('T') &&
585                         text[5] == ('Y') && text[6] == ('P') && text[7] == ('E') &&
586                         whitespace_pred.test(text[8]))
587                     {
588                         // '<!DOCTYPE ' - doctype
589                         text = text[9 .. $ ];      // skip '!DOCTYPE '
590                         return parse_doctype!Flags(text);
591                     }
592                     break;
593                 default:
594                     break;
595 
596                 } 
597 
598                  text = text[1 .. $ ];     // Skip !
599                 while (text[0] != ('>'))
600                 {
601                     if (text == null)
602                         throw new parse_error("unexpected end of data", text);
603                     text = text[1 .. $ ];
604                 }
605                 text = text[1 .. $ ];     // Skip '>'
606                 return null;   // No node recognized
607 
608         }
609     }
610 
611     xml_node parse_cdata(int Flags)(ref char[] text)
612     {
613         // If CDATA is disabled
614         if (Flags & parse_no_data_nodes)
615         {
616             // Skip until end of cdata
617             while (text[0] != ']' || text[1] != ']' || text[2] != '>')
618             {
619                 if (!text[0])
620                     throw new parse_error("unexpected end of data", text);
621                 text = text[1 .. $];
622             }
623             text = text[3 .. $];      // Skip ]]>
624             return null;       // Do not produce CDATA node
625         }
626 
627         // Skip until end of cdata
628         char[] value = text;
629         while (text[0] != (']') || text[1] != (']') || text[2] != ('>'))
630         {
631             if (!text[0])
632                 throw new parse_error("unexpected end of data", text);
633             text = text[1 .. $ ];
634         }
635 
636         // Create new cdata node
637         xml_node cdata = new xml_node;
638         xml_node.m_type = node_type.node_cdata;
639         cdata.m_value = cast(string)value[ 0 .. value.length - text.length].dup;
640 
641         // Place zero terminator after value
642 
643         text = text[3 .. $ ];      // Skip ]]>
644         return cdata;
645     }
646 
647     char parse_and_append_data(int Flags)(xml_node node, ref char []text, char[] contents_start)
648     {
649         // Backup to contents start if whitespace trimming is disabled
650         if (!(Flags & parse_trim_whitespace))
651             text = contents_start;
652 
653         // Skip until end of data
654         char [] value = text;
655         char []end;
656         if (Flags & parse_normalize_whitespace)
657             end = skip_and_expand_character_refs!(text_pred, text_pure_with_ws_pred, Flags)(text);
658         else
659             end = skip_and_expand_character_refs!(text_pred, text_pure_no_ws_pred, Flags)(text);
660 
661         // Trim trailing whitespace if flag is set; leading was already trimmed by whitespace skip after >
662         if (Flags & parse_trim_whitespace)
663         {
664             if (Flags & parse_normalize_whitespace)
665             {
666                 // Whitespace is already condensed to single space characters by skipping function, so just trim 1 char off the end
667                 if (end[-1] == ' ')
668                     end = end[-1 .. $];
669             }
670             else
671             {
672                 // Backup until non-whitespace character is found
673                 while (whitespace_pred.test(end[-1]))
674                     end = end[-1 .. $ - 1];
675             }
676         }
677 
678         // If characters are still left between end and value (this test is only necessary if normalization is enabled)
679         // Create new data node
680         if (!(Flags & parse_no_data_nodes))
681         {
682             xml_node data = new xml_node;
683             data.m_value = cast(string)value[0 .. value.length - end.length].dup;
684             node.append_node(data);
685         }
686 
687         // Add data to parent node if no data exists yet
688         if (!(Flags & parse_no_element_values))
689             if (node.m_value.length == 0)
690                 node.m_value = cast(string)value[0 ..value.length - end.length];
691 
692         // Place zero terminator after value
693         if (!(Flags & parse_no_string_terminators))
694         {
695             ubyte ch = text[0];
696             end[0] ='\0';
697             return ch;      // Return character that ends data; this is required because zero terminator overwritten it
698         }
699         else
700         // Return character that ends data
701         return text[0];
702     }
703 
704     xml_node parse_element(int Flags)(ref char[] text)
705     {
706         xml_node element = new xml_node();
707         char[] prefix = text;
708         //skip element_name_pred
709         skip!(element_name_pred)(text);
710         if(text == prefix)
711             throw new parse_error("expected element name or prefix", text);
712         if(text.length >0 && text[0] == ':')
713         {
714             element.m_prefix = prefix[0 .. prefix.length - text.length].dup;
715             text = text[1 .. $ ];
716             char[] name = text;
717             //skip node_name_pred
718             skip!(node_name_pred)(text);
719             if(text == name)
720                 throw new parse_error("expected element local name", text);
721             element.m_name = name[0 .. name.length - text.length].dup;
722         }
723         else{
724             element.m_name = prefix[ 0 .. prefix.length - text.length].dup;            
725         }
726 
727         //skip whitespace_pred
728         skip!(whitespace_pred)(text);
729         parse_node_attributes!(Flags)(text , element);
730         if(text.length > 0 && text[0] == '>')
731         {
732             text = text[1 .. $];
733             char[] contents = text;
734             char[] contents_end = null;
735             if(!(Flags & parse_open_only))
736             {    
737                 contents_end = parse_node_contents!(Flags)(text , element);
738             }
739             if(contents_end.length != contents.length )
740             {
741                 element.m_contents = cast(string)contents[0 .. contents.length - contents_end.length].dup;
742             }
743         }
744         else if(text.length > 0 && text[0] == '/')
745         {
746             text = text[1 .. $ ];
747             if(text[0] != '>')
748                 throw new parse_error("expected >", text);
749 
750             text = text[1 .. $ ];
751 
752             if(Flags & parse_open_only)
753                 throw new parse_error("open_only, but closed", text);
754         }
755         else 
756             throw new parse_error("expected >", text);
757         // Place zero terminator after name 
758         // no need.
759         return element;
760     }
761 
762     char[] parse_node_contents(int Flags)(ref char[] text , xml_node node)
763     {
764         char[] retval;
765 
766         while(1)
767         {
768             char[] contents_start = text;
769             skip!(whitespace_pred)(text);
770             char next_char = text[0];
771 
772             after_data_node:
773 
774             switch(next_char)
775             {
776                 case '<':
777                 if(text[1] == '/')
778                 {
779                     retval = text;
780                     text = text[2 .. $ ];
781                     if(Flags & parse_validate_closing_tags)
782                     {
783                         string closing_name = cast(string)text.dup;
784                         skip!(node_name_pred)(text);
785                         if(closing_name == node.m_name)
786                             throw new parse_error("invalid closing tag name", text);
787                     }
788                     else
789                     {
790                         skip!(node_name_pred)(text);
791                     }
792 
793                     skip!(whitespace_pred)(text);
794                     if(text[0] != '>')
795                         throw new parse_error("expected >", text);
796                     text = text[1 .. $];
797                     if(Flags & parse_open_only)
798                         throw new parse_error("Unclosed element actually closed.", text);
799 
800                     return retval;
801                 }
802                 else
803                 {
804                     text = text[1 .. $ ];
805                     if(xml_node child = parse_node!(Flags & ~parse_open_only)(text))
806                         node.append_node(child);
807                 }
808                 break;
809             default:
810                  next_char = parse_and_append_data!(Flags)(node, text, contents_start);
811                 goto after_data_node;   // Bypass regular processing after data nodes
812             }
813         }
814 
815         return null;
816     }
817 
818     void parse_node_attributes(int Flags)(ref char[] text , xml_node node)
819     {
820         int index = 0;
821 
822         while(text.length > 0 && attribute_name_pred.test(text[0]))
823         {
824             char[] name = text;
825             text = text[1 .. $ ];
826             skip!(attribute_name_pred)(text);
827             if(text == name)
828                 throw new parse_error("expected attribute name", name);
829 
830             xml_attribute attribute = new xml_attribute();
831             attribute.m_name = cast(string)name[0 .. name.length - text.length].dup;
832 
833             node.append_attribute(attribute);
834 
835             skip!(whitespace_pred)(text);
836 
837             if(text.length ==0 || text[0] != '=')
838                 throw new parse_error("expected =", text);
839 
840             text = text[1 .. $ ];
841 
842             skip!(whitespace_pred)(text);
843 
844             char quote = text[0];
845             if(quote != '\'' && quote != '"')
846                 throw new parse_error("expected ' or \"", text);
847 
848             text = text[1 .. $ ];
849             char[] value = text ;
850             char[] end;
851             const int AttFlags = Flags & ~parse_normalize_whitespace;
852 
853             if(quote == '\'')
854                 end = skip_and_expand_character_refs!(attribute_value_pred!'\'' , attribute_value_pure_pred!('\'') , AttFlags)(text);
855             else
856                 end = skip_and_expand_character_refs!(attribute_value_pred!('"') , attribute_value_pure_pred!('"') , AttFlags)(text);
857 
858             attribute.m_value = cast(string)value[0 .. value.length - end.length].dup;
859 
860             if(text.length > 0 && text[0] != quote)
861                 throw new parse_error("expected ' or \"", text);
862 
863             text = text[1 .. $ ];
864 
865             skip!(whitespace_pred)(text);
866         }
867     }
868 
869     static void skip(T )(ref char[] text)
870     {
871 
872         char[] tmp = text;
873         while(tmp.length > 0 && T.test(tmp[0]))
874         {
875             tmp = tmp[1 .. $];    
876         }
877         text = tmp;
878     }
879 
880     void parse_bom(ref char[] text)
881     {
882         if(text[0] == 0xEF 
883         && text[1] == 0xBB 
884         && text[2] == 0xBF)
885         {
886             text = text[3 .. $ ];
887         }
888     }
889 
890     xml_node parse_xml_declaration(int Flags)(ref char[] text)
891     {
892         // If parsing of declaration is disabled
893         if (!(Flags & parse_declaration_node))
894         {
895             // Skip until end of declaration
896             while (text[0] != '?' || text[1] != '>')
897             {
898                 if (!text[0]) 
899                 throw new parse_error("unexpected end of data", text);
900                 text = text[1 .. $ ];
901             }
902             text = text[2 .. $ ];    // Skip '?>'
903             return null;
904         }
905 
906         static if (Flags != 0)
907         // Create declaration
908         {
909             xml_node declaration = new xml_node;
910             declaration.m_type = node_type.node_declaration;
911 
912             // Skip whitespace before attributes or ?>
913             skip!whitespace_pred(text);
914             // Parse declaration attributes
915             parse_node_attributes!Flags(text, declaration);
916 
917             // Skip ?>
918             if (text[0] != '?' || text[1] != '>') 
919                 throw new parse_error("expected ?>", text);
920             text = text[2 .. $ ];
921 
922             return declaration;
923         }
924     }
925 
926     xml_node parse_pi(int Flags)(ref char[] text)
927     {
928         // If creation of PI nodes is enabled
929         if (Flags & parse_pi_nodes)
930         {
931             // Create pi node
932             xml_node pi = new xml_node;
933             xml_node.m_type = node_type.node_pi;
934 
935             // Extract PI target name
936             char[] name = text;
937             skip!node_name_pred(text);
938             if (text == name) 
939                 throw new parse_error("expected PI target", text);
940             pi.m_name = cast(string)name[0 .. name.length - text.length].dup;
941 
942             // Skip whitespace between pi target and pi
943             skip!whitespace_pred(text);
944 
945             // Remember start of pi
946             char[] value = text;
947 
948             // Skip to '?>'
949             while (text[0] != '?' || text[1] != '>')
950             {
951                 if (text == null)
952                     throw new parse_error("unexpected end of data", text);
953                 text = text[1 .. $ ];
954             }
955 
956             // Set pi value (verbatim, no entity expansion or whitespace normalization)
957             pi.m_value = cast(string)value[ 0 .. value.length - text.length ].dup;
958 
959             // Place zero terminator after name and value
960             // no need
961 
962             text = text[2 .. $ ];                          // Skip '?>'
963             return pi;
964         }
965         else
966         {
967             // Skip to '?>'
968             while (text[0] != '?' || text[1] != '>')
969             {
970                 if (text[0] == '\0')
971                     throw new parse_error("unexpected end of data", text);
972                 text = text[1 .. $ ];
973             }
974             text = text[2 .. $ ];    // Skip '?>'
975             return null;
976         }
977     }
978 
979     xml_node parse_comment(int Flags)(ref char[] text)
980     {
981         // If parsing of comments is disabled
982         if (!(Flags & parse_comment_nodes))
983         {
984             // Skip until end of comment
985             while (text[0] != '-' || text[1] != '-' || text[2] != '>')
986             {
987                 if (!text[0]) throw new parse_error("unexpected end of data", text);
988                 text = text[1 .. $];
989             }
990             text = text [3 .. $];     // Skip '-->'
991             return null;      // Do not produce comment node
992         }
993 
994         // Remember value start
995 
996         static if (Flags != 0)
997         {
998             string value = text;
999 
1000             // Skip until end of comment
1001             while (text[0] != '-' || text[1] != '-' || text[2] != '>')
1002             {
1003                 if (!text[0]) throw new parse_error("unexpected end of data", text);
1004                 text= text[1 .. $];
1005             }
1006 
1007             // Create comment node
1008             xml_node comment = new xml_node;
1009             comment.m_type = node_type.node_comment;
1010             comment.m_value = cast(string)value[0 .. value.length - text.length].dup;
1011 
1012             // Place zero terminator after comment value
1013             // no need
1014 
1015             text = text[3 .. $ ];     // Skip '-->'
1016             return comment;
1017         }
1018     }
1019 
1020     // Parse DOCTYPE
1021 
1022     xml_node parse_doctype(int Flags)(ref char[] text)
1023     {
1024         // Remember value start
1025         char[] value = text;
1026 
1027         // Skip to >
1028         while (text[0] != '>')
1029         {
1030             // Determine character type
1031             switch (text[0])
1032             {
1033 
1034             // If '[' encountered, scan for matching ending ']' using naive algorithm with depth
1035             // This works for all W3C test files except for 2 most wicked
1036             case ('['):
1037             {
1038                 text = text[1 .. $ ];     // Skip '['
1039                 int depth = 1;
1040                 while (depth > 0)
1041                 {
1042                     switch (text[0])
1043                     {
1044                         case '[': ++depth; break;
1045                         case ']': --depth; break;
1046                         default : throw new parse_error("unexpected end of data", text);
1047                     }
1048                     text = text[1 .. $];
1049                 }
1050                 break;
1051             }
1052 
1053             // Error on end of text
1054             case '\0':
1055                 throw new parse_error("unexpected end of data", text);
1056 
1057             // Other character, skip it
1058             default:
1059                 text = text[1 .. $ ];
1060 
1061             }
1062         }
1063 
1064         // If DOCTYPE nodes enabled
1065         if (Flags & parse_doctype_node)
1066         {
1067             // Create a new doctype node
1068             xml_node doctype = new xml_node;
1069             doctype.m_type = node_type.node_doctype;
1070             doctype.m_value = cast(string)value[ 0 .. value.length - text.length].dup;
1071 
1072             // Place zero terminator after value
1073             // no need
1074 
1075             text = text[1 .. $ ];      // skip '>'
1076             return doctype;
1077         }
1078         else
1079         {
1080             text = text[1 .. $ ];      // skip '>'
1081             return null;
1082         }
1083     }
1084 }