/*
 * Hunt - A xml library for D programming language.
 *
 * Copyright (C) 2006, 2009 Marcin Kalicinski (For C++ Version 1.13)
 * Copyright (C) 2018-2019 HuntLabs ( For D Language Version)
 *
 * Website: https://www.huntlabs.net
 *
 * Licensed under the Apache-2.0 License.
 *
 */

module rapidxml.rapidxml;

import std.stdio;
import std.exception;

import rapidxml.skip;

/// Kinds of node that can appear in the parsed tree.
enum node_type
{
    node_document,      //!< A document node. Name and value are empty.
    node_element,       //!< An element node. Name contains element name. Value contains text of first data node.
    node_data,          //!< A data node. Name is empty. Value contains data text.
    node_cdata,         //!< A CDATA node. Name is empty. Value contains data text.
    node_comment,       //!< A comment node. Name is empty. Value contains comment text.
    node_declaration,   //!< A declaration node. Name and value are empty. Declaration parameters (version, encoding and standalone) are in node attributes.
    node_doctype,       //!< A DOCTYPE node. Name is empty. Value contains DOCTYPE text.
    node_pi,            //!< A PI node. Name contains target. Value contains instructions.
    node_literal        //!< Value is unencoded text (used for inserting pre-rendered XML).
}

/**
 * Compares two names, optionally ignoring case.
 *
 * Params:
 *   lhs            = first name
 *   rhs            = second name
 *   case_sensitive = when false the comparison is done with `std.uni.icmp`
 */
private bool names_equal(const(char)[] lhs, const(char)[] rhs, bool case_sensitive)
{
    if (case_sensitive)
        return lhs == rhs;

    import std.uni : icmp;

    return icmp(lhs, rhs) == 0;
}

/// Returns the index of the first ':' in `name`, or -1 when there is none.
private ptrdiff_t colon_index(const(char)[] name)
{
    foreach (i, c; name)
    {
        if (c == ':')
            return cast(ptrdiff_t) i;
    }
    return -1;
}

/// State shared by nodes and attributes: a name, a value and the owning node.
class xml_base
{
    string m_name;
    string m_value;
    xml_node m_parent;
}

/// A single attribute; attributes of one node form a doubly linked list.
class xml_attribute : xml_base
{
    xml_attribute m_prev_attribute;
    xml_attribute m_next_attribute;
    string m_xmlns;
    string m_local_name;

    /**
     * Walks up the parent chain and returns the root when it is a document.
     *
     * Returns: the owning document, or null when the attribute is detached
     *          or the root of its tree is not a document node.
     */
    xml_document document()
    {
        xml_node node = m_parent;
        if (node is null)
            return null;

        while (node.m_parent)
            node = node.m_parent;

        return node.m_type == node_type.node_document ? cast(xml_document) node : null;
    }

    /**
     * Returns the part of the attribute name after the first ':' (the whole
     * name when there is no prefix). The result is cached in `m_local_name`.
     */
    string local_name()
    {
        if (m_local_name.length == 0)
        {
            immutable colon = colon_index(m_name);
            m_local_name = colon < 0 ? m_name : m_name[colon + 1 .. $];
        }
        return m_local_name;
    }

    /**
     * Resolves and caches the namespace of this attribute.
     *
     * An attribute without a prefix gets the sentinel "nullstring". A
     * prefixed attribute is resolved through the parent's `xmlns_lookup`;
     * the result is empty when the prefix is not bound or there is no parent.
     */
    string xmlns()
    {
        if (m_xmlns !is null)
            return m_xmlns;

        immutable colon = colon_index(m_name);
        if (colon < 0)
        {
            m_xmlns = "nullstring";
            return m_xmlns;
        }

        if (xml_node element = m_parent)
        {
            // Slice a private copy so the prefix stays non-null even when it
            // is empty (name of the form ":local"), as xmlns_lookup
            // distinguishes a null prefix from an empty one.
            char[] name = m_name.dup;
            char[] resolved;
            element.xmlns_lookup(resolved, name[0 .. colon]);
            m_xmlns = resolved.idup;
        }
        return m_xmlns;
    }
}

/// A tree node. Children and attributes are kept in doubly linked lists.
class xml_node : xml_base
{
    string m_prefix;
    string m_xmlns;
    node_type m_type;
    xml_node m_first_node;
    xml_node m_last_node;
    xml_attribute m_first_attribute;
    xml_attribute m_last_attribute;
    xml_node m_prev_sibling;
    xml_node m_next_sibling;
    string m_contents;

    /// Resolves (and caches when non-empty) the namespace bound to `m_prefix`.
    string xmlns()
    {
        if (m_xmlns.length > 0)
            return m_xmlns;

        char[] resolved;
        xmlns_lookup(resolved, cast(char[]) m_prefix);
        m_xmlns = resolved.idup;
        return m_xmlns;
    }

    /// Returns the root of this node's tree when it is a document, else null.
    xml_document document()
    {
        xml_node node = this;
        while (node.m_parent)
            node = node.m_parent;

        return node.m_type == node_type.node_document ? cast(xml_document) node : null;
    }

    /**
     * Looks up the namespace bound to `prefix` on this node or an ancestor.
     *
     * Params:
     *   xmlns  = receives the namespace. It is left untouched when a
     *            non-empty prefix is not bound anywhere up the chain.
     *   prefix = namespace prefix; null selects the default namespace
     *            (the plain `xmlns` attribute).
     *
     * The reserved prefixes `xml` and `xmlns` map to their fixed URIs. When
     * nothing is found and the prefix is empty the sentinel "nullstring" is
     * stored.
     */
    void xmlns_lookup(ref char[] xmlns, char[] prefix)
    {
        string attrname;
        if (prefix !is null)
        {
            if (prefix == "xml")
            {
                xmlns = "http://www.w3.org/XML/1998/namespace".dup;
                return;
            }
            if (prefix == "xmlns")
            {
                xmlns = "http://www.w3.org/2000/xmlns/".dup;
                return;
            }
            attrname = "xmlns:" ~ prefix.idup;
        }
        else
        {
            attrname = "xmlns";
        }

        // The nearest declaration wins, so search from this node upwards.
        for (xml_node node = this; node !is null; node = node.m_parent)
        {
            if (xml_attribute attr = node.first_attribute(attrname))
            {
                xmlns = attr.m_value.dup;
                break;
            }
        }

        if (xmlns.length == 0 && prefix.length == 0)
            xmlns = "nullstring".dup;
    }

    /**
     * Finds the first child matching `name` and `xmlns`.
     *
     * Params:
     *   name           = child name to match; null matches any name
     *   xmlns          = namespace to match; when empty and a name is given,
     *                    this node's own namespace is used
     *   case_sensitive = whether the name comparison is case sensitive
     *
     * Returns: the matching child or null.
     */
    xml_node first_node(string name = null, string xmlns = null, bool case_sensitive = true)
    {
        if (xmlns.length == 0 && name.length > 0)
            xmlns = this.xmlns();

        for (xml_node child = m_first_node; child !is null; child = child.m_next_sibling)
        {
            immutable name_ok = name is null || names_equal(child.m_name, name, case_sensitive);
            if (name_ok && (xmlns is null || child.xmlns() == xmlns))
                return child;
        }
        return null;
    }

    /**
     * Finds the last child matching `name` and `xmlns`.
     *
     * Unlike `first_node`, a missing `xmlns` is not defaulted to this node's
     * namespace. Null `name` / `xmlns` match anything.
     */
    xml_node last_node(string name = null, string xmlns = null, bool case_sensitive = true)
    {
        for (xml_node child = m_last_node; child !is null; child = child.m_prev_sibling)
        {
            immutable name_ok = name is null || names_equal(child.m_name, name, case_sensitive);
            if (name_ok && (xmlns is null || child.xmlns() == xmlns))
                return child;
        }
        return null;
    }

    /// Inserts `child` as the first child of this node.
    void prepend_node(xml_node child)
    {
        if (m_first_node !is null)
        {
            child.m_next_sibling = m_first_node;
            m_first_node.m_prev_sibling = child;
        }
        else
        {
            child.m_next_sibling = null;
            m_last_node = child;
        }

        m_first_node = child;
        child.m_parent = this;
        child.m_prev_sibling = null;
    }

    /// Inserts `child` as the last child of this node.
    void append_node(xml_node child)
    {
        if (m_first_node !is null)
        {
            child.m_prev_sibling = m_last_node;
            m_last_node.m_next_sibling = child;
        }
        else
        {
            child.m_prev_sibling = null;
            m_first_node = child;
        }

        m_last_node = child;
        child.m_parent = this;
        child.m_next_sibling = null;
    }

    /// Inserts `child` before `where`; a null `where` appends.
    void insert_node(xml_node where, xml_node child)
    {
        if (where is m_first_node)
        {
            prepend_node(child);
        }
        else if (where is null)
        {
            append_node(child);
        }
        else
        {
            child.m_prev_sibling = where.m_prev_sibling;
            child.m_next_sibling = where;
            where.m_prev_sibling.m_next_sibling = child;
            where.m_prev_sibling = child;
            child.m_parent = this;
        }
    }

    /// Detaches the first child; does nothing when there are no children.
    void remove_first_node()
    {
        xml_node child = m_first_node;
        if (child is null)
            return;

        m_first_node = child.m_next_sibling;
        if (child.m_next_sibling)
            child.m_next_sibling.m_prev_sibling = null;
        else
            m_last_node = null;

        child.m_parent = null;
    }

    /// Detaches the last child; does nothing when there are no children.
    void remove_last_node()
    {
        xml_node child = m_last_node;
        if (child is null)
            return;

        if (child.m_prev_sibling)
        {
            m_last_node = child.m_prev_sibling;
            child.m_prev_sibling.m_next_sibling = null;
        }
        else
        {
            // Removing the only child empties the list at both ends.
            m_first_node = null;
            m_last_node = null;
        }

        child.m_parent = null;
    }

    /// Detaches the child `where` from this node.
    void remove_node(xml_node where)
    {
        if (where is m_first_node)
        {
            remove_first_node();
        }
        else if (where is m_last_node)
        {
            remove_last_node();
        }
        else
        {
            where.m_prev_sibling.m_next_sibling = where.m_next_sibling;
            where.m_next_sibling.m_prev_sibling = where.m_prev_sibling;
            where.m_parent = null;
        }
    }

    /// Detaches every child of this node.
    void remove_all_nodes()
    {
        for (xml_node node = m_first_node; node !is null; node = node.m_next_sibling)
            node.m_parent = null;

        m_first_node = null;
        // last_node() walks from m_last_node, so it must be reset as well.
        m_last_node = null;
    }

    /**
     * Returns the first attribute named `name`, or the first attribute of
     * the node when `name` is null.
     */
    xml_attribute first_attribute(string name = null, bool case_sensitive = true)
    {
        if (name is null)
            return m_first_attribute;

        for (xml_attribute attribute = m_first_attribute; attribute !is null;
                attribute = attribute.m_next_attribute)
        {
            if (names_equal(attribute.m_name, name, case_sensitive))
                return attribute;
        }
        return null;
    }

    /**
     * Returns the last attribute named `name`, or the last attribute of the
     * node when `name` is null.
     */
    xml_attribute last_attribute(string name = null, bool case_sensitive = true)
    {
        if (name is null)
            return m_last_attribute;

        for (xml_attribute attribute = m_last_attribute; attribute !is null;
                attribute = attribute.m_prev_attribute)
        {
            if (names_equal(attribute.m_name, name, case_sensitive))
                return attribute;
        }
        return null;
    }

    /// Inserts `attribute` at the front of the attribute list.
    void prepend_attribute(xml_attribute attribute)
    {
        if (m_first_attribute !is null)
        {
            attribute.m_next_attribute = m_first_attribute;
            m_first_attribute.m_prev_attribute = attribute;
        }
        else
        {
            attribute.m_next_attribute = null;
            m_last_attribute = attribute;
        }

        m_first_attribute = attribute;
        attribute.m_parent = this;
        attribute.m_prev_attribute = null;
    }

    /// Inserts `attribute` at the back of the attribute list.
    void append_attribute(xml_attribute attribute)
    {
        if (m_first_attribute !is null)
        {
            attribute.m_prev_attribute = m_last_attribute;
            m_last_attribute.m_next_attribute = attribute;
        }
        else
        {
            attribute.m_prev_attribute = null;
            m_first_attribute = attribute;
        }

        m_last_attribute = attribute;
        attribute.m_parent = this;
        attribute.m_next_attribute = null;
    }

    /// Inserts `attribute` before `where`; a null `where` appends.
    void insert_attribute(xml_attribute where, xml_attribute attribute)
    {
        if (where is m_first_attribute)
        {
            prepend_attribute(attribute);
        }
        else if (where is null)
        {
            append_attribute(attribute);
        }
        else
        {
            attribute.m_prev_attribute = where.m_prev_attribute;
            attribute.m_next_attribute = where;
            where.m_prev_attribute.m_next_attribute = attribute;
            where.m_prev_attribute = attribute;
            attribute.m_parent = this;
        }
    }

    /// Detaches the first attribute; does nothing when there are none.
    void remove_first_attribute()
    {
        xml_attribute attribute = m_first_attribute;
        if (attribute is null)
            return;

        if (attribute.m_next_attribute)
            attribute.m_next_attribute.m_prev_attribute = null;
        else
            m_last_attribute = null;

        attribute.m_parent = null;
        m_first_attribute = attribute.m_next_attribute;
    }

    /// Detaches the last attribute; does nothing when there are none.
    void remove_last_attribute()
    {
        xml_attribute attribute = m_last_attribute;
        if (attribute is null)
            return;

        if (attribute.m_prev_attribute)
        {
            attribute.m_prev_attribute.m_next_attribute = null;
            m_last_attribute = attribute.m_prev_attribute;
        }
        else
        {
            // Removing the only attribute empties the list at both ends.
            m_first_attribute = null;
            m_last_attribute = null;
        }

        attribute.m_parent = null;
    }

    /// Detaches the attribute `where` from this node.
    void remove_attribute(xml_attribute where)
    {
        if (where is m_first_attribute)
        {
            remove_first_attribute();
        }
        else if (where is m_last_attribute)
        {
            remove_last_attribute();
        }
        else
        {
            where.m_prev_attribute.m_next_attribute = where.m_next_attribute;
            where.m_next_attribute.m_prev_attribute = where.m_prev_attribute;
            where.m_parent = null;
        }
    }

    /// Detaches every attribute of this node.
    void remove_all_attributes()
    {
        for (xml_attribute attribute = m_first_attribute; attribute !is null;
                attribute = attribute.m_next_attribute)
        {
            attribute.m_parent = null;
        }

        m_first_attribute = null;
        // last_attribute() starts from m_last_attribute, so reset it too.
        m_last_attribute = null;
    }

    /**
     * Checks namespace bindings and attribute uniqueness for this subtree.
     *
     * A diagnostic is written to stdout for the first problem found.
     *
     * Returns: false when a namespace prefix is unbound, or when two
     *          attributes of one node share a qualified name or share a
     *          (namespace, local name) pair; true otherwise.
     */
    bool validate()
    {
        if (this.xmlns().length == 0)
        {
            writeln("Element XMLNS unbound");
            return false;
        }

        for (xml_node child = m_first_node; child !is null; child = child.m_next_sibling)
        {
            if (!child.validate())
                return false;
        }

        for (xml_attribute attribute = m_first_attribute; attribute !is null;
                attribute = attribute.m_next_attribute)
        {
            if (attribute.xmlns().length == 0)
            {
                writeln("Attribute XMLNS unbound");
                return false;
            }

            // Compare against every attribute that precedes this one.
            for (xml_attribute otherattr = m_first_attribute; otherattr !is attribute;
                    otherattr = otherattr.m_next_attribute)
            {
                if (attribute.m_name == otherattr.m_name)
                {
                    writeln("Attribute doubled");
                    return false;
                }
                // local_name() derives the local part from m_name; the raw
                // m_local_name field is not filled in by the parser.
                if (attribute.xmlns() == otherattr.xmlns()
                        && attribute.local_name() == otherattr.local_name())
                {
                    writeln("Attribute XMLNS doubled");
                    return false;
                }
            }
        }
        return true;
    }
}

/// Document node; owns the parser.
class xml_document : xml_node
{
    /**
     * Parses `stext` and rebuilds this document from it.
     *
     * Existing children and attributes are removed first. The input is
     * copied, so the caller's string is never modified.
     *
     * Params:
     *   Flags  = bitwise combination of the parse_* flags
     *   stext  = XML source text
     *   parent = optional document whose first node becomes this
     *            document's parent
     *
     * Returns: always `string.init`.
     * Throws: parse_error on malformed input or when no node was produced.
     */
    string parse(int Flags = 0)(string stext, xml_document parent = null)
    {
        this.remove_all_nodes();
        this.remove_all_attributes();
        this.m_parent = parent ? parent.m_first_node : null;

        char[] text = stext.dup;
        parse_bom(text);

        while (true)
        {
            skip!(whitespace_pred)(text);
            if (text.length == 0)
                break;

            if (text[0] != '<')
                throw new parse_error("expected <", text);

            text = text[1 .. $]; // Skip '<'
            xml_node node = parse_node!(Flags)(text);
            if (node)
            {
                this.append_node(node);
                if (Flags & (parse_open_only | parse_parse_one))
                {
                    // NOTE(review): stopping on a comment node looks odd for
                    // "parse one" / "open only"; node_element may have been
                    // intended - confirm before changing.
                    if (node.m_type == node_type.node_comment)
                        break;
                }
            }
        }

        if (!first_node())
            throw new parse_error("no root element", text);

        return string.init;
    }

    /**
     * Parses one node. `text` must be positioned just after the opening '<'.
     *
     * Returns: the new node, or null when the construct was skipped.
     * Throws: parse_error on malformed or truncated input.
     */
    xml_node parse_node(int Flags)(ref char[] text)
    {
        if (text.length == 0)
            throw new parse_error("unexpected end of data", text);

        switch (text[0])
        {
        case '?':
            text = text[1 .. $]; // Skip '?'
            // "<?xml" followed by whitespace is the XML declaration;
            // anything else is a processing instruction.
            if (text.length > 3
                    && (text[0] == 'x' || text[0] == 'X')
                    && (text[1] == 'm' || text[1] == 'M')
                    && (text[2] == 'l' || text[2] == 'L')
                    && whitespace_pred.test(text[3]))
            {
                text = text[4 .. $]; // Skip 'xml '
                return parse_xml_declaration!Flags(text);
            }
            return parse_pi!Flags(text);

        case '!':
            if (has_prefix(text, "!--"))
            {
                text = text[3 .. $]; // Skip '!--'
                return parse_comment!Flags(text);
            }
            if (has_prefix(text, "![CDATA["))
            {
                text = text[8 .. $]; // Skip '![CDATA['
                return parse_cdata!Flags(text);
            }
            if (has_prefix(text, "!DOCTYPE") && text.length > 8
                    && whitespace_pred.test(text[8]))
            {
                text = text[9 .. $]; // Skip '!DOCTYPE '
                return parse_doctype!Flags(text);
            }

            // Unrecognised "<!...": skip it up to and including '>'.
            text = text[1 .. $]; // Skip '!'
            skip_until(text, ">");
            text = text[1 .. $]; // Skip '>'
            return null;

        default:
            return parse_element!Flags(text);
        }
    }

    /**
     * Parses a CDATA section. `text` is positioned just after "<![CDATA[".
     *
     * Returns: a node_cdata node, or null when parse_no_data_nodes is set.
     */
    xml_node parse_cdata(int Flags)(ref char[] text)
    {
        char[] value = skip_until(text, "]]>");
        text = text[3 .. $]; // Skip ]]>

        if (Flags & parse_no_data_nodes)
        {
            return null; // Do not produce CDATA node
        }
        else
        {
            xml_node cdata = new xml_node;
            cdata.m_type = node_type.node_cdata;
            cdata.m_value = value.idup;
            return cdata;
        }
    }

    /**
     * Parses character data inside `node`.
     *
     * Appends a node_data child (unless parse_no_data_nodes is set) and
     * sets the element's own value if it has none yet (unless
     * parse_no_element_values is set).
     *
     * Params:
     *   node           = element that receives the data
     *   text           = current position; advanced past the data
     *   contents_start = position before leading whitespace was skipped
     *
     * Returns: the character that terminated the data, or '\0' when the
     *          input ended. No terminator is written into the buffer, since
     *          every stored value is an independent copy.
     */
    char parse_and_append_data(int Flags)(xml_node node, ref char[] text, char[] contents_start)
    {
        // Backup to contents start if whitespace trimming is disabled
        if (!(Flags & parse_trim_whitespace))
            text = contents_start;

        // Skip until end of data
        char[] value = text;
        char[] end;
        if (Flags & parse_normalize_whitespace)
            end = skip_and_expand_character_refs!(text_pred, text_pure_with_ws_pred, Flags)(text);
        else
            end = skip_and_expand_character_refs!(text_pred, text_pure_no_ws_pred, Flags)(text);

        // `end` marks where the (possibly shortened) data stops in `value`.
        size_t data_length = value.length - end.length;

        // Trim trailing whitespace if flag is set; leading whitespace was
        // already skipped by the caller.
        if (Flags & parse_trim_whitespace)
        {
            if (Flags & parse_normalize_whitespace)
            {
                // Runs of whitespace are already condensed to one space, so
                // at most one character has to go.
                if (data_length > 0 && value[data_length - 1] == ' ')
                    --data_length;
            }
            else
            {
                while (data_length > 0 && whitespace_pred.test(value[data_length - 1]))
                    --data_length;
            }
        }

        string data_value = value[0 .. data_length].idup;

        // Create new data node
        if (!(Flags & parse_no_data_nodes))
        {
            xml_node data = new xml_node;
            data.m_type = node_type.node_data;
            data.m_value = data_value;
            node.append_node(data);
        }

        // Add data to parent node if no data exists yet
        if (!(Flags & parse_no_element_values))
        {
            if (node.m_value.length == 0)
                node.m_value = data_value;
        }

        // Return character that ends data
        return text.length > 0 ? text[0] : '\0';
    }

    /**
     * Parses an element. `text` is positioned on the first character of the
     * element name.
     *
     * Returns: the new node_element node with prefix, name, attributes and
     *          (unless parse_open_only is set) children filled in.
     * Throws: parse_error on malformed input.
     */
    xml_node parse_element(int Flags)(ref char[] text)
    {
        xml_node element = new xml_node();
        element.m_type = node_type.node_element;

        // Extract element name or prefix
        char[] prefix = text;
        skip!(element_name_pred)(text);
        if (text.length == prefix.length)
            throw new parse_error("expected element name or prefix", text);

        if (text.length > 0 && text[0] == ':')
        {
            element.m_prefix = prefix[0 .. $ - text.length].idup;
            text = text[1 .. $]; // Skip ':'

            char[] name = text;
            skip!(node_name_pred)(text);
            if (text.length == name.length)
                throw new parse_error("expected element local name", text);
            element.m_name = name[0 .. $ - text.length].idup;
        }
        else
        {
            element.m_name = prefix[0 .. $ - text.length].idup;
        }

        // Skip whitespace between element name and attributes or >
        skip!(whitespace_pred)(text);
        parse_node_attributes!(Flags)(text, element);

        if (text.length > 0 && text[0] == '>')
        {
            text = text[1 .. $];
            char[] contents = text;
            char[] contents_end = null;
            if (!(Flags & parse_open_only))
                contents_end = parse_node_contents!(Flags)(text, element);

            if (contents_end.length != contents.length)
                element.m_contents = contents[0 .. $ - contents_end.length].idup;
        }
        else if (text.length > 0 && text[0] == '/')
        {
            text = text[1 .. $];
            if (text.length == 0 || text[0] != '>')
                throw new parse_error("expected >", text);
            text = text[1 .. $];

            if (Flags & parse_open_only)
                throw new parse_error("open_only, but closed", text);
        }
        else
        {
            throw new parse_error("expected >", text);
        }

        return element;
    }

    /**
     * Parses the children of `node` up to and including its closing tag.
     *
     * Returns: the slice starting at the "</" of the closing tag, which the
     *          caller uses to compute the raw contents of the element.
     * Throws: parse_error on malformed or truncated input.
     */
    char[] parse_node_contents(int Flags)(ref char[] text, xml_node node)
    {
        while (true)
        {
            char[] contents_start = text;
            skip!(whitespace_pred)(text);
            if (text.length == 0)
                throw new parse_error("unexpected end of data", text);
            char next_char = text[0];

            // After data nodes, instead of continuing the loop, control
            // jumps here: the terminating character is already known.
        after_data_node:

            switch (next_char)
            {
            case '<':
                if (text.length > 1 && text[1] == '/')
                {
                    char[] retval = text;
                    text = text[2 .. $]; // Skip '</'

                    if (Flags & parse_validate_closing_tags)
                    {
                        // The closing tag carries the qualified name.
                        char[] closing_start = text;
                        skip!(node_name_pred)(text);
                        char[] closing_name = closing_start[0 .. $ - text.length];
                        string expected = node.m_prefix.length > 0
                            ? node.m_prefix ~ ":" ~ node.m_name : node.m_name;
                        if (closing_name != expected)
                            throw new parse_error("invalid closing tag name", text);
                    }
                    else
                    {
                        skip!(node_name_pred)(text);
                    }

                    // Skip remaining whitespace after node name
                    skip!(whitespace_pred)(text);
                    if (text.length == 0 || text[0] != '>')
                        throw new parse_error("expected >", text);
                    text = text[1 .. $]; // Skip '>'

                    if (Flags & parse_open_only)
                        throw new parse_error("Unclosed element actually closed.", text);

                    return retval;
                }
                else
                {
                    text = text[1 .. $]; // Skip '<'
                    if (xml_node child = parse_node!(Flags & ~parse_open_only)(text))
                        node.append_node(child);
                }
                break;

            case '\0':
                throw new parse_error("unexpected end of data", text);

            default:
                next_char = parse_and_append_data!(Flags)(node, text, contents_start);
                goto after_data_node; // Bypass regular processing after data nodes
            }
        }

        // Not reached; every exit from the loop returns or throws.
        return null;
    }

    /**
     * Parses `name="value"` pairs and appends them to `node`.
     *
     * Parsing stops at the first character that cannot start an attribute
     * name. Attribute values are never whitespace-normalised.
     *
     * Throws: parse_error when '=' or a matching quote is missing.
     */
    void parse_node_attributes(int Flags)(ref char[] text, xml_node node)
    {
        while (text.length > 0 && attribute_name_pred.test(text[0]))
        {
            // Extract attribute name; the loop condition already accepted
            // the first character.
            char[] name = text;
            text = text[1 .. $];
            skip!(attribute_name_pred)(text);

            xml_attribute attribute = new xml_attribute();
            attribute.m_name = name[0 .. $ - text.length].idup;
            node.append_attribute(attribute);

            // Skip whitespace after attribute name
            skip!(whitespace_pred)(text);

            if (text.length == 0 || text[0] != '=')
                throw new parse_error("expected =", text);
            text = text[1 .. $]; // Skip '='

            // Skip whitespace after =
            skip!(whitespace_pred)(text);

            if (text.length == 0)
                throw new parse_error("expected ' or \"", text);
            char quote = text[0];
            if (quote != '\'' && quote != '"')
                throw new parse_error("expected ' or \"", text);
            text = text[1 .. $]; // Skip opening quote

            // Extract attribute value and expand char refs in it
            char[] value = text;
            char[] end;
            const int AttFlags = Flags & ~parse_normalize_whitespace;

            if (quote == '\'')
                end = skip_and_expand_character_refs!(attribute_value_pred!'\'',
                        attribute_value_pure_pred!('\''), AttFlags)(text);
            else
                end = skip_and_expand_character_refs!(attribute_value_pred!('"'),
                        attribute_value_pure_pred!('"'), AttFlags)(text);

            attribute.m_value = value[0 .. $ - end.length].idup;

            // Make sure that end quote is present
            if (text.length == 0 || text[0] != quote)
                throw new parse_error("expected ' or \"", text);
            text = text[1 .. $]; // Skip closing quote

            // Skip whitespace after attribute value
            skip!(whitespace_pred)(text);
        }
    }

    /// Advances `text` past every leading character accepted by `T.test`.
    static void skip(T)(ref char[] text)
    {
        char[] tmp = text;
        while (tmp.length > 0 && T.test(tmp[0]))
            tmp = tmp[1 .. $];
        text = tmp;
    }

    /// True when `text` begins with `prefix`.
    private static bool has_prefix(const(char)[] text, string prefix)
    {
        return text.length >= prefix.length && text[0 .. prefix.length] == prefix;
    }

    /**
     * Advances `text` to the first occurrence of `terminator`.
     *
     * Returns: the slice that was skipped (terminator not included).
     * Throws: parse_error when a NUL character or the end of the input is
     *         reached first.
     */
    private static char[] skip_until(ref char[] text, string terminator)
    {
        char[] start = text;
        while (!has_prefix(text, terminator))
        {
            if (text.length == 0 || text[0] == '\0')
                throw new parse_error("unexpected end of data", text);
            text = text[1 .. $];
        }
        return start[0 .. $ - text.length];
    }

    /// Skips a UTF-8 byte order mark at the start of `text`, if present.
    void parse_bom(ref char[] text)
    {
        if (text.length >= 3 && text[0] == 0xEF && text[1] == 0xBB && text[2] == 0xBF)
            text = text[3 .. $];
    }

    /**
     * Parses the XML declaration. `text` is positioned just after "<?xml ".
     *
     * Returns: a node_declaration node carrying the declaration attributes,
     *          or null when parse_declaration_node is not set.
     */
    xml_node parse_xml_declaration(int Flags)(ref char[] text)
    {
        if (!(Flags & parse_declaration_node))
        {
            // Declaration parsing is disabled: skip until end of declaration
            skip_until(text, "?>");
            text = text[2 .. $]; // Skip '?>'
            return null;
        }
        else
        {
            xml_node declaration = new xml_node;
            declaration.m_type = node_type.node_declaration;

            // Skip whitespace before attributes or ?>
            skip!whitespace_pred(text);
            // Parse declaration attributes
            parse_node_attributes!Flags(text, declaration);

            if (!has_prefix(text, "?>"))
                throw new parse_error("expected ?>", text);
            text = text[2 .. $]; // Skip '?>'

            return declaration;
        }
    }

    /**
     * Parses a processing instruction. `text` is positioned just after "<?".
     *
     * Returns: a node_pi node (name is the target, value the verbatim
     *          instruction text), or null when parse_pi_nodes is not set.
     */
    xml_node parse_pi(int Flags)(ref char[] text)
    {
        if (Flags & parse_pi_nodes)
        {
            xml_node pi = new xml_node;
            pi.m_type = node_type.node_pi;

            // Extract PI target name
            char[] name = text;
            skip!node_name_pred(text);
            if (text.length == name.length)
                throw new parse_error("expected PI target", text);
            pi.m_name = name[0 .. $ - text.length].idup;

            // Skip whitespace between pi target and pi
            skip!whitespace_pred(text);

            // Value is taken verbatim: no entity expansion or whitespace
            // normalization.
            pi.m_value = skip_until(text, "?>").idup;

            text = text[2 .. $]; // Skip '?>'
            return pi;
        }
        else
        {
            skip_until(text, "?>");
            text = text[2 .. $]; // Skip '?>'
            return null;
        }
    }

    /**
     * Parses a comment. `text` is positioned just after "<!--".
     *
     * Returns: a node_comment node, or null when parse_comment_nodes is not
     *          set.
     */
    xml_node parse_comment(int Flags)(ref char[] text)
    {
        if (!(Flags & parse_comment_nodes))
        {
            // Comment parsing is disabled: skip until end of comment
            skip_until(text, "-->");
            text = text[3 .. $]; // Skip '-->'
            return null;         // Do not produce comment node
        }
        else
        {
            char[] value = skip_until(text, "-->");

            xml_node comment = new xml_node;
            comment.m_type = node_type.node_comment;
            comment.m_value = value.idup;

            text = text[3 .. $]; // Skip '-->'
            return comment;
        }
    }

    /**
     * Parses a DOCTYPE. `text` is positioned just after "<!DOCTYPE ".
     *
     * Bracketed sections are skipped with a simple depth counter, so a '>'
     * inside `[...]` does not end the declaration.
     *
     * Returns: a node_doctype node, or null when parse_doctype_node is not
     *          set.
     */
    xml_node parse_doctype(int Flags)(ref char[] text)
    {
        // Remember value start
        char[] value = text;

        // Skip to >
        while (true)
        {
            if (text.length == 0 || text[0] == '\0')
                throw new parse_error("unexpected end of data", text);
            if (text[0] == '>')
                break;

            if (text[0] == '[')
            {
                // Scan for the matching ']' using a naive depth count.
                text = text[1 .. $]; // Skip '['
                int depth = 1;
                while (depth > 0)
                {
                    if (text.length == 0 || text[0] == '\0')
                        throw new parse_error("unexpected end of data", text);

                    if (text[0] == '[')
                        ++depth;
                    else if (text[0] == ']')
                        --depth;

                    text = text[1 .. $];
                }
            }
            else
            {
                // Other character, skip it
                text = text[1 .. $];
            }
        }

        if (Flags & parse_doctype_node)
        {
            xml_node doctype = new xml_node;
            doctype.m_type = node_type.node_doctype;
            doctype.m_value = value[0 .. $ - text.length].idup;

            text = text[1 .. $]; // Skip '>'
            return doctype;
        }
        else
        {
            text = text[1 .. $]; // Skip '>'
            return null;
        }
    }
}