/*
 * Hunt - A xml library for D programming language.
 *
 * Copyright (C) 2018-2019 HuntLabs
 *
 * Website: https://www.huntlabs.net
 *
 * Licensed under the Apache-2.0 License.
 *
 */

module rapidxml.skip;

import std.stdio;

// Exception thrown by this module when the input cannot be processed.
// The message is `msg`, one space, then the offending text.
class parse_error : Exception
{
    this(string msg, string text)
    {
        super(msg ~ " " ~ text);
    }

    // Overload for mutable buffers; the text is copied so the message does
    // not alias a buffer the caller keeps modifying.
    this(string msg, char[] text)
    {
        super(msg ~ " " ~ text.idup);
    }
}

// ---------------------------------------------------------------------------
// Table builders (evaluated at compile time to initialise the tables below)
// ---------------------------------------------------------------------------

// Returns a 256-entry table that is 0 everywhere except 1 at each byte of `chars`.
private ubyte[256] table_including(string chars)
{
    ubyte[256] table;
    table[] = 0;
    foreach (char c; chars)
        table[c] = 1;
    return table;
}

// Returns a 256-entry table that is 1 everywhere except 0 at each byte of `chars`.
private ubyte[256] table_excluding(string chars)
{
    ubyte[256] table;
    table[] = 1;
    foreach (char c; chars)
        table[c] = 0;
    return table;
}

// Returns the digit-value table: '0'-'9' -> 0-9, 'A'-'F' and 'a'-'f' -> 10-15,
// every other byte -> 255.
private ubyte[256] make_digit_table()
{
    ubyte[256] table;
    table[] = 0xFF;
    foreach (i; 0 .. 10)
        table['0' + i] = cast(ubyte) i;
    foreach (i; 0 .. 6)
    {
        table['A' + i] = cast(ubyte)(10 + i);
        table['a' + i] = cast(ubyte)(10 + i);
    }
    return table;
}

// Returns the identity byte mapping with 'a'-'z' replaced by 'A'-'Z'.
private ubyte[256] make_upcase_table()
{
    ubyte[256] table;
    foreach (i; 0 .. 256)
        table[i] = cast(ubyte) i;
    foreach (i; 0 .. 26)
        table['a' + i] = cast(ubyte)('A' + i);
    return table;
}

// True when `haystack` begins with `prefix`; never reads past the end of `haystack`.
private bool has_prefix(const(char)[] haystack, string prefix)
{
    return haystack.length >= prefix.length && haystack[0 .. prefix.length] == prefix;
}

// ---------------------------------------------------------------------------
// Character class tables, indexed by byte value
// ---------------------------------------------------------------------------

// Whitespace table: 1 for tab, line feed, carriage return and space.
ubyte[256] lookup_whitespace = table_including("\t\n\r ");

// Node name table: 0 for NUL, whitespace, '/', '>' and '?'.
ubyte[256] lookup_node_name = table_excluding("\x00\t\n\r />?");

// Element name table: as the node name table, and additionally 0 for ':'.
ubyte[256] lookup_element_name = table_excluding("\x00\t\n\r /:>?");

// Text table: 0 for NUL and '<'.
ubyte[256] lookup_text = table_excluding("\x00<");

// Text that needs no processing when whitespace is left alone: 0 for NUL, '&' and '<'.
ubyte[256] lookup_text_pure_no_ws = table_excluding("\x00&<");

// Text that needs no processing when whitespace is processed too:
// 0 for NUL, whitespace, '&' and '<'.
ubyte[256] lookup_text_pure_with_ws = table_excluding("\x00\t\n\r &<");

// Attribute name table: 0 for NUL, whitespace, '!', '/', '<', '=', '>' and '?'.
ubyte[256] lookup_attribute_name = table_excluding("\x00\t\n\r !/<=>?");

// Attribute data table with single quote: 0 for NUL and '\''.
ubyte[256] lookup_attribute_data_1 = table_excluding("\x00'");

// Attribute data table with single quote, no processing needed: 0 for NUL, '&' and '\''.
ubyte[256] lookup_attribute_data_1_pure = table_excluding("\x00&'");

// Attribute data table with double quotes: 0 for NUL and '"'.
ubyte[256] lookup_attribute_data_2 = table_excluding("\x00\"");

// Attribute data table with double quotes, no processing needed: 0 for NUL, '"' and '&'.
ubyte[256] lookup_attribute_data_2_pure = table_excluding("\x00\"&");

// Digits: numeric value of a decimal/hexadecimal digit character, 255 for anything else.
ubyte[256] lookup_digits = make_digit_table();

// Upper-case mapping for ASCII letters; all other bytes map to themselves.
ubyte[256] lookup_upcase = make_upcase_table();

// ---------------------------------------------------------------------------
// Parsing flags. Each flag occupies its own bit so flags can be OR-ed together.
// Within this module only parse_no_utf8, parse_no_entity_translation,
// parse_normalize_whitespace and parse_trim_whitespace are consulted.
// ---------------------------------------------------------------------------
const int parse_no_data_nodes = 0x1;

const int parse_no_element_values = 0x2;

const int parse_no_string_terminators = 0x4;

const int parse_no_entity_translation = 0x8;

const int parse_no_utf8 = 0x10;

const int parse_declaration_node = 0x20;

const int parse_comment_nodes = 0x40;

const int parse_doctype_node = 0x80;

const int parse_pi_nodes = 0x100;

const int parse_validate_closing_tags = 0x200;

const int parse_trim_whitespace = 0x400;

const int parse_normalize_whitespace = 0x800;

const int parse_open_only = 0x1000;

const int parse_parse_one = 0x2000;

const int parse_validate_xmlns = 0x4000;

// Flag combinations
const int parse_default = 0;

const int parse_non_destructive = parse_no_string_terminators | parse_no_entity_translation;

const int parse_fastest = parse_non_destructive | parse_no_data_nodes;

const int parse_full = parse_declaration_node | parse_comment_nodes | parse_doctype_node | parse_pi_nodes | parse_validate_closing_tags;

// Writes the character with code point `code` at the start of `text` and
// advances `text` past the bytes written.
//
// With parse_no_utf8 set in Flags a single byte (the low 8 bits of `code`) is
// written. Otherwise the code point is encoded as a 1- to 4-byte UTF-8 sequence.
//
// Params:
//   text = output window; must have room for the encoded sequence
//   code = code point to write
// Throws: parse_error when UTF-8 output is requested and `code` > 0x10FFFF.
void insert_coded_character(int Flags)(ref char[] text, ulong code)
{
    if (Flags & parse_no_utf8)
    {
        // Insert 8-bit character
        // Todo: possibly verify that code is less than 256 and use replacement char otherwise?
        text[0] = cast(char) code;
        text = text[1 .. $];
    }
    else
    {
        // Insert UTF-8 sequence. Continuation bytes are filled from the last
        // one backwards, shifting six payload bits out of `code` each time.
        if (code < 0x80) // 1 byte sequence
        {
            text[0] = cast(char) code;
            text = text[1 .. $];
        }
        else if (code < 0x800) // 2 byte sequence
        {
            text[1] = cast(char)((code | 0x80) & 0xBF); code >>= 6;
            text[0] = cast(char)(code | 0xC0);
            text = text[2 .. $];
        }
        else if (code < 0x10000) // 3 byte sequence
        {
            text[2] = cast(char)((code | 0x80) & 0xBF); code >>= 6;
            text[1] = cast(char)((code | 0x80) & 0xBF); code >>= 6;
            text[0] = cast(char)(code | 0xE0);
            text = text[3 .. $];
        }
        else if (code < 0x110000) // 4 byte sequence
        {
            text[3] = cast(char)((code | 0x80) & 0xBF); code >>= 6;
            text[2] = cast(char)((code | 0x80) & 0xBF); code >>= 6;
            text[1] = cast(char)((code | 0x80) & 0xBF); code >>= 6;
            text[0] = cast(char)(code | 0xF0);
            text = text[4 .. $];
        }
        else // Invalid, only codes up to 0x10FFFF are allowed in Unicode
        {
            throw new parse_error("invalid numeric character entity", text);
        }
    }
}

// Skips characters for as long as predicate T accepts them, while
// - replacing XML character entity references with the characters they stand
//   for (&amp; &apos; &quot; &gt; &lt; and numeric &#...; / &#x...;), unless
//   parse_no_entity_translation is set, and
// - condensing each run of whitespace to a single space when
//   parse_normalize_whitespace is set.
//
// Template params:
//   T     = predicate accepting every character that belongs to the run
//   TP    = predicate accepting only characters that need no processing; used
//           to skip the unmodified prefix quickly
//   Flags = parse flags
//
// Params:
//   text = input cursor; on return it starts at the first character rejected
//          by T (or is empty when the input was exhausted)
//
// Returns: when entity translation, whitespace normalisation and whitespace
// trimming are all disabled, the advanced `text` itself. Otherwise a copy of
// the input taken after the TP-prefix, whose leading characters have been
// overwritten with the translated output.
// NOTE(review): the copy keeps its original length, so the characters after
// the translated part are stale input; the translated length is not reported.
// Callers are outside this file - confirm how they use the result before
// changing this.
//
// Throws: parse_error for a numeric reference that is not terminated by ';'
// or that names a code point above 0x10FFFF (UTF-8 output only).
static char[] skip_and_expand_character_refs(T, TP, int Flags)(ref char[] text)
{
    // If entity translation, whitespace condense and whitespace trimming is disabled, use plain skip
    if ((Flags & parse_no_entity_translation) &&
        !(Flags & parse_normalize_whitespace) &&
        !(Flags & parse_trim_whitespace))
    {
        skip!(T)(text);
        return text;
    }

    // Use simple skip until first modification is detected
    skip!(TP)(text);

    // Use translation skip: read from `src`, write to `dest[index]`.
    // Output never outgrows input, so `dest` (same length as `src`) suffices.
    char[] src = text;
    char[] dest = src.dup;
    size_t index = 0;

    while (src.length > 0 && T.test(src[0]))
    {
        // If entity translation is enabled, test if replacement is needed
        if (!(Flags & parse_no_entity_translation) && src[0] == '&')
        {
            if (has_prefix(src, "&amp;"))
            {
                dest[index++] = '&';
                src = src[5 .. $];
                continue;
            }
            if (has_prefix(src, "&apos;"))
            {
                dest[index++] = '\'';
                src = src[6 .. $];
                continue;
            }
            if (has_prefix(src, "&quot;"))
            {
                dest[index++] = '"';
                src = src[6 .. $];
                continue;
            }
            if (has_prefix(src, "&gt;"))
            {
                dest[index++] = '>';
                src = src[4 .. $];
                continue;
            }
            if (has_prefix(src, "&lt;"))
            {
                dest[index++] = '<';
                src = src[4 .. $];
                continue;
            }
            if (has_prefix(src, "&#"))
            {
                // &#x...; is hexadecimal, &#...; is decimal - assumes ASCII digits
                const bool hex = src.length > 2 && src[2] == 'x';
                const uint radix = hex ? 16 : 10;
                src = src[(hex ? 3 : 2) .. $]; // Skip &#x or &#

                ulong code = 0;
                while (src.length > 0)
                {
                    const ubyte digit = lookup_digits[src[0]];
                    // Stops on non-digits (255) and on hex letters in a decimal reference
                    if (digit >= radix)
                        break;
                    // Stop accumulating once the value is already invalid so
                    // that very long digit runs cannot wrap around
                    if (code <= 0x10FFFF)
                        code = code * radix + digit;
                    src = src[1 .. $];
                }

                // Put character in output
                char[] window = dest[index .. $];
                insert_coded_character!Flags(window, code);
                index = dest.length - window.length;

                if (src.length > 0 && src[0] == ';')
                    src = src[1 .. $];
                else
                    throw new parse_error("expected ;", src);
                continue;
            }
            // Something else: ignore, just copy '&' verbatim below
        }

        // If whitespace condensing is enabled
        if (Flags & parse_normalize_whitespace)
        {
            // Test if condensing is needed
            if (whitespace_pred.test(src[0]))
            {
                dest[index++] = ' ';  // Put single space in dest
                src = src[1 .. $];    // Skip first whitespace char
                // Skip remaining whitespace chars
                while (src.length > 0 && whitespace_pred.test(src[0]))
                    src = src[1 .. $];
                continue;
            }
        }

        // No replacement, only copy character
        dest[index++] = src[0];
        src = src[1 .. $];
    }

    // Report the new input position to the caller
    text = src;
    return dest;
}

// Advances `text` past the longest prefix whose characters are all accepted
// by predicate T. Leaves `text` empty when every character is accepted.
void skip(T)(ref char[] text)
{
    size_t index = 0;
    // The bounds check must come first so that text[index] is never read
    // past the end
    while (index < text.length && T.test(text[index]))
        ++index;
    text = text[index .. $];
}

// Detect whitespace character
struct whitespace_pred
{
    static ubyte test(ubyte ch)
    {
        return lookup_whitespace[ch];
    }
}

// Detect node name character
struct node_name_pred
{
    static ubyte test(ubyte ch)
    {
        return lookup_node_name[ch];
    }
}

// Detect element name character
struct element_name_pred
{
    static ubyte test(ubyte ch)
    {
        return lookup_element_name[ch];
    }
}

// Detect attribute name character
struct attribute_name_pred
{
    static ubyte test(ubyte ch)
    {
        return lookup_attribute_name[ch];
    }
}

// Detect text character (PCDATA)
struct text_pred
{
    static ubyte test(ubyte ch)
    {
        return lookup_text[ch];
    }
}

// Detect text character (PCDATA) that does not require processing
// when whitespace is left untouched
struct text_pure_no_ws_pred
{
    static ubyte test(ubyte ch)
    {
        return lookup_text_pure_no_ws[ch];
    }
}

// Detect text character (PCDATA) that does not require processing
// when whitespace is processed as well
struct text_pure_with_ws_pred
{
    static ubyte test(ubyte ch)
    {
        return lookup_text_pure_with_ws[ch];
    }
}

// Detect attribute value character; Quote is the delimiter of the value
// ('\'' or '"')
struct attribute_value_pred(alias Quote)
{
    static ubyte test(ubyte ch)
    {
        if (Quote == '\'')
            return lookup_attribute_data_1[ch];
        else if (Quote == '"')
            return lookup_attribute_data_2[ch];
        else
            return 0; // Any other Quote: accept nothing
    }
}

// Detect attribute value character that does not require processing;
// Quote is the delimiter of the value ('\'' or '"')
struct attribute_value_pure_pred(alias Quote)
{
    static ubyte test(ubyte ch)
    {
        if (Quote == '\'')
            return lookup_attribute_data_1_pure[ch];
        else if (Quote == '"')
            return lookup_attribute_data_2_pure[ch];
        else
            return 0; // Any other Quote: accept nothing
    }
}